From 32b789996f17cc267172181ec79b69335e8d7d6a Mon Sep 17 00:00:00 2001 From: Sjoerd <svink@graphpolaris.com> Date: Sat, 12 Oct 2024 10:20:54 +0000 Subject: [PATCH] chore(statistics): refactor of statistics --- .../store/graphQueryResultSlice.ts | 2 + libs/shared/lib/statistics/graphStatistics.ts | 68 ++++++++++ libs/shared/lib/statistics/index.ts | 1 + .../shared/lib/statistics/statistics.types.ts | 97 ++++++++++++++ .../statistics/tests/attributeStats.spec.ts | 105 +++++++++++++++ .../statistics/tests/getAttributeType.spec.ts | 65 ++++++++++ .../statistics/tests/getNodeEdgeType.spec.ts | 66 ++++++++++ .../statistics/tests/graphStatistics.spec.ts | 121 ++++++++++++++++++ .../statistics/utils/attributeStats/array.ts | 7 + .../utils/attributeStats/boolean.ts | 11 ++ .../utils/attributeStats/categorical.ts | 16 +++ .../statistics/utils/attributeStats/index.ts | 7 + .../utils/attributeStats/initialize.ts | 44 +++++++ .../utils/attributeStats/numerical.ts | 11 ++ .../statistics/utils/attributeStats/object.ts | 7 + .../utils/attributeStats/temporal.ts | 12 ++ .../lib/statistics/utils/getAttributeType.ts | 78 +++++++++++ .../lib/statistics/utils/getNodeOrEdgeType.ts | 23 ++++ libs/shared/lib/statistics/utils/index.ts | 4 + .../lib/statistics/utils/updateStatistics.ts | 47 +++++++ 20 files changed, 792 insertions(+) create mode 100644 libs/shared/lib/statistics/graphStatistics.ts create mode 100644 libs/shared/lib/statistics/index.ts create mode 100644 libs/shared/lib/statistics/statistics.types.ts create mode 100644 libs/shared/lib/statistics/tests/attributeStats.spec.ts create mode 100644 libs/shared/lib/statistics/tests/getAttributeType.spec.ts create mode 100644 libs/shared/lib/statistics/tests/getNodeEdgeType.spec.ts create mode 100644 libs/shared/lib/statistics/tests/graphStatistics.spec.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/array.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/boolean.ts create mode 100644 
libs/shared/lib/statistics/utils/attributeStats/categorical.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/index.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/initialize.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/numerical.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/object.ts create mode 100644 libs/shared/lib/statistics/utils/attributeStats/temporal.ts create mode 100644 libs/shared/lib/statistics/utils/getAttributeType.ts create mode 100644 libs/shared/lib/statistics/utils/getNodeOrEdgeType.ts create mode 100644 libs/shared/lib/statistics/utils/index.ts create mode 100644 libs/shared/lib/statistics/utils/updateStatistics.ts diff --git a/libs/shared/lib/data-access/store/graphQueryResultSlice.ts b/libs/shared/lib/data-access/store/graphQueryResultSlice.ts index 95e2f6bb2..a9f919a13 100755 --- a/libs/shared/lib/data-access/store/graphQueryResultSlice.ts +++ b/libs/shared/lib/data-access/store/graphQueryResultSlice.ts @@ -1,6 +1,7 @@ import { createSlice, PayloadAction } from '@reduxjs/toolkit'; import type { RootState } from './store'; import { getDimension, extractStatistics, GraphMetadata } from '../statistics'; +import { getGraphStatistics } from '../../statistics'; export interface GraphQueryResultFromBackendPayload { queryID: string; @@ -127,6 +128,7 @@ export const graphQueryBackend2graphQuery = (payload: GraphQueryResultFromBacken _edge.label = edgeType; return _edge; }); + return { metaData: extractStatistics(metaData), nodes, edges }; }; diff --git a/libs/shared/lib/statistics/graphStatistics.ts b/libs/shared/lib/statistics/graphStatistics.ts new file mode 100644 index 000000000..ffaf9fb4f --- /dev/null +++ b/libs/shared/lib/statistics/graphStatistics.ts @@ -0,0 +1,68 @@ +import { GraphQueryResultFromBackend } from '../data-access/store/graphQueryResultSlice'; +import { GraphStatistics } from './statistics.types'; +import { getAttributeType, getEdgeType, 
// ---- libs/shared/lib/statistics/graphStatistics.ts ----
import { getAttributeType, getEdgeType, getNodeLabel, initializeStatistics, updateStatistics } from './utils';

/** Own-property test; a plain truthiness probe would also match inherited keys such as `constructor`. */
const hasOwnKey = (target: object, key: string): boolean => Object.prototype.hasOwnProperty.call(target, key);

/**
 * Registers one node or edge in `bucket`: creates the per-label entry on first sight,
 * bumps its count and folds every attribute value into that label's attribute statistics.
 *
 * The attribute type is taken from the first value seen for an attribute id; later
 * values are fed to the statistics that were initialised for that type.
 */
const recordElement = (
  bucket: GraphStatistics['nodes'],
  label: string,
  attributes: Record<string, unknown> | null | undefined,
): void => {
  if (!hasOwnKey(bucket.types, label)) {
    bucket.labels.push(label);
    bucket.types[label] = { count: 0, attributes: {} };
  }

  const typeStats = bucket.types[label];
  typeStats.count++;

  // An element without an attribute map contributes to the counts only.
  for (const [attributeId, attributeValue] of Object.entries(attributes ?? {})) {
    if (!hasOwnKey(typeStats.attributes, attributeId)) {
      const attributeType = getAttributeType(attributeValue);
      typeStats.attributes[attributeId] = { attributeType, statistics: initializeStatistics(attributeType) };
    }

    updateStatistics(typeStats.attributes[attributeId], attributeValue);
  }
};

/**
 * Builds the statistics summary for a query result.
 *
 * @param graph - Nodes and edges as delivered by the backend.
 * @returns Topological figures plus per-label counts and attribute statistics for nodes and edges.
 *
 * Density is computed as `2 * |E| / (|N| * (|N| - 1))`. Self-loops are included in `|E|`,
 * so the value can exceed 1. With fewer than two nodes the denominator is zero, so the
 * density is reported as 0 instead of `NaN`/`Infinity`.
 */
const getGraphStatistics = (graph: GraphQueryResultFromBackend): GraphStatistics => {
  const { nodes, edges } = graph;

  const nodeCount = nodes.length;
  const edgeCount = edges.length;
  const density = nodeCount > 1 ? (edgeCount * 2) / (nodeCount * (nodeCount - 1)) : 0;

  const metaData: GraphStatistics = {
    topological: { density, self_loops: 0 },
    nodes: { labels: [], count: nodeCount, types: {} },
    edges: { labels: [], count: edgeCount, types: {} },
  };

  for (const node of nodes) {
    recordElement(metaData.nodes, getNodeLabel(node), node.attributes);
  }

  for (const edge of edges) {
    if (edge.from === edge.to) metaData.topological.self_loops++;
    recordElement(metaData.edges, getEdgeType(edge), edge.attributes);
  }

  return metaData;
};

export { getGraphStatistics };

// ---- libs/shared/lib/statistics/statistics.types.ts ----

/** Complete statistics summary of a graph query result. */
type GraphStatistics = {
  topological: TopologicalStats;
  nodes: NodeOrEdgeStats;
  edges: NodeOrEdgeStats;
};

/** Counts and attribute statistics for either all nodes or all edges, grouped by label. */
type NodeOrEdgeStats = {
  /** Total number of elements. */
  count: number;
  /** Distinct labels in order of first appearance. */
  labels: string[];
  types: {
    [label: string]: {
      /** Number of elements carrying this label. */
      count: number;
      // Optional degree averages; getGraphStatistics does not populate them.
      avgDegreeIn?: number;
      avgDegreeOut?: number;
      attributes: {
        [id: string]: AttributeStats<AttributeType>;
      };
    };
  };
};

/** An attribute's detected type together with the statistics shape that belongs to it. */
type AttributeStats<T extends AttributeType> = {
  attributeType: T;
  statistics: AttributeTypeStats<T>;
};

/** Maps an attribute type to its statistics shape. */
type AttributeTypeStats<T extends AttributeType> = T extends 'string'
  ? CategoricalStats
  : T extends 'boolean'
    ? BooleanStats
    : T extends 'number'
      ? NumericalStats
      : T extends 'date' | 'time' | 'datetime' | 'timestamp'
        ? TemporalStats
        : T extends 'array'
          ? ArrayStats
          : T extends 'object'
            ? ObjectStats
            : never;

type AttributeType = 'string' | 'boolean' | 'number' | 'array' | 'object' | TemporalType;

type TemporalType = 'date' | 'time' | 'datetime' | 'timestamp';
// Date: Date in the YYYY-MM-DD format (ISO 8601 syntax) (e.g., 2021-09-28)
// Time: Time in the hh:mm:ss format for the time of day, time since an event, or time interval between events (e.g., 12:00:59)
// Datetime: Date and time together in the YYYY-MM-DD hh:mm:ss format (e.g., 2021-09-28 12:00:59)
// Timestamp: Number of seconds that have elapsed since midnight (00:00:00 UTC) on 1 January 1970 (Unix time) (e.g., 1632855600)

type TopologicalStats = {
  density: number;
  /** Number of edges whose `from` equals their `to`. */
  self_loops: number;
};

type NumericalStats = {
  min: number;
  max: number;
  /** Running mean of the values seen so far. */
  average: number;
  /** Number of values folded into `average`. */
  count: number;
};

type BooleanStats = {
  true: number;
  false: number;
};

type CategoricalStats = {
  /** Number of distinct entries in `values`. */
  uniqueItems: number;
  /** Every observed value, in arrival order. */
  values: string[];
  /** Most frequent entry of `values`. */
  mode: string;
};

type TemporalStats = {
  // min/max hold numeric timestamps (the updater stores Date#getTime() results).
  min: number;
  max: number;
  /** `max - min`. */
  range: number;
};

type ArrayStats = {
  length: number;
};

type ObjectStats = {
  length: number;
};

export type {
  GraphStatistics,
  AttributeStats,
  NumericalStats,
  CategoricalStats,
  BooleanStats,
  TemporalStats,
  AttributeType,
  AttributeTypeStats,
  ArrayStats,
  ObjectStats,
};
it('should update the length of the array', () => { + const stats: ArrayStats = { length: 0 }; + const value = [1, 2, 3]; + updateArrayStats(stats, value); + expect(stats.length).toBe(3); + }); +}); + +describe('updateBooleanStats', () => { + it('should update true count when value is true', () => { + const stats: BooleanStats = { true: 0, false: 0 }; + updateBooleanStats(stats, true); + expect(stats.true).toBe(1); + expect(stats.false).toBe(0); + }); + + it('should update false count when value is false', () => { + const stats: BooleanStats = { true: 0, false: 0 }; + updateBooleanStats(stats, false); + expect(stats.false).toBe(1); + expect(stats.true).toBe(0); + }); +}); + +describe('updateCategoricalStats', () => { + it('should update mode and unique items count', () => { + const stats: CategoricalStats = { uniqueItems: 0, values: [], mode: '' }; + updateCategoricalStats(stats, 'apple'); + updateCategoricalStats(stats, 'banana'); + updateCategoricalStats(stats, 'apple'); + + expect(stats.values).toEqual(['apple', 'banana', 'apple']); + expect(stats.uniqueItems).toBe(2); + expect(stats.mode).toBe('apple'); + }); +}); + +describe('updateNumericalStats', () => { + it('should update min, max, average, and count', () => { + const stats: NumericalStats = { min: Infinity, max: -Infinity, average: 0, count: 0 }; + updateNumericalStats(stats, 10); + updateNumericalStats(stats, 20); + updateNumericalStats(stats, 5); + + expect(stats.min).toBe(5); + expect(stats.max).toBe(20); + expect(stats.average).toBeCloseTo(11.67, 2); + expect(stats.count).toBe(3); + }); +}); + +describe('updateTemporalStats', () => { + it('should update min, max, and range for temporal values', () => { + const stats: TemporalStats = { min: Infinity, max: -Infinity, range: 0 }; + updateTemporalStats(stats, '2022-01-01'); + updateTemporalStats(stats, '2022-01-05'); + + expect(stats.min).toBe(new Date('2022-01-01').getTime()); + expect(stats.max).toBe(new Date('2022-01-05').getTime()); + 
expect(stats.range).toBe(new Date('2022-01-05').getTime() - new Date('2022-01-01').getTime()); + }); +}); + +describe('updateObjectStats', () => { + it('should update the length of the object keys', () => { + const stats: ObjectStats = { length: 0 }; + const value = { key1: 'value1', key2: 'value2' }; + updateObjectStats(stats, value); + expect(stats.length).toBe(2); + }); +}); + +describe('initializeStatistics', () => { + it('should initialize statistics for string type', () => { + const stats = initializeStatistics('string'); + expect(stats).toEqual({ uniqueItems: 0, values: [], mode: '' }); + }); + + it('should initialize statistics for boolean type', () => { + const stats = initializeStatistics('boolean'); + expect(stats).toEqual({ true: 0, false: 0 }); + }); + + it('should initialize statistics for number type', () => { + const stats = initializeStatistics('number'); + expect(stats).toEqual({ min: Infinity, max: -Infinity, average: 0 }); + }); + + it('should throw an error for an unknown type', () => { + expect(() => initializeStatistics('unknown' as any)).toThrow('Unknown attribute type: unknown'); + }); +}); diff --git a/libs/shared/lib/statistics/tests/getAttributeType.spec.ts b/libs/shared/lib/statistics/tests/getAttributeType.spec.ts new file mode 100644 index 000000000..562584548 --- /dev/null +++ b/libs/shared/lib/statistics/tests/getAttributeType.spec.ts @@ -0,0 +1,65 @@ +import { describe, it, expect } from 'vitest'; +import { getAttributeType } from '../utils/getAttributeType'; + +// Sample values for testing +const invalidDate = '2023-13-03'; +const validTime = '12:30:45'; +const invalidTime = '25:61:61'; +const invalidDatetime = '2023-10-03 25:61:61'; +const validNumber = '123.45'; +const invalidNumber = 'abc123'; +const booleanTrue = true; +const booleanFalse = false; +const numberValue = 123; +const arrayValue = [1, 2, 3]; +const objectValue = { key: 'value' }; +const dateInstance = new Date('2023-10-03T12:30:45'); + +// Unit tests for 
getAttributeType function +describe('getAttributeType', () => { + it('should correctly identify numbers as type "number"', () => { + expect(getAttributeType(validNumber)).toBe('number'); + expect(getAttributeType(numberValue)).toBe('number'); + }); + + it('should correctly identify strings as valid "time"', () => { + expect(getAttributeType(validTime)).toBe('time'); + }); + + it('should identify invalid datetime strings as "string"', () => { + expect(getAttributeType(invalidDatetime)).toBe('string'); + }); + + it('should identify invalid date strings as "string"', () => { + expect(getAttributeType(invalidDate)).toBe('string'); + }); + + it('should identify invalid time strings as "string"', () => { + expect(getAttributeType(invalidTime)).toBe('string'); + }); + + it('should correctly identify boolean values as type "boolean"', () => { + expect(getAttributeType(booleanTrue)).toBe('boolean'); + expect(getAttributeType(booleanFalse)).toBe('boolean'); + }); + + it('should correctly identify arrays as type "array"', () => { + expect(getAttributeType(arrayValue)).toBe('array'); + }); + + it('should correctly identify objects as type "object"', () => { + expect(getAttributeType(objectValue)).toBe('object'); + }); + + it('should correctly identify Date instances as type "datetime"', () => { + expect(getAttributeType(dateInstance)).toBe('datetime'); + }); + + it('should identify string representations of invalid numbers as "string"', () => { + expect(getAttributeType(invalidNumber)).toBe('string'); + }); + + it('should identify a regular string as type "string"', () => { + expect(getAttributeType('random string')).toBe('string'); + }); +}); diff --git a/libs/shared/lib/statistics/tests/getNodeEdgeType.spec.ts b/libs/shared/lib/statistics/tests/getNodeEdgeType.spec.ts new file mode 100644 index 000000000..5e91e5fa1 --- /dev/null +++ b/libs/shared/lib/statistics/tests/getNodeEdgeType.spec.ts @@ -0,0 +1,66 @@ +import { describe, it, expect } from 'vitest'; +import { 
getNodeLabel, getEdgeType } from '../utils/getNodeOrEdgeType'; +import { GraphQueryResultFromBackend } from '../../data-access/store/graphQueryResultSlice'; + +describe('getNodeLabel', () => { + it('should return node type based on _id', () => { + const node: GraphQueryResultFromBackend['nodes'][number] = { + _id: 'Person/123', + attributes: {}, + }; + const label = getNodeLabel(node); + expect(label).toBe('Person'); + }); + + it('should return node label if present', () => { + const node: GraphQueryResultFromBackend['nodes'][number] = { + _id: 'Person/123', + label: 'Student', + attributes: {}, + }; + const label = getNodeLabel(node); + expect(label).toBe('Student'); + }); + + it('should return first label from attributes if labels array is present', () => { + const node: GraphQueryResultFromBackend['nodes'][number] = { + _id: 'Person/123', + attributes: { labels: ['Teacher', 'Mentor'] }, + }; + const label = getNodeLabel(node); + expect(label).toBe('Teacher'); + }); + + it('should return _id-based node type if labels array is empty', () => { + const node: GraphQueryResultFromBackend['nodes'][number] = { + _id: 'Person/123', + attributes: { labels: [] }, + }; + const label = getNodeLabel(node); + expect(label).toBe('Person'); + }); +}); + +describe('getEdgeType', () => { + it('should return edge type based on _id', () => { + const edge: GraphQueryResultFromBackend['edges'][number] = { + _id: 'Relationship/456', + attributes: {}, + from: 'Person/123', + to: 'Person/456', + }; + const edgeType = getEdgeType(edge); + expect(edgeType).toBe('Relationship'); + }); + + it('should return edge type from attributes if _id does not contain /', () => { + const edge: GraphQueryResultFromBackend['edges'][number] = { + _id: '456', + attributes: { Type: 'FRIENDS_WITH' }, + from: 'Person/123', + to: 'Person/456', + }; + const edgeType = getEdgeType(edge); + expect(edgeType).toBe('FRIENDS_WITH'); + }); +}); diff --git a/libs/shared/lib/statistics/tests/graphStatistics.spec.ts 
b/libs/shared/lib/statistics/tests/graphStatistics.spec.ts new file mode 100644 index 000000000..772de8011 --- /dev/null +++ b/libs/shared/lib/statistics/tests/graphStatistics.spec.ts @@ -0,0 +1,121 @@ +import { describe, it, expect } from 'vitest'; +import { GraphQueryResultFromBackend } from '../../data-access/store/graphQueryResultSlice'; +import { getGraphStatistics } from '../graphStatistics'; + +describe('getGraphStatistics', () => { + it('should return correct statistics for a graph with no nodes and edges', () => { + const graph: GraphQueryResultFromBackend = { + nodes: [], + edges: [], + }; + + const stats = getGraphStatistics(graph); + + expect(stats).toEqual({ + topological: { density: 0, self_loops: 0 }, + nodes: { labels: [], count: 0, types: {} }, + edges: { labels: [], count: 0, types: {} }, + }); + }); + + it('should return correct statistics for a graph with nodes and no edges', () => { + const graph: GraphQueryResultFromBackend = { + nodes: [ + { _id: '1', attributes: { age: 25 } }, + { _id: '2', attributes: { age: 30, city: 'New York' } }, + ], + edges: [], + }; + + const stats = getGraphStatistics(graph); + + expect(stats).toEqual({ + topological: { density: 0, self_loops: 0 }, + nodes: { + labels: ['Person'], // Assuming default label + count: 2, + types: { + Person: { + count: 2, + attributes: { + age: { attributeType: 'number', statistics: expect.any(Object) }, + city: { attributeType: 'string', statistics: expect.any(Object) }, + }, + }, + }, + }, + edges: { labels: [], count: 0, types: {} }, + }); + }); + + it('should return correct statistics for a graph with edges and nodes', () => { + const graph: GraphQueryResultFromBackend = { + nodes: [ + { _id: '1', attributes: { age: 25 } }, + { _id: '2', attributes: { age: 30 } }, + ], + edges: [ + { _id: 'e1', attributes: { weight: 5 }, from: '1', to: '2' }, + { _id: 'e2', attributes: { weight: 10 }, from: '2', to: '2' }, // self-loop + ], + }; + + const stats = getGraphStatistics(graph); + + 
expect(stats).toEqual({ + topological: { density: 1, self_loops: 1 }, + nodes: { + labels: ['Person'], // Assuming default label + count: 2, + types: { + Person: { + count: 2, + attributes: { + age: { attributeType: 'number', statistics: expect.any(Object) }, + }, + }, + }, + }, + edges: { + labels: ['Relationship'], // Assuming default edge type + count: 2, + types: { + Relationship: { + count: 2, + attributes: { + weight: { attributeType: 'number', statistics: expect.any(Object) }, + }, + }, + }, + }, + }); + }); + + it('should correctly count self-loops', () => { + const graph: GraphQueryResultFromBackend = { + nodes: [{ _id: '1', attributes: {} }], + edges: [ + { _id: 'e1', attributes: {}, from: '1', to: '1' }, // self-loop + ], + }; + + const stats = getGraphStatistics(graph); + + expect(stats.topological.self_loops).toBe(1); + }); + + it('should correctly compute density for a graph with nodes and edges', () => { + const graph: GraphQueryResultFromBackend = { + nodes: [ + { _id: '1', attributes: {} }, + { _id: '2', attributes: {} }, + ], + edges: [{ _id: 'e1', attributes: {}, from: '1', to: '2' }], + }; + + const stats = getGraphStatistics(graph); + + // Density = (n_edges * 2) / (n_nodes * (n_nodes - 1)) = (1 * 2) / (2 * 1) = 1 + expect(stats.topological.density).toBe(1); + }); +}); diff --git a/libs/shared/lib/statistics/utils/attributeStats/array.ts b/libs/shared/lib/statistics/utils/attributeStats/array.ts new file mode 100644 index 000000000..c6a7dc55d --- /dev/null +++ b/libs/shared/lib/statistics/utils/attributeStats/array.ts @@ -0,0 +1,7 @@ +import { ArrayStats } from '../../statistics.types'; + +const updateArrayStats = (stats: ArrayStats, value: any[]) => { + stats.length = value.length; +}; + +export { updateArrayStats }; diff --git a/libs/shared/lib/statistics/utils/attributeStats/boolean.ts b/libs/shared/lib/statistics/utils/attributeStats/boolean.ts new file mode 100644 index 000000000..ea7efdf13 --- /dev/null +++ 
// ---- libs/shared/lib/statistics/utils/attributeStats/boolean.ts ----
import type { AttributeType, AttributeTypeStats, BooleanStats, CategoricalStats } from '../../statistics.types';

/**
 * Tallies one boolean observation.
 *
 * @param stats - Counters to mutate in place.
 * @param value - Observation; any truthy value is counted under `true`.
 */
const updateBooleanStats = (stats: BooleanStats, value: boolean) => {
  if (value) {
    stats.true += 1;
  } else {
    stats.false += 1;
  }
};

export { updateBooleanStats };

// ---- libs/shared/lib/statistics/utils/attributeStats/categorical.ts ----

/**
 * Appends one observation to `stats.values` and recomputes `uniqueItems` and `mode`.
 *
 * @param stats - Categorical statistics to mutate in place.
 * @param value - Observation; stored in its string form. `null`/`undefined` are
 *   ignored instead of throwing on `.toString()`.
 *
 * Frequencies are counted in a `Map`, so values that collide with
 * `Object.prototype` members (e.g. `"constructor"`) are counted like any other
 * string. When several values share the highest frequency, the one that first
 * appeared latest in `values` becomes the mode.
 */
const updateCategoricalStats = (stats: CategoricalStats, value: string | boolean) => {
  if (value === null || value === undefined) return;

  if (!stats.values) stats.values = [];
  stats.values.push(String(value));

  const frequencies = new Map<string, number>();
  for (const observed of stats.values) {
    frequencies.set(observed, (frequencies.get(observed) ?? 0) + 1);
  }
  stats.uniqueItems = frequencies.size;

  let mode = stats.mode;
  let highest = 0;
  for (const [candidate, occurrences] of frequencies) {
    // `>=` lets a later candidate win a tie.
    if (occurrences >= highest) {
      mode = candidate;
      highest = occurrences;
    }
  }
  stats.mode = mode;
};

export { updateCategoricalStats };

// ---- libs/shared/lib/statistics/utils/attributeStats/initialize.ts ----

/**
 * Creates the empty statistics record for an attribute type.
 *
 * @param type - Attribute type to create statistics for.
 * @returns A fresh object on every call (never shared between attributes).
 * @throws Error `Unknown attribute type: <type>` for a value outside `AttributeType`.
 */
const initializeStatistics = <T extends AttributeType>(type: T): AttributeTypeStats<T> => {
  switch (type) {
    case 'string':
      return {
        uniqueItems: 0,
        values: [],
        mode: '',
      } as unknown as AttributeTypeStats<T>;
    case 'boolean':
      return {
        true: 0,
        false: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'number':
      return {
        // min/max start at the identity elements of Math.min / Math.max.
        min: Infinity,
        max: -Infinity,
        average: 0,
        // Required by NumericalStats; without it the running average divides by NaN.
        count: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'date':
    case 'time':
    case 'datetime':
    case 'timestamp':
      return {
        min: Infinity,
        max: -Infinity,
        range: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'array':
      return {
        length: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'object':
      return {
        length: 0,
      } as unknown as AttributeTypeStats<T>;
    default:
      throw new Error(`Unknown attribute type: ${type}`);
  }
};

export { initializeStatistics };
// ---- libs/shared/lib/statistics/utils/attributeStats/numerical.ts ----
import type { NumericalStats, ObjectStats, TemporalStats } from '../../statistics.types';

/**
 * Folds one observation into min, max, count and the running average.
 *
 * @param stats - Numerical statistics to mutate in place.
 * @param value - Observation. Numeric strings are converted first, because
 *   attribute detection also classifies strings such as `"123.45"` as numbers and
 *   `+` on a string would concatenate. Values that convert to `NaN` are ignored.
 */
const updateNumericalStats = (stats: NumericalStats, value: number) => {
  const numeric = typeof value === 'number' ? value : Number(value);
  if (Number.isNaN(numeric)) return;

  if (stats.min === undefined || numeric < stats.min) stats.min = numeric;
  if (stats.max === undefined || numeric > stats.max) stats.max = numeric;

  // Tolerate records created without `count` / `average`.
  const seenBefore = Number.isFinite(stats.count) ? stats.count : 0;
  const averageBefore = Number.isNaN(stats.average) || stats.average === undefined ? 0 : stats.average;

  stats.count = seenBefore + 1;
  stats.average = (averageBefore * seenBefore + numeric) / stats.count;
};

export { updateNumericalStats };

// ---- libs/shared/lib/statistics/utils/attributeStats/object.ts ----

/**
 * Stores the number of own enumerable keys of the latest object in `stats.length`.
 * Non-object input (including `null`) is ignored instead of throwing in `Object.keys`.
 */
const updateObjectStats = (stats: ObjectStats, value: object) => {
  if (value === null || typeof value !== 'object') return;
  stats.length = Object.keys(value).length;
};

export { updateObjectStats };

// ---- libs/shared/lib/statistics/utils/attributeStats/temporal.ts ----

const TIME_OF_DAY = /^([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/;
const SPACED_DATETIME = /^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})$/;

/**
 * Converts a temporal value to a number of milliseconds, or `NaN` when it cannot be parsed.
 * - `Date` instances: `getTime()`.
 * - `hh:mm:ss`: milliseconds since midnight (`new Date('12:00:59')` is an invalid date).
 * - `YYYY-MM-DD hh:mm:ss`: the space is replaced by `T` so the string follows the
 *   standard date-time format before being handed to `Date`.
 * - anything else: whatever `new Date(value)` yields.
 */
const toMillis = (value: string | Date): number => {
  if (value instanceof Date) return value.getTime();
  if (typeof value !== 'string') return new Date(value).getTime();

  const clock = TIME_OF_DAY.exec(value);
  if (clock) {
    const [, hours, minutes, seconds] = clock;
    return ((Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds)) * 1000;
  }

  return new Date(value.replace(SPACED_DATETIME, '$1T$2')).getTime();
};

/**
 * Widens `min`/`max` with one observation and refreshes `range`.
 * Unparsable values are skipped, so `range` is never derived from the
 * `Infinity` / `-Infinity` start values.
 */
const updateTemporalStats = (stats: TemporalStats, value: string | Date) => {
  const timestamp = toMillis(value);
  if (Number.isNaN(timestamp)) return;

  if (stats.min === undefined || timestamp < stats.min) stats.min = timestamp;
  if (stats.max === undefined || timestamp > stats.max) stats.max = timestamp;

  stats.range = stats.max - stats.min;
};

export { updateTemporalStats };

// ---- libs/shared/lib/statistics/utils/getAttributeType.ts ----
import type { AttributeType } from '../statistics.types';

const DATE_PATTERN = /^(\d{4})-(\d{2})-(\d{2})$/;
const TIME_PATTERN = /^([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/;
const DATETIME_PATTERN = /^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})$/;

/**
 * Checks for a real calendar date in `YYYY-MM-DD` form.
 * The components are round-tripped through a `Date` using UTC setters/getters only,
 * so the verdict does not depend on the machine's time zone.
 */
const isValidDate = (value: string): boolean => {
  const parts = DATE_PATTERN.exec(value);
  if (!parts) return false;

  const year = Number(parts[1]);
  const month = Number(parts[2]);
  const day = Number(parts[3]);

  // setUTCFullYear (unlike Date.UTC) does not remap years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);

  // Out-of-range months/days roll over and therefore fail the comparison.
  return probe.getUTCFullYear() === year && probe.getUTCMonth() === month - 1 && probe.getUTCDate() === day;
};

/** Checks for a time in `hh:mm:ss` form (00:00:00 to 23:59:59). */
const isValidTime = (value: string): boolean => TIME_PATTERN.test(value);

/** Checks for `YYYY-MM-DD hh:mm:ss` where both halves are valid on their own. */
const isValidDatetime = (value: string): boolean => {
  const parts = DATETIME_PATTERN.exec(value);
  return parts !== null && isValidDate(parts[1]) && isValidTime(parts[2]);
};

/** A string counts as numeric when both `Number()` and `parseFloat()` can read it. */
const isValidNumber = (value: string): boolean => {
  return !Number.isNaN(Number(value)) && !Number.isNaN(parseFloat(value));
};

/**
 * Determines the attribute type of a raw value.
 *
 * Strings are probed in the order number, datetime, date, time and fall back to
 * `'string'`. Non-string values map by runtime type; arrays and `Date` instances are
 * recognised before generic objects. Everything else (`null`, `undefined`, ...) is
 * reported as `'string'`.
 */
const getAttributeType = (value: unknown): AttributeType => {
  if (typeof value === 'string') {
    if (isValidNumber(value)) return 'number';
    if (isValidDatetime(value)) return 'datetime';
    if (isValidDate(value)) return 'date';
    if (isValidTime(value)) return 'time';
    return 'string';
  }

  if (typeof value === 'boolean') return 'boolean';
  if (typeof value === 'number') return 'number';
  if (Array.isArray(value)) return 'array';
  if (value instanceof Date) return 'datetime';
  if (typeof value === 'object' && value !== null) return 'object';

  return 'string';
};

export { getAttributeType };

// ---- libs/shared/lib/statistics/utils/getNodeOrEdgeType.ts ----
import type { GraphQueryResultFromBackend } from '../../data-access/store/graphQueryResultSlice';

/**
 * Resolves a node's label: an explicit `label` wins, then the first entry of
 * `attributes.labels`, then the part of `_id` before the first `/`.
 */
const getNodeLabel = (node: GraphQueryResultFromBackend['nodes'][number]): string => {
  if (node.label) return node.label;

  const labels = node.attributes?.labels;
  if (Array.isArray(labels) && labels.length > 0) {
    return labels[0];
  }

  return node._id.split('/')[0];
};

/**
 * Resolves an edge's type: the part of `_id` before the first `/` when the id
 * contains one, otherwise `attributes.Type`. If that attribute is missing, the whole
 * `_id` is used so the function always returns a string, as its signature promises.
 */
const getEdgeType = (edge: GraphQueryResultFromBackend['edges'][number]): string => {
  const idPrefix = edge._id.split('/')[0];
  if (edge._id.includes('/')) return idPrefix;

  const declaredType = edge.attributes?.Type;
  return declaredType === undefined || declaredType === null ? idPrefix : String(declaredType);
};

export { getNodeLabel, getEdgeType };
// ---- libs/shared/lib/statistics/utils/updateStatistics.ts ----
import type {
  ArrayStats,
  AttributeStats,
  AttributeType,
  BooleanStats,
  CategoricalStats,
  NumericalStats,
  ObjectStats,
  TemporalStats,
} from '../statistics.types';
import {
  updateArrayStats,
  updateCategoricalStats,
  updateNumericalStats,
  updateObjectStats,
  updateTemporalStats,
  updateBooleanStats,
} from './attributeStats';

/**
 * Folds one attribute value into the statistics that match the attribute's type.
 *
 * @param attribute - Detected type plus the statistics record to mutate in place.
 * @param value - Raw attribute value.
 *
 * `null` and `undefined` are skipped. The updaters dereference the value
 * (`.toString()`, `.length`, `Object.keys`), so a missing attribute value would
 * otherwise throw or distort the figures. Array and object statistics are only
 * updated when the value actually has that shape, because the attribute type is
 * fixed by the caller and a later value may differ from it.
 */
const updateStatistics = (attribute: AttributeStats<AttributeType>, value: any) => {
  if (value === null || value === undefined) return;

  const { attributeType, statistics } = attribute;

  switch (attributeType) {
    case 'number':
      updateNumericalStats(statistics as NumericalStats, value);
      break;
    case 'string':
      updateCategoricalStats(statistics as CategoricalStats, value);
      break;
    case 'boolean':
      updateBooleanStats(statistics as BooleanStats, value);
      break;
    case 'datetime':
    case 'timestamp':
    case 'date':
    case 'time':
      updateTemporalStats(statistics as TemporalStats, value);
      break;
    case 'array':
      if (Array.isArray(value)) updateArrayStats(statistics as ArrayStats, value);
      break;
    case 'object':
      if (typeof value === 'object') updateObjectStats(statistics as ObjectStats, value);
      break;
  }
};

export { updateStatistics };