diff --git a/libs/shared/lib/statistics/graphStatistics.ts b/libs/shared/lib/statistics/graphStatistics.ts
new file mode 100644
index 0000000000000000000000000000000000000000..ffaf9fb4ffbac110ed58f826dfbc34dffedf1812
--- /dev/null
+++ b/libs/shared/lib/statistics/graphStatistics.ts
@@ -0,0 +1,74 @@
+import { GraphQueryResultFromBackend } from '../data-access/store/graphQueryResultSlice';
+import { GraphStatistics } from './statistics.types';
+import { getAttributeType, getEdgeType, getNodeLabel, initializeStatistics, updateStatistics } from './utils';
+
+type ElementStats = GraphStatistics['nodes'];
+
+// Own-property test, so labels/attribute ids such as "constructor" never resolve to Object.prototype members.
+const hasOwn = (target: object, key: string): boolean => Object.prototype.hasOwnProperty.call(target, key);
+
+/**
+ * Registers one node or edge under `label` and folds its attribute values into the running statistics.
+ * A missing attribute bag and null/undefined attribute values are skipped instead of crashing the scan.
+ */
+const recordElement = (stats: ElementStats, label: string, attributes: object | null | undefined): void => {
+  if (!stats.labels.includes(label)) {
+    stats.labels.push(label);
+  }
+
+  if (!hasOwn(stats.types, label)) {
+    stats.types[label] = { count: 0, attributes: {} };
+  }
+
+  const typeStats = stats.types[label];
+  typeStats.count++;
+
+  Object.entries(attributes ?? {}).forEach(([attributeId, attributeValue]) => {
+    if (attributeValue === null || attributeValue === undefined) return;
+
+    // The first non-null value seen for an attribute decides its type.
+    if (!hasOwn(typeStats.attributes, attributeId)) {
+      const attributeType = getAttributeType(attributeValue);
+      typeStats.attributes[attributeId] = { attributeType, statistics: initializeStatistics(attributeType) };
+    }
+
+    updateStatistics(typeStats.attributes[attributeId], attributeValue);
+  });
+};
+
+/**
+ * Computes summary statistics for a graph query result: density, self-loop count, and per-label
+ * node/edge counts with statistics for every attribute.
+ *
+ * @param graph Query result whose `nodes` and `edges` arrays are scanned once each.
+ * @returns Freshly built statistics; the input graph is only read.
+ */
+const getGraphStatistics = (graph: GraphQueryResultFromBackend): GraphStatistics => {
+  const { nodes, edges } = graph;
+
+  const n_nodes = nodes.length;
+  const n_edges = edges.length;
+
+  // With fewer than two nodes the denominator is 0, which used to yield NaN or Infinity; report 0 instead.
+  const nodePairs = n_nodes * (n_nodes - 1);
+  const density = nodePairs > 0 ? (n_edges * 2) / nodePairs : 0;
+
+  const metaData: GraphStatistics = {
+    topological: { density, self_loops: 0 },
+    nodes: { labels: [], count: n_nodes, types: {} },
+    edges: { labels: [], count: n_edges, types: {} },
+  };
+
+  nodes.forEach((node) => {
+    recordElement(metaData.nodes, getNodeLabel(node), node.attributes);
+  });
+
+  edges.forEach((edge) => {
+    if (edge.from === edge.to) metaData.topological.self_loops++;
+    recordElement(metaData.edges, getEdgeType(edge), edge.attributes);
+  });
+
+  return metaData;
+};
+
+export { getGraphStatistics };
diff --git a/libs/shared/lib/statistics/index.ts b/libs/shared/lib/statistics/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..e00f85d98c5450b43a46e04edac5fed85a9b33fc
--- /dev/null
+++ b/libs/shared/lib/statistics/index.ts
@@ -0,0 +1 @@
+export * from './graphStatistics';
diff --git a/libs/shared/lib/statistics/statistics.types.ts b/libs/shared/lib/statistics/statistics.types.ts
new file mode 100644
index 0000000000000000000000000000000000000000..78a0ee3395e31c72d26404a415c2d7a7dadd3523
--- /dev/null
+++ b/libs/shared/lib/statistics/statistics.types.ts
@@ -0,0 +1,97 @@
+type GraphStatistics = {
+  topological: TopologicalStats;
+  nodes: NodeOrEdgeStats;
+  edges: NodeOrEdgeStats;
+};
+
+type NodeOrEdgeStats = {
+  count: number;
+  labels: string[];
+  types: {
+    [label: string]: {
+      count: number;
+      avgDegreeIn?: number;
+      avgDegreeOut?: number;
+      attributes: {
+        [id: string]: AttributeStats<AttributeType>;
+      };
+    };
+  };
+};
+
+type AttributeStats<T extends AttributeType> = {
+  attributeType: T;
+  statistics: AttributeTypeStats<T>;
+};
+
+type AttributeTypeStats<T extends AttributeType> = T extends 'string'
+  ? CategoricalStats
+  : T extends 'boolean'
+    ? BooleanStats
+    : T extends 'number'
+      ? NumericalStats
+      : T extends 'date' | 'time' | 'datetime' | 'timestamp'
+        ? TemporalStats
+        : T extends 'array'
+          ? ArrayStats
+          : T extends 'object'
+            ? ObjectStats
+            : never;
+
+type AttributeType = 'string' | 'boolean' | 'number' | 'array' | 'object' | TemporalType;
+
+type TemporalType = 'date' | 'time' | 'datetime' | 'timestamp';
+// Date: Date in the YYYY-MM-DD format (ISO 8601 syntax) (e.g., 2021-09-28)
+// Time: Time in the hh:mm:ss format for the time of day, time since an event, or time interval between events (e.g., 12:00:59)
+// Datetime: Date and time together in the YYYY-MM-DD hh:mm:ss format (e.g., 2021-09-28 12:00:59)
+// Timestamp: Number of seconds that have elapsed since midnight (00:00:00 UTC) on 1st January 1970 (Unix time) (e.g., 1632855600)
+
+type TopologicalStats = {
+  density: number;
+  self_loops: number;
+};
+
+type NumericalStats = {
+  min: number;
+  max: number;
+  average: number;
+  count: number;
+};
+
+type BooleanStats = {
+  true: number;
+  false: number;
+};
+
+type CategoricalStats = {
+  uniqueItems: number;
+  values: string[];
+  mode: string;
+};
+
+type TemporalStats = {
+  min: number;
+  max: number;
+  range: number;
+};
+
+type ArrayStats = {
+  length: number;
+};
+
+type ObjectStats = {
+  length: number;
+};
+
+export type {
+  GraphStatistics,
+  AttributeStats,
+  NumericalStats,
+  CategoricalStats,
+  BooleanStats,
+  TemporalStats,
+  AttributeType,
+  AttributeTypeStats,
+  ArrayStats,
+  ObjectStats,
+};
diff --git a/libs/shared/lib/statistics/utils/attributeStats/array.ts b/libs/shared/lib/statistics/utils/attributeStats/array.ts
new file mode 100644
index 0000000000000000000000000000000000000000..c6a7dc55d26d75f745b1f2675d301dea59f535ee
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/array.ts
@@ -0,0 +1,9 @@
+import { ArrayStats } from '../../statistics.types';
+
+/** Records the length of `value`; values that are not arrays are ignored. */
+const updateArrayStats = (stats: ArrayStats, value: any[]) => {
+  if (!Array.isArray(value)) return;
+  stats.length = value.length;
+};
+
+export { updateArrayStats };
diff --git a/libs/shared/lib/statistics/utils/attributeStats/boolean.ts b/libs/shared/lib/statistics/utils/attributeStats/boolean.ts
new file mode 100644
index 0000000000000000000000000000000000000000..ea7efdf1324465dd693eb8c8e41a51b4f605719d
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/boolean.ts
@@ -0,0 +1,11 @@
+import { BooleanStats } from '../../statistics.types';
+
+const updateBooleanStats = (stats: BooleanStats, value: boolean) => {
+  if (value) {
+    stats.true += 1;
+  } else {
+    stats.false += 1;
+  }
+};
+
+export { updateBooleanStats };
diff --git a/libs/shared/lib/statistics/utils/attributeStats/categorical.ts b/libs/shared/lib/statistics/utils/attributeStats/categorical.ts
new file mode 100644
index 0000000000000000000000000000000000000000..fc9b128ce8b25c03b49557e0ce471719062bb1d1
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/categorical.ts
@@ -0,0 +1,32 @@
+import { CategoricalStats } from '../../statistics.types';
+
+/**
+ * Appends `value` to the observed values and refreshes the distinct-value count and the mode.
+ * Null/undefined values are ignored; on a frequency tie the value that first appeared latest wins.
+ */
+const updateCategoricalStats = (stats: CategoricalStats, value: string | boolean) => {
+  // Callers pass untyped (`any`) values, so a null can reach this point; `.toString()` would throw on it.
+  if (value === null || value === undefined) return;
+
+  if (!stats.values) stats.values = [];
+  stats.values.push(String(value));
+
+  // A Map (not a plain object) keeps values such as "constructor" from colliding with inherited properties.
+  const frequencies = new Map<string, number>();
+  stats.values.forEach((val) => {
+    frequencies.set(val, (frequencies.get(val) ?? 0) + 1);
+  });
+  stats.uniqueItems = frequencies.size;
+
+  let mode = '';
+  let modeCount = 0;
+  frequencies.forEach((count, val) => {
+    if (count >= modeCount) {
+      mode = val;
+      modeCount = count;
+    }
+  });
+  stats.mode = mode;
+};
+
+export { updateCategoricalStats };
diff --git a/libs/shared/lib/statistics/utils/attributeStats/index.ts b/libs/shared/lib/statistics/utils/attributeStats/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..42c450156100639b4eaa6c3a57ad63f4eef20fb9
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/index.ts
@@ -0,0 +1,7 @@
+export * from './array';
+export * from './categorical';
+export * from './numerical';
+export * from './object';
+export * from './temporal';
+export * from './boolean';
+export * from './initialize';
diff --git a/libs/shared/lib/statistics/utils/attributeStats/initialize.ts b/libs/shared/lib/statistics/utils/attributeStats/initialize.ts
new file mode 100644
index 0000000000000000000000000000000000000000..4f93930d50644426fa02fc1ea4ceffa6c401e771
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/initialize.ts
@@ -0,0 +1,51 @@
+import { AttributeType, AttributeTypeStats } from '../../statistics.types';
+
+/**
+ * Creates the empty statistics record that matches `type`.
+ *
+ * @throws Error when `type` is not a known attribute type.
+ */
+const initializeStatistics = <T extends AttributeType>(type: T): AttributeTypeStats<T> => {
+  switch (type) {
+    case 'string':
+      return {
+        uniqueItems: 0,
+        values: [],
+        mode: '',
+      } as unknown as AttributeTypeStats<T>;
+    case 'boolean':
+      return {
+        true: 0,
+        false: 0,
+      } as unknown as AttributeTypeStats<T>;
+    case 'number':
+      return {
+        min: Infinity,
+        max: -Infinity,
+        average: 0,
+        // Must start at 0: updateNumericalStats increments it and divides by it (undefined++ is NaN).
+        count: 0,
+      } as unknown as AttributeTypeStats<T>;
+    case 'date':
+    case 'time':
+    case 'datetime':
+    case 'timestamp':
+      return {
+        min: Infinity,
+        max: -Infinity,
+        range: 0,
+      } as unknown as AttributeTypeStats<T>;
+    case 'array':
+      return {
+        length: 0,
+      } as unknown as AttributeTypeStats<T>;
+    case 'object':
+      return {
+        length: 0,
+      } as unknown as AttributeTypeStats<T>;
+    default:
+      throw new Error(`Unknown attribute type: ${type}`);
+  }
+};
+
+export { initializeStatistics };
diff --git a/libs/shared/lib/statistics/utils/attributeStats/numerical.ts b/libs/shared/lib/statistics/utils/attributeStats/numerical.ts
new file mode 100644
index 0000000000000000000000000000000000000000..77b319921f5320f14b1e73618a4fa01851757030
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/numerical.ts
@@ -0,0 +1,23 @@
+import { NumericalStats } from '../../statistics.types';
+
+/**
+ * Folds one observation into the running min, max, count and mean.
+ * Numeric strings are converted first; blank strings and non-finite values are ignored.
+ */
+const updateNumericalStats = (stats: NumericalStats, value: number | string) => {
+  if (typeof value === 'string' && value.trim() === '') return;
+
+  // getAttributeType reports numeric strings as 'number'; left unconverted, `+` below would concatenate.
+  const observation = Number(value);
+  if (!Number.isFinite(observation)) return;
+
+  if (stats.min === undefined || observation < stats.min) stats.min = observation;
+  if (stats.max === undefined || observation > stats.max) stats.max = observation;
+
+  // Tolerate records that were created without a count so the mean never becomes NaN.
+  const previousCount = stats.count || 0;
+  stats.count = previousCount + 1;
+  stats.average = (stats.average * previousCount + observation) / stats.count;
+};
+
+export { updateNumericalStats };
diff --git a/libs/shared/lib/statistics/utils/attributeStats/object.ts b/libs/shared/lib/statistics/utils/attributeStats/object.ts
new file mode 100644
index 0000000000000000000000000000000000000000..f3d0de1bce50ff41133e226bb1524500f19b453c
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/object.ts
@@ -0,0 +1,9 @@
+import { ObjectStats } from '../../statistics.types';
+
+/** Records the number of own enumerable keys of `value`; null and non-object values are ignored. */
+const updateObjectStats = (stats: ObjectStats, value: object) => {
+  if (typeof value !== 'object' || value === null) return;
+  stats.length = Object.keys(value).length;
+};
+
+export { updateObjectStats };
diff --git a/libs/shared/lib/statistics/utils/attributeStats/temporal.ts b/libs/shared/lib/statistics/utils/attributeStats/temporal.ts
new file mode 100644
index 0000000000000000000000000000000000000000..58694cbb8d830b94155536f8e2fe06a5209d3b68
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/attributeStats/temporal.ts
@@ -0,0 +1,36 @@
+import { TemporalStats } from '../../statistics.types';
+
+const TIME_OF_DAY = /^(\d{2}):(\d{2}):(\d{2})$/;
+const DATE_SPACE_TIME = /^(\d{4}-\d{2}-\d{2}) /;
+
+/**
+ * Converts a temporal value to milliseconds: epoch milliseconds for dates and datetimes,
+ * milliseconds since midnight for bare `hh:mm:ss` times. Returns NaN when the value cannot be parsed.
+ */
+const toMilliseconds = (value: string | number | Date): number => {
+  if (value instanceof Date) return value.getTime();
+  if (typeof value !== 'string') return new Date(value).getTime();
+
+  // `new Date('12:00:59')` is an Invalid Date, so times of day are converted by hand.
+  const time = TIME_OF_DAY.exec(value);
+  if (time) return ((Number(time[1]) * 60 + Number(time[2])) * 60 + Number(time[3])) * 1000;
+
+  // 'YYYY-MM-DD hh:mm:ss' is not a standard date-time string; with a 'T' separator it is (read as local time).
+  return new Date(value.replace(DATE_SPACE_TIME, '$1T')).getTime();
+};
+
+/**
+ * Widens the running min/max with `value` and refreshes the range. Unparseable values are ignored.
+ */
+const updateTemporalStats = (stats: TemporalStats, value: string | number | Date) => {
+  const timestamp = toMilliseconds(value);
+  // NaN compares false with everything, which used to leave min/max at ±Infinity and the range at -Infinity.
+  if (Number.isNaN(timestamp)) return;
+
+  if (stats.min === undefined || timestamp < stats.min) stats.min = timestamp;
+  if (stats.max === undefined || timestamp > stats.max) stats.max = timestamp;
+
+  stats.range = stats.max - stats.min;
+};
+
+export { updateTemporalStats };
diff --git a/libs/shared/lib/statistics/utils/getAttributeType.ts b/libs/shared/lib/statistics/utils/getAttributeType.ts
new file mode 100644
index 0000000000000000000000000000000000000000..3d8c0051d0c8c8f36152a166466b821e71d1222c
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/getAttributeType.ts
@@ -0,0 +1,86 @@
+import { AttributeType } from '../statistics.types';
+
+// Check if a string is a valid calendar date in the YYYY-MM-DD format
+const isValidDate = (value: string): boolean => {
+  const dateRegex = /^\d{4}-\d{2}-\d{2}$/; // Matches YYYY-MM-DD format
+  if (!dateRegex.test(value)) return false;
+
+  // Date-only ISO strings are parsed as UTC, so the round-trip check has to read the UTC fields;
+  // the local-time getters report the previous day in time zones behind UTC.
+  const date = new Date(value);
+  return (
+    date.getUTCFullYear() === Number(value.slice(0, 4)) &&
+    date.getUTCMonth() === Number(value.slice(5, 7)) - 1 &&
+    date.getUTCDate() === Number(value.slice(8, 10))
+  );
+};
+
+// Check if a string is a valid time in the hh:mm:ss format
+const isValidTime = (value: string): boolean => {
+  const timeRegex = /^([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/;
+  return timeRegex.test(value);
+};
+
+// Check if a string is a valid datetime in the YYYY-MM-DD hh:mm:ss format
+const isValidDatetime = (value: string): boolean => {
+  const datetimeRegex = /^\d{4}-\d{2}-\d{2} ([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/; // Matches YYYY-MM-DD hh:mm:ss format
+  const dateTimeParts = value.split(' ');
+  const date = dateTimeParts[0];
+  const time = dateTimeParts[1];
+
+  return datetimeRegex.test(value) && isValidDate(date) && isValidTime(time);
+};
+
+// Check if a string is a valid finite number
+const isValidNumber = (value: string): boolean => {
+  // Number('') and Number('  ') are 0, and 'Infinity' converts too; none of these is usable numeric data.
+  return value.trim() !== '' && Number.isFinite(Number(value));
+};
+
+/**
+ * Determines the statistics type of an attribute value.
+ * Strings are checked for numbers, datetimes, dates and times, in that order; null, undefined and
+ * anything else unrecognised fall back to 'string'.
+ */
+const getAttributeType = (value: any): AttributeType => {
+  // Strings may encode numbers or temporal values
+  if (typeof value === 'string') {
+    if (isValidNumber(value)) {
+      return 'number';
+    }
+    if (isValidDatetime(value)) {
+      return 'datetime';
+    }
+    if (isValidDate(value)) {
+      return 'date';
+    }
+    if (isValidTime(value)) {
+      return 'time';
+    }
+    return 'string';
+  }
+
+  if (typeof value === 'boolean') {
+    return 'boolean';
+  }
+
+  if (typeof value === 'number') {
+    return 'number';
+  }
+
+  if (Array.isArray(value)) {
+    return 'array';
+  }
+
+  if (value instanceof Date) {
+    return 'datetime'; // Assumes Date instance should be treated as datetime
+  }
+  // Check for object type
+  if (typeof value === 'object' && value !== null) {
+    return 'object';
+  }
+
+  return 'string'; // Default to string if uncertain
+};
+
+export { getAttributeType };
diff --git a/libs/shared/lib/statistics/utils/getNodeOrEdgeType.ts b/libs/shared/lib/statistics/utils/getNodeOrEdgeType.ts
new file mode 100644
index 0000000000000000000000000000000000000000..54caff5a0692faad3aa6bbcfde821f3656ec0fe2
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/getNodeOrEdgeType.ts
@@ -0,0 +1,23 @@
+import { GraphQueryResultFromBackend } from '../../data-access/store/graphQueryResultSlice';
+
+// Get node type based on _id or label
+const getNodeLabel = (node: GraphQueryResultFromBackend['nodes'][number]): string => {
+  let nodeType = node._id.split('/')[0];
+  if (node.label) nodeType = node.label;
+  else if (Array.isArray(node.attributes?.labels) && node.attributes.labels.length > 0) {
+    nodeType = node.attributes.labels[0]; // Safely access first label
+  }
+
+  return nodeType;
+};
+
+// Get edge type based on _id or attributes
+const getEdgeType = (edge: GraphQueryResultFromBackend['edges'][number]): string => {
+  let edgeType = edge._id.split('/')[0];
+  if (!edge._id.includes('/')) {
+    edgeType = edge.attributes.Type as string;
+  }
+  return edgeType;
+};
+
+export { getNodeLabel, getEdgeType };
diff --git a/libs/shared/lib/statistics/utils/index.ts b/libs/shared/lib/statistics/utils/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..41a5c208dac85a4a748f831f881195a2327d44be
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/index.ts
@@ -0,0 +1,4 @@
+export * from './getAttributeType';
+export * from './getNodeOrEdgeType';
+export * from './attributeStats';
+export * from './updateStatistics';
diff --git a/libs/shared/lib/statistics/utils/updateStatistics.ts b/libs/shared/lib/statistics/utils/updateStatistics.ts
new file mode 100644
index 0000000000000000000000000000000000000000..d43cb98257e53148f477a7ad991c51a14d4361c3
--- /dev/null
+++ b/libs/shared/lib/statistics/utils/updateStatistics.ts
@@ -0,0 +1,50 @@
+import {
+  ArrayStats,
+  AttributeStats,
+  AttributeType,
+  BooleanStats,
+  CategoricalStats,
+  NumericalStats,
+  ObjectStats,
+  TemporalStats,
+} from '../statistics.types';
+import {
+  updateArrayStats,
+  updateCategoricalStats,
+  updateNumericalStats,
+  updateObjectStats,
+  updateTemporalStats,
+  updateBooleanStats,
+} from './attributeStats';
+
+// Update statistics based on attribute type and value
+const updateStatistics = (attribute: AttributeStats<AttributeType>, value: any) => {
+  // Missing values carry no information for any statistic and would crash the string/object updaters.
+  if (value === null || value === undefined) return;
+
+  switch (attribute.attributeType) {
+    case 'number':
+      updateNumericalStats(attribute.statistics as NumericalStats, value);
+      break;
+    case 'string':
+      updateCategoricalStats(attribute.statistics as CategoricalStats, value);
+      break;
+    case 'boolean':
+      updateBooleanStats(attribute.statistics as BooleanStats, value);
+      break;
+    case 'datetime':
+    case 'timestamp':
+    case 'date':
+    case 'time':
+      updateTemporalStats(attribute.statistics as TemporalStats, value);
+      break;
+    case 'array':
+      updateArrayStats(attribute.statistics as ArrayStats, value);
+      break;
+    case 'object':
+      updateObjectStats(attribute.statistics as ObjectStats, value);
+      break;
+  }
+};
+
+export { updateStatistics };