Skip to content
Snippets Groups Projects
Commit a82268fe authored by Vink, S.A. (Sjoerd)'s avatar Vink, S.A. (Sjoerd) Committed by Leonardo Christino
Browse files

feat(statistics): refactor of statistics

# Conflicts:
#	libs/shared/lib/insight-sharing/SettingsPanel.tsx
parent b11f32c1
No related branches found
No related tags found
1 merge request: !255 "refactor: statistics on result set"
This commit is part of merge request !255. Comments created here will be created in the context of that merge request.
Showing
with 433 additions and 0 deletions
import { GraphQueryResultFromBackend } from '../data-access/store/graphQueryResultSlice';
import { GraphStatistics } from './statistics.types';
import { getAttributeType, getEdgeType, getNodeLabel, initializeStatistics, updateStatistics } from './utils';
/**
 * Folds a single node or edge into the statistics of its group.
 *
 * Registers the label the first time it is seen, bumps the per-label count and
 * updates the statistics of every attribute carried by the element. The
 * statistics shape of an attribute is fixed by the first value observed for it.
 *
 * @param group - The `nodes` or `edges` section of the statistics being built.
 * @param label - Node label / edge type the element belongs to.
 * @param attributes - Attribute bag of the element; a missing bag is treated as empty.
 */
const accumulateElementStatistics = (group: GraphStatistics['nodes'], label: string, attributes: object | undefined): void => {
  if (!group.labels.includes(label)) {
    group.labels.push(label);
  }
  if (!group.types[label]) {
    group.types[label] = { count: 0, attributes: {} };
  }
  const typeStats = group.types[label];
  typeStats.count++;

  for (const [attributeId, attributeValue] of Object.entries(attributes ?? {})) {
    if (!typeStats.attributes[attributeId]) {
      const attributeType = getAttributeType(attributeValue);
      typeStats.attributes[attributeId] = { attributeType, statistics: initializeStatistics(attributeType) };
    }
    updateStatistics(typeStats.attributes[attributeId], attributeValue);
  }
};

/**
 * Computes summary statistics for a graph query result.
 *
 * @param graph - Query result containing the `nodes` and `edges` arrays.
 * @returns Topological measures (density, self loops) plus per-label counts and
 *          per-attribute statistics for nodes and edges.
 */
const getGraphStatistics = (graph: GraphQueryResultFromBackend): GraphStatistics => {
  const { nodes, edges } = graph;
  const n_nodes = nodes.length;
  const n_edges = edges.length;

  // The density formula divides by n * (n - 1); with fewer than two nodes that
  // is zero and would yield NaN or Infinity, so report a density of 0 instead.
  const density = n_nodes > 1 ? (n_edges * 2) / (n_nodes * (n_nodes - 1)) : 0;

  const metaData: GraphStatistics = {
    topological: { density, self_loops: 0 },
    nodes: { labels: [], count: n_nodes, types: {} },
    edges: { labels: [], count: n_edges, types: {} },
  };

  nodes.forEach((node) => {
    accumulateElementStatistics(metaData.nodes, getNodeLabel(node), node.attributes);
  });

  edges.forEach((edge) => {
    // An edge that starts and ends on the same node is a self loop.
    if (edge.from === edge.to) metaData.topological.self_loops++;
    accumulateElementStatistics(metaData.edges, getEdgeType(edge), edge.attributes);
  });

  return metaData;
};
export { getGraphStatistics };
export * from './graphStatistics';
/**
 * Statistics computed over a graph query result: whole-graph topological
 * measures plus separate breakdowns for nodes and for edges.
 */
type GraphStatistics = {
/** Measures of the graph structure as a whole (density, self loops). */
topological: TopologicalStats;
/** Counts and attribute statistics of the nodes, grouped by node label. */
nodes: NodeOrEdgeStats;
/** Counts and attribute statistics of the edges, grouped by edge type. */
edges: NodeOrEdgeStats;
};
/**
 * Statistics for one element kind (all nodes or all edges) of a graph.
 */
type NodeOrEdgeStats = {
/** Total number of elements of this kind in the result. */
count: number;
/** Distinct labels / types encountered, in order of first appearance. */
labels: string[];
/** Per-label breakdown, keyed by the label string. */
types: {
[label: string]: {
/** Number of elements carrying this label. */
count: number;
// NOTE(review): the degree averages are optional and are not filled in by
// the statistics code visible in this file — confirm where they are set.
avgDegreeIn?: number;
avgDegreeOut?: number;
/** Statistics per attribute, keyed by attribute id. */
attributes: {
[id: string]: AttributeStats<AttributeType>;
};
};
};
};
/**
 * Statistics of a single attribute, tagged with the attribute type that
 * selects the shape of `statistics`.
 */
type AttributeStats<T extends AttributeType> = {
/** Detected type of the attribute. */
attributeType: T;
/** Accumulated statistics; the shape depends on `attributeType`. */
statistics: AttributeTypeStats<T>;
};
/**
 * Maps an attribute type to the statistics shape kept for it:
 * strings are categorical, the four temporal types share `TemporalStats`,
 * and any type outside `AttributeType` resolves to `never`.
 */
type AttributeTypeStats<T extends AttributeType> = T extends 'string'
? CategoricalStats
: T extends 'boolean'
? BooleanStats
: T extends 'number'
? NumericalStats
: T extends 'date' | 'time' | 'datetime' | 'timestamp'
? TemporalStats
: T extends 'array'
? ArrayStats
: T extends 'object'
? ObjectStats
: never;
/** Every attribute type for which statistics can be kept. */
type AttributeType = 'string' | 'boolean' | 'number' | 'array' | 'object' | TemporalType;
/** Attribute types that represent a point in time; the expected formats are listed below. */
type TemporalType = 'date' | 'time' | 'datetime' | 'timestamp';
// Date: date in the YYYY-MM-DD format (ISO 8601 syntax), e.g. 2021-09-28
// Time: time in the hh:mm:ss format, for the time of day, time since an event, or a time interval between events, e.g. 12:00:59
// Datetime: date and time together in the YYYY-MM-DD hh:mm:ss format, e.g. 2021-09-28 12:00:59
// Timestamp: number of seconds elapsed since midnight (00:00:00 UTC) on 1 January 1970 (Unix time), e.g. 1632855600
/** Structural measures of the graph as a whole. */
type TopologicalStats = {
/** Graph density, computed as 2 * edges / (nodes * (nodes - 1)). */
density: number;
/** Number of edges whose `from` and `to` refer to the same node. */
self_loops: number;
};
/** Running statistics of a numeric attribute. */
type NumericalStats = {
/** Smallest value seen so far. */
min: number;
/** Largest value seen so far. */
max: number;
/** Running mean of the values seen so far. */
average: number;
/** Number of values folded into the statistics. */
count: number;
};
/** Tally of a boolean attribute. */
type BooleanStats = {
/** Number of truthy values seen. */
true: number;
/** Number of falsy values seen. */
false: number;
};
/** Statistics of a categorical (string) attribute. */
type CategoricalStats = {
/** Number of distinct entries in `values`. */
uniqueItems: number;
/** Every observed value, converted to a string, in observation order. */
values: string[];
/** The most frequent entry of `values`. */
mode: string;
};
/** Statistics of a temporal attribute; values are stored as numeric timestamps (from `Date.getTime()`). */
type TemporalStats = {
/** Earliest timestamp seen so far. */
min: number;
/** Latest timestamp seen so far. */
max: number;
/** Difference between `max` and `min`. */
range: number;
};
/** Statistics of an array-valued attribute. */
type ArrayStats = {
/** Element count of the most recently processed array (overwritten on every update). */
length: number;
};
/** Statistics of an object-valued attribute. */
type ObjectStats = {
/** Number of keys of the most recently processed object (overwritten on every update). */
length: number;
};
export type {
GraphStatistics,
AttributeStats,
NumericalStats,
CategoricalStats,
BooleanStats,
TemporalStats,
AttributeType,
AttributeTypeStats,
ArrayStats,
ObjectStats,
};
import { ArrayStats } from '../../statistics.types';
/**
 * Stores the element count of `value` on `stats`, replacing whatever length
 * was recorded before.
 *
 * @param stats - Array statistics object, mutated in place.
 * @param value - The array whose length is recorded.
 */
const updateArrayStats = (stats: ArrayStats, value: any[]) => {
  const { length: elementCount } = value;
  stats.length = elementCount;
};
export { updateArrayStats };
import { BooleanStats } from '../../statistics.types';
/**
 * Adds one observation to the boolean tally.
 *
 * @param stats - Boolean statistics object, mutated in place.
 * @param value - Observed value; truthy values count as `true`, everything else as `false`.
 */
const updateBooleanStats = (stats: BooleanStats, value: boolean) => {
  const bucket = value ? 'true' : 'false';
  stats[bucket] += 1;
};
export { updateBooleanStats };
import { CategoricalStats } from '../../statistics.types';
/**
 * Adds one observation to a categorical attribute and refreshes the derived
 * `uniqueItems` and `mode` fields.
 *
 * Missing values (`null` / `undefined`) are ignored: they used to crash on
 * `toString()` even though untyped attribute values can legitimately be absent.
 *
 * Frequencies are tallied in a `Map`, so values that collide with
 * `Object.prototype` members (e.g. "constructor", "__proto__") are counted
 * correctly. When several values share the highest frequency, the one that
 * first appeared latest in `values` is reported as the mode.
 *
 * @param stats - Categorical statistics object, mutated in place.
 * @param value - Observed value; stored in its string form.
 */
const updateCategoricalStats = (stats: CategoricalStats, value: string | boolean) => {
  if (!stats.values) stats.values = [];
  if (value == null) return;

  stats.values.push(String(value));

  const frequencies = new Map<string, number>();
  for (const entry of stats.values) {
    frequencies.set(entry, (frequencies.get(entry) ?? 0) + 1);
  }
  stats.uniqueItems = frequencies.size;

  let mode = '';
  let highest = 0;
  for (const [entry, occurrences] of frequencies) {
    // `>=` lets a later entry win a tie.
    if (occurrences >= highest) {
      mode = entry;
      highest = occurrences;
    }
  }
  stats.mode = mode;
};
export { updateCategoricalStats };
export * from './array';
export * from './categorical';
export * from './numerical';
export * from './object';
export * from './temporal';
export * from './boolean';
export * from './initialize';
import { AttributeType, AttributeTypeStats } from '../../statistics.types';
/**
 * Creates the empty statistics object matching an attribute type.
 *
 * Numeric and temporal statistics start with `min = Infinity` and
 * `max = -Infinity` so that the first observed value replaces both bounds.
 * Numeric statistics also start with `count = 0`; without it the running
 * average computed by the numeric updater becomes `NaN`.
 *
 * @param type - Attribute type to create statistics for.
 * @returns A fresh statistics object of the shape associated with `type`.
 * @throws Error when `type` is not a known attribute type.
 */
const initializeStatistics = <T extends AttributeType>(type: T): AttributeTypeStats<T> => {
  // The casts are needed because TypeScript cannot resolve the conditional
  // return type while `T` is still generic inside the function body.
  switch (type) {
    case 'string':
      return {
        uniqueItems: 0,
        values: [],
        mode: '',
      } as unknown as AttributeTypeStats<T>;
    case 'boolean':
      return {
        true: 0,
        false: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'number':
      return {
        min: Infinity,
        max: -Infinity,
        average: 0,
        count: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'date':
    case 'time':
    case 'datetime':
    case 'timestamp':
      return {
        min: Infinity,
        max: -Infinity,
        range: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'array':
      return {
        length: 0,
      } as unknown as AttributeTypeStats<T>;
    case 'object':
      return {
        length: 0,
      } as unknown as AttributeTypeStats<T>;
    default:
      throw new Error(`Unknown attribute type: ${type}`);
  }
};
export { initializeStatistics };
import { NumericalStats } from '../../statistics.types';
/**
 * Folds one numeric observation into the running min / max / average / count.
 *
 * Attribute values arrive untyped, and numeric strings such as "42" are
 * classified as numbers, so the value is converted to a real number first.
 * Leaving it as a string would store strings in `min` / `max` and turn the
 * average into a string concatenation. Values that are not numeric are ignored.
 *
 * A missing or non-finite `count` is treated as 0, so a statistics object
 * created without a counter still produces a valid average.
 *
 * @param stats - Numerical statistics object, mutated in place.
 * @param value - Observed value.
 */
const updateNumericalStats = (stats: NumericalStats, value: number) => {
  const raw: unknown = value;
  let numeric = NaN;
  if (typeof raw === 'number') {
    numeric = raw;
  } else if (typeof raw === 'string' && raw.trim() !== '') {
    numeric = Number(raw);
  }
  if (Number.isNaN(numeric)) return;

  if (stats.min === undefined || numeric < stats.min) stats.min = numeric;
  if (stats.max === undefined || numeric > stats.max) stats.max = numeric;

  const previousCount = Number.isFinite(stats.count) ? stats.count : 0;
  stats.count = previousCount + 1;
  // Incremental mean: the first value becomes the average, later values nudge it.
  stats.average = previousCount === 0 ? numeric : stats.average + (numeric - stats.average) / stats.count;
};
export { updateNumericalStats };
import { ObjectStats } from '../../statistics.types';
/**
 * Stores the number of keys of `value` on `stats`, replacing whatever length
 * was recorded before.
 *
 * @param stats - Object statistics, mutated in place.
 * @param value - The object whose own enumerable keys are counted.
 */
const updateObjectStats = (stats: ObjectStats, value: object) => {
  const keys = Object.keys(value);
  stats.length = keys.length;
};
export { updateObjectStats };
import { TemporalStats } from '../../statistics.types';
/**
 * Converts a temporal attribute value into a number that can be compared.
 *
 * - `Date` instances use their epoch milliseconds.
 * - `hh:mm:ss` strings become milliseconds since midnight, because `new Date`
 *   cannot parse a bare time of day.
 * - `YYYY-MM-DD hh:mm:ss` strings get their space replaced by `T`, so they are
 *   parsed as a standard ISO date-time rather than by engine-specific rules.
 * - Everything else is handed to `new Date` as is.
 *
 * Returns `NaN` when the value cannot be interpreted.
 */
const toComparableTime = (value: string | Date): number => {
  if (value instanceof Date) return value.getTime();
  if (typeof value !== 'string') return new Date(value).getTime();

  const timeOfDay = /^([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/.exec(value);
  if (timeOfDay) {
    const [, hours, minutes, seconds] = timeOfDay;
    return ((Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds)) * 1000;
  }

  const isSpacedDatetime = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$/.test(value);
  return new Date(isSpacedDatetime ? value.replace(' ', 'T') : value).getTime();
};

/**
 * Folds one temporal observation into the running min / max / range.
 *
 * Values that cannot be interpreted are skipped. Previously they produced a
 * `NaN` timestamp that left the bounds untouched and set `range` to
 * `-Infinity`.
 *
 * @param stats - Temporal statistics object, mutated in place.
 * @param value - Observed value, as a `Date` or a date / time / datetime string.
 */
const updateTemporalStats = (stats: TemporalStats, value: string | Date) => {
  const timestamp = toComparableTime(value);
  if (Number.isNaN(timestamp)) return;

  if (stats.min === undefined || timestamp < stats.min) stats.min = timestamp;
  if (stats.max === undefined || timestamp > stats.max) stats.max = timestamp;
  stats.range = stats.max - stats.min;
};
export { updateTemporalStats };
import { AttributeType } from '../statistics.types';
// Check if a string is a valid date in the YYYY-MM-DD format
/**
 * Checks whether a string is a real calendar date in the YYYY-MM-DD format.
 *
 * A date-only ISO string is parsed as midnight UTC, so the parsed components
 * must be read back with the UTC getters. The local-time getters return the
 * previous day in time zones behind UTC, which made valid dates fail there.
 * Comparing the components rejects impossible dates such as 2021-02-30,
 * whether the engine rolls them over or reports an invalid date.
 *
 * @param value - Candidate string.
 * @returns `true` when `value` is a well-formed, existing calendar date.
 */
const isValidDate = (value: string): boolean => {
  const dateRegex = /^\d{4}-\d{2}-\d{2}$/; // Matches YYYY-MM-DD format
  if (!dateRegex.test(value)) return false;

  const date = new Date(value);
  return (
    date.getUTCFullYear() === Number(value.slice(0, 4)) &&
    date.getUTCMonth() === Number(value.slice(5, 7)) - 1 &&
    date.getUTCDate() === Number(value.slice(8, 10))
  );
};
// Check if a string is a valid time in the hh:mm:ss format
/**
 * Checks whether a string is a time of day in the hh:mm:ss format
 * (hours 00-23, minutes and seconds 00-59, all two digits).
 *
 * @param value - Candidate string.
 * @returns `true` when `value` matches the format.
 */
const isValidTime = (value: string): boolean => /^([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/.test(value);
// Check if a string is a valid datetime in the YYYY-MM-DD hh:mm:ss format
/**
 * Checks whether a string is a datetime in the YYYY-MM-DD hh:mm:ss format.
 *
 * The overall shape is verified with a regular expression first; the date and
 * time halves are then validated individually.
 *
 * @param value - Candidate string.
 * @returns `true` when the shape matches and both halves are valid.
 */
const isValidDatetime = (value: string): boolean => {
  const [datePart, timePart] = value.split(' ');
  const hasDatetimeShape = /^\d{4}-\d{2}-\d{2} ([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$/.test(value);
  if (!hasDatetimeShape) {
    return false;
  }
  return isValidDate(datePart) && isValidTime(timePart);
};
// Check if a string is a valid number
/**
 * Checks whether a string represents a number.
 *
 * Both `Number` and `parseFloat` must yield a number: `Number` rejects trailing
 * garbage such as "12px", while `parseFloat` rejects the empty string that
 * `Number` would turn into 0.
 *
 * @param value - Candidate string.
 * @returns `true` when both conversions produce a non-NaN result.
 */
const isValidNumber = (value: string): boolean => {
  const coerced = Number(value);
  const parsed = parseFloat(value);
  return !(Number.isNaN(coerced) || Number.isNaN(parsed));
};
// Determines the type of an attribute
/**
 * Determines the attribute type of an arbitrary value.
 *
 * Strings are inspected for content: a numeric string counts as a number, then
 * datetime, date and time formats are tried in that order, and anything else
 * is a plain string. `Date` instances are reported as datetime. Values that
 * fit no category (including `null` and `undefined`) default to string.
 *
 * @param value - The attribute value to classify.
 * @returns The detected attribute type.
 */
const getAttributeType = (value: any): AttributeType => {
  switch (typeof value) {
    case 'string': {
      if (isValidNumber(value)) return 'number';
      if (isValidDatetime(value)) return 'datetime';
      if (isValidDate(value)) return 'date';
      if (isValidTime(value)) return 'time';
      return 'string';
    }
    case 'boolean':
      return 'boolean';
    case 'number':
      return 'number';
    case 'object': {
      // `typeof null` is 'object', but null carries no structure: use the default.
      if (value === null) return 'string';
      if (Array.isArray(value)) return 'array';
      // A Date instance is treated as a datetime.
      if (value instanceof Date) return 'datetime';
      return 'object';
    }
    default:
      // undefined, functions, symbols, bigints: default to string.
      return 'string';
  }
};
export { getAttributeType };
import { GraphQueryResultFromBackend } from '../../data-access/store/graphQueryResultSlice';
// Get node type based on _id or label
/**
 * Resolves the label of a node.
 *
 * Order of preference: the node's own `label`, then the first entry of a
 * non-empty `attributes.labels` array, then the part of `_id` before the
 * first "/".
 *
 * @param node - A node of the graph query result.
 * @returns The label used to group the node.
 */
const getNodeLabel = (node: GraphQueryResultFromBackend['nodes'][number]): string => {
  const idPrefix = node._id.split('/')[0];
  if (node.label) {
    return node.label;
  }
  const attributeLabels = node.attributes?.labels;
  if (Array.isArray(attributeLabels) && attributeLabels.length > 0) {
    return attributeLabels[0];
  }
  return idPrefix;
};
// Get edge type based on _id or attributes
/**
 * Resolves the type of an edge.
 *
 * When `_id` contains a "/", the part before the first "/" is the type;
 * otherwise the type is read from the `Type` attribute.
 *
 * @param edge - An edge of the graph query result.
 * @returns The type used to group the edge.
 */
const getEdgeType = (edge: GraphQueryResultFromBackend['edges'][number]): string => {
  const { _id: edgeId } = edge;
  const idPrefix = edgeId.split('/')[0];
  return edgeId.includes('/') ? idPrefix : (edge.attributes.Type as string);
};
export { getNodeLabel, getEdgeType };
export * from './getAttributeType';
export * from './getNodeOrEdgeType';
export * from './attributeStats';
export * from './updateStatistics';
import {
ArrayStats,
AttributeStats,
AttributeType,
BooleanStats,
CategoricalStats,
NumericalStats,
ObjectStats,
TemporalStats,
} from '../statistics.types';
import {
updateArrayStats,
updateCategoricalStats,
updateNumericalStats,
updateObjectStats,
updateTemporalStats,
updateBooleanStats,
} from './attributeStats';
// Update statistics based on attribute type and value
/**
 * Folds one attribute value into the attribute's statistics by dispatching to
 * the updater that matches the attribute's type. Types without an updater are
 * left untouched.
 *
 * @param attribute - Attribute statistics entry (type tag plus statistics), mutated in place.
 * @param value - The observed attribute value.
 */
const updateStatistics = (attribute: AttributeStats<AttributeType>, value: any) => {
  const { attributeType: kind, statistics } = attribute;

  if (kind === 'number') {
    updateNumericalStats(statistics as NumericalStats, value);
  } else if (kind === 'string') {
    updateCategoricalStats(statistics as CategoricalStats, value);
  } else if (kind === 'boolean') {
    updateBooleanStats(statistics as BooleanStats, value);
  } else if (kind === 'datetime' || kind === 'timestamp' || kind === 'date' || kind === 'time') {
    // All temporal flavours share one updater.
    updateTemporalStats(statistics as TemporalStats, value);
  } else if (kind === 'array') {
    updateArrayStats(statistics as ArrayStats, value);
  } else if (kind === 'object') {
    updateObjectStats(statistics as ObjectStats, value);
  }
};
export { updateStatistics };
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment