From 20dba2aec6134c6007969c31da73b7b27a5627fb Mon Sep 17 00:00:00 2001 From: Leonardo <leomilho@gmail.com> Date: Mon, 3 Mar 2025 11:49:11 +0100 Subject: [PATCH] feat: move sql converter to json format --- .../sql/queryConverterSql.test.ts | 619 +++++++++++++++++- src/queryExecution/sql/queryConverterSql.ts | 324 ++++----- src/readers/queryService.ts | 1 - 3 files changed, 697 insertions(+), 247 deletions(-) diff --git a/src/queryExecution/sql/queryConverterSql.test.ts b/src/queryExecution/sql/queryConverterSql.test.ts index ec9d796..865a20f 100644 --- a/src/queryExecution/sql/queryConverterSql.test.ts +++ b/src/queryExecution/sql/queryConverterSql.test.ts @@ -55,10 +55,81 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Person p1 - JOIN Movie m1 ON p1.name = m1.director - JOIN Genre g1 ON p1.name = g1.person - LIMIT 5000;`; + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + SELECT jsonb_build_object( + '_id', concat('Person-', p1.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM Person p1 + UNION ALL + SELECT jsonb_build_object( + '_id', concat('Movie-', m1.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(m1) - 'id' + ) AS node + FROM Movie m1 + UNION ALL + SELECT jsonb_build_object( + '_id', concat('Genre-', g1.id::text), + 'label', 'Genre', + 'attributes', to_jsonb(g1) - 'id' + ) AS node + FROM Genre g1 + ) sub +), +edges_cte AS ( + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Movie-', m1.id::text), + 'label', 'name.director', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', m1.id::text + ) + ) AS edges + FROM Person p1 + JOIN Movie m1 ON p1.name = m1.director + UNION ALL + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Genre-', g1.id::text), + 'label', 'name.person', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', g1.id::text + ) + ) AS edges + FROM Person p1 + JOIN Genre g1 ON p1.name = g1.person +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Person' AS label FROM Person + UNION ALL + SELECT 'Movie' AS label FROM Movie + UNION ALL + SELECT 'Genre' AS label FROM Genre + ) t + GROUP BY label +), +edge_counts AS ( + SELECT 'name.director' AS label, COUNT(*) AS count FROM Person p1 JOIN Movie m1 ON p1.name = m1.director GROUP BY label + UNION ALL + SELECT 'name.person' AS label, COUNT(*) AS count FROM Person p1 JOIN Genre g1 ON p1.name = g1.person GROUP BY label +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result +FROM nodes_cte, edges_cte`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); }); @@ -106,10 +177,81 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Person p1 - JOIN Movie m1 ON p1.watched = m1.id - JOIN Genre g1 ON p1.watched_genre = g1.id - WHERE p1.name <> 'Raymond Campbell' + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + SELECT jsonb_build_object( + '_id', concat('Person-', p1.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM Person p1 + UNION ALL + SELECT jsonb_build_object( + '_id', concat('Movie-', m1.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(m1) - 'id' + ) AS node + FROM Movie m1 + UNION ALL + SELECT jsonb_build_object( + '_id', concat('Genre-', g1.id::text), + 'label', 'Genre', + 'attributes', to_jsonb(g1) - 'id' + ) AS node + FROM Genre g1 + ) sub +), +edges_cte AS ( + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Movie-', m1.id::text), + 'label', 'watched.id', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', m1.id::text + ) + ) AS edges + FROM Person p1 + JOIN Movie m1 ON p1.watched = m1.id + UNION ALL + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Genre-', g1.id::text), + 'label', 'watched_genre.id', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', g1.id::text + ) + ) AS edges + FROM Person p1 + JOIN Genre g1 ON p1.watched_genre = g1.id +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Person' AS label FROM Person + UNION ALL + SELECT 'Movie' AS label FROM Movie + UNION ALL + SELECT 'Genre' AS label FROM Genre + ) t + GROUP BY label +), +edge_counts AS ( + SELECT 'watched.id' AS label, COUNT(*) AS count FROM Person p1 JOIN Movie m1 ON p1.watched = m1.id GROUP BY label + UNION ALL + SELECT 'watched_genre.id' AS label, COUNT(*) AS count FROM Person p1 JOIN Genre g1 ON p1.watched_genre = g1.id GROUP BY label +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result + WHERE p1.name != 'Raymond Campbell' LIMIT 5000;`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); @@ -187,8 +329,59 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM p1 - JOIN Movie movie ON p1.acted = movie.id + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from p1 + SELECT jsonb_build_object( + '_id', concat('p1-', p1.id::text), + 'label', 'p1', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM p1 + UNION ALL + -- Nodes from Movie + SELECT jsonb_build_object( + '_id', concat('Movie-', movie.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(movie) - 'id' + ) AS node + FROM Movie movie + ) sub +), +edges_cte AS ( + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('p1-', p1.id::text, '-Movie-', movie.id::text), + 'label', 'acted.id', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', movie.id::text + ) + ) AS edges + FROM p1 + JOIN Movie movie ON p1.acted = movie.id +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'p1' AS label FROM p1 + UNION ALL + SELECT 'Movie' AS label FROM Movie + ) t + GROUP BY label +), +edge_counts AS ( + SELECT 'acted.id' AS label, COUNT(*) AS count FROM p1 JOIN Movie movie ON p1.acted = movie.id GROUP BY label +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result WHERE (movie.year - p1.year) < 10 LIMIT 5000;`; @@ -263,11 +456,60 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `WITH (SELECT AVG(p1.age) FROM Person p1 - JOIN Movie movie ON p1.acted = movie.id) AS p1_age_avg - SELECT * FROM Person p1 - JOIN Movie movie ON p1.acted = movie.id - WHERE p1.age < p1_age_avg + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Person + SELECT jsonb_build_object( + '_id', concat('Person-', p1.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM Person p1 + UNION ALL + -- Nodes from Movie + SELECT jsonb_build_object( + '_id', concat('Movie-', movie.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(movie) - 'id' + ) AS node + FROM Movie movie + ) sub +), +edges_cte AS ( + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Movie-', movie.id::text), + 'label', 'acted.id', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', movie.id::text + ) + ) AS edges + FROM Person p1 + JOIN Movie movie ON p1.acted = movie.id +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Person' AS label FROM Person + UNION ALL + SELECT 'Movie' AS label FROM Movie + ) t + GROUP BY label +), +edge_counts AS ( + SELECT 'acted.id' AS label, COUNT(*) AS count FROM Person p1 JOIN Movie movie ON p1.acted = movie.id GROUP BY label +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result + WHERE p1.age < (SELECT AVG(p1.age) FROM Person p1 JOIN Movie movie ON p1.acted = movie.id) LIMIT 5000;`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); @@ -318,12 +560,82 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `WITH (SELECT AVG(p1.age) FROM Person p1 - JOIN Movie movie ON p1.id = movie.id AND movie.relation = 'ACTED_IN') AS p1_age_avg - SELECT * FROM Person p1 - JOIN Movie movie ON p1.id = movie.id AND movie.relation = 'ACTED_IN' - JOIN Person p2 ON /* join condition assumed */ - WHERE p1.age < p1_age_avg + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Person + SELECT jsonb_build_object( + '_id', concat('Person-', p1.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM Person p1 + UNION ALL + -- Nodes from Movie + SELECT jsonb_build_object( + '_id', concat('Movie-', movie.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(movie) - 'id' + ) AS node + FROM Movie movie + UNION ALL + -- Nodes from Person + SELECT jsonb_build_object( + '_id', concat('Person-', p2.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p2) - 'id' + ) AS node + FROM Person p2 + ) sub +), +edges_cte AS ( + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Movie-', movie.id::text), + 'label', 'acted.id', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', movie.id::text + ) + ) AS edges + FROM Person p1 + JOIN Movie movie ON p1.id = movie.id AND movie.relation = 'ACTED_IN' + UNION ALL + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p2.id::text, '-Movie-', movie.id::text), + 'label', 'acted.id', + 'attributes', '{}'::jsonb, + 'from', p2.id::text, + 'to', movie.id::text + ) + ) AS edges + FROM Person p2 + JOIN Movie movie ON p2.id = movie.id AND movie.relation = 'ACTED_IN' +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Person' AS label FROM Person + UNION ALL + SELECT 'Movie' AS label FROM Movie + ) t + GROUP BY label +), +edge_counts AS ( + SELECT 'acted.id' AS label, COUNT(*) AS count FROM Person p1 JOIN Movie movie ON p1.id = movie.id AND movie.relation = 'ACTED_IN' GROUP BY label + UNION ALL + SELECT 'acted.id' AS label, COUNT(*) AS count FROM Person p2 JOIN Movie movie ON p2.id = movie.id AND movie.relation = 'ACTED_IN' GROUP BY label +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result + WHERE p1.age < (SELECT AVG(p1.age) FROM Person p1 JOIN Movie movie ON p1.id = movie.id AND movie.relation = 'ACTED_IN') LIMIT 5000;`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); @@ -347,7 +659,39 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Person p1 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Person + SELECT jsonb_build_object( + '_id', concat('Person-', p1.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM Person p1 + ) sub +), +edges_cte AS ( + SELECT '[]'::jsonb AS edges +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Person' AS label FROM Person + ) t + GROUP BY label +), +edge_counts AS ( + SELECT null AS label, 0 AS count +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result WHERE LOWER(p1.name) LIKE '%john%' LIMIT 5000;`; @@ -378,7 +722,39 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Employee id_1691576718400 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Employee + SELECT jsonb_build_object( + '_id', concat('Employee-', id_1691576718400.id::text), + 'label', 'Employee', + 'attributes', to_jsonb(id_1691576718400) - 'id' + ) AS node + FROM Employee id_1691576718400 + ) sub +), +edges_cte AS ( + SELECT '[]'::jsonb AS edges +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Employee' AS label FROM Employee + ) t + GROUP BY label +), +edge_counts AS ( + SELECT null AS label, 0 AS count +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result WHERE id_1691576718400.title LIKE '%ale%' LIMIT 500;`; @@ -409,7 +785,39 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Employee id_1691576718400 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Employee + SELECT jsonb_build_object( + '_id', concat('Employee-', id_1691576718400.id::text), + 'label', 'Employee', + 'attributes', to_jsonb(id_1691576718400) - 'id' + ) AS node + FROM Employee id_1691576718400 + ) sub +), +edges_cte AS ( + SELECT '[]'::jsonb AS edges +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Employee' AS label FROM Employee + ) t + GROUP BY label +), +edge_counts AS ( + SELECT null AS label, 0 AS count +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result WHERE id_1691576718400.title LIKE '%ale%' LIMIT 500;`; @@ -439,8 +847,40 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM CONTAINS - WHERE unitPrice < 10 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from CONTAINS + SELECT jsonb_build_object( + '_id', concat('CONTAINS-', id_1698231933579.id::text), + 'label', 'CONTAINS', + 'attributes', to_jsonb(id_1698231933579) - 'id' + ) AS node + FROM CONTAINS id_1698231933579 + ) sub +), +edges_cte AS ( + SELECT '[]'::jsonb AS edges +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'CONTAINS' AS label FROM CONTAINS + ) t + GROUP BY label +), +edge_counts AS ( + SELECT null AS label, 0 AS count +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result + WHERE id_1698231933579.unitPrice < 10 LIMIT 500;`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); @@ -473,11 +913,60 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `WITH (SELECT COUNT(p1.id) FROM Person p1 - JOIN Movie m1 ON p1.id = m1.id AND m1.relation = 'DIRECTED') AS p1_count - SELECT * FROM Person p1 - JOIN Movie m1 ON p1.id = m1.id AND m1.relation = 'DIRECTED' - WHERE p1_count > 1 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Person + SELECT jsonb_build_object( + '_id', concat('Person-', p1.id::text), + 'label', 'Person', + 'attributes', to_jsonb(p1) - 'id' + ) AS node + FROM Person p1 + UNION ALL + -- Nodes from Movie + SELECT jsonb_build_object( + '_id', concat('Movie-', m1.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(m1) - 'id' + ) AS node + FROM Movie m1 + ) sub +), +edges_cte AS ( + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('Person-', p1.id::text, '-Movie-', m1.id::text), + 'label', 'DIRECTED', + 'attributes', '{}'::jsonb, + 'from', p1.id::text, + 'to', m1.id::text + ) + ) AS edges + FROM Person p1 + JOIN Movie m1 ON p1.id = m1.id AND m1.relation = 'DIRECTED' +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Person' AS label FROM Person + UNION ALL + SELECT 'Movie' AS label FROM Movie + ) t + GROUP BY label +), +edge_counts AS ( + SELECT 'DIRECTED' AS label, COUNT(*) AS count FROM Person p1 JOIN Movie m1 ON p1.id = m1.id AND m1.relation = 'DIRECTED' GROUP BY label +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result + WHERE (SELECT COUNT(p1.id) FROM Person p1 JOIN Movie m1 ON p1.id = m1.id AND m1.relation = 'DIRECTED') > 1 LIMIT 5000;`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); @@ -505,7 +994,39 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Movie id_1730483610947 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Movie + SELECT jsonb_build_object( + '_id', concat('Movie-', id_1730483610947.id::text), + 'label', 'Movie', + 'attributes', to_jsonb(id_1730483610947) - 'id' + ) AS node + FROM Movie id_1730483610947 + ) sub +), +edges_cte AS ( + SELECT '[]'::jsonb AS edges +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Movie' AS label FROM Movie + ) t + GROUP BY label +), +edge_counts AS ( + SELECT null AS label, 0 AS count +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result LIMIT 500;`; expect(fixSQLSpaces(sql.query)).toBe(fixSQLSpaces(expectedSQL)); @@ -529,7 +1050,39 @@ describe('query2SQL', () => { }; const sql = query2SQL(query); - const expectedSQL = `SELECT * FROM Character id_1731428699410 + const expectedSQL = `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + -- Nodes from Character + SELECT jsonb_build_object( + '_id', concat('Character-', id_1731428699410.id::text), + 'label', 'Character', + 'attributes', to_jsonb(id_1731428699410) - 'id' + ) AS node + FROM Character id_1731428699410 + ) sub +), +edges_cte AS ( + SELECT '[]'::jsonb AS edges +), +node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + SELECT 'Character' AS label FROM Character + ) t + GROUP BY label +), +edge_counts AS ( + SELECT null AS label, 0 AS count +) +SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) +) AS result WHERE UPPER(id_1731428699410.name) IS NOT NULL LIMIT 500;`; diff --git a/src/queryExecution/sql/queryConverterSql.ts b/src/queryExecution/sql/queryConverterSql.ts index 225b7d1..9dacea3 100644 --- a/src/queryExecution/sql/queryConverterSql.ts +++ b/src/queryExecution/sql/queryConverterSql.ts @@ -1,233 +1,131 @@ -import type { BackendQueryFormat } from 'ts-common'; +import type { BackendQueryFormat, NodeStruct } from 'ts-common'; import type { QueryText } from '../model'; type Logic = any; -export function query2SQL(query: BackendQueryFormat): QueryText { - const { saveStateID, return: returnFields, query: paths, limit, logic } = query; - - let selectFields = '*'; - if (returnFields && returnFields.length > 0) { - selectFields = returnFields.join(', '); - } - - let sqlQuery = `SELECT ${selectFields} FROM `; - let countQuery = `SELECT `; - const joins: string[] = []; - const whereConditions: string[] = []; - const countFields: string[] = []; - let baseTable = ''; - let baseAlias = ''; - - if (paths && paths.length > 0) { - paths.forEach((path, index) => { - if (path.node) { - const { label, id, relation } = path.node; - const tableName = label || id || 'unknown'; - // Set tableAlias to id if no label exists, else use id first if present. - const tableAlias = label ? id || label : id || `table${index}`; - - if (index === 0) { - // If no label, output only one token. - sqlQuery = `SELECT ${selectFields} FROM ${tableName === tableAlias ? tableName : tableName + ' ' + tableAlias}`; - baseTable = tableName; - baseAlias = tableName === tableAlias ? '' : tableAlias; - // Push count for base node. - countFields.push(`COUNT(${baseAlias || baseTable}.id) as ${baseAlias || baseTable}_count`); - } - - if (relation) { - // Use relation.label if provided; otherwise, use relation.id. - const relString = relation.label || relation.id; - if (relString) { - const relatedNode = relation.node; - if (relatedNode) { - const relatedTableAlias = relatedNode.id || relatedNode.label; - if (relation.direction === 'TO') { - if (relString.indexOf('.') > -1) { - const parts = relString.split('.'); - const baseField = parts[0]; - const relatedField = parts[1]; - joins.push( - `JOIN ${relatedNode.label} ${relatedTableAlias} ON ${tableAlias}.${baseField} = ${relatedTableAlias}.${relatedField}`, - ); - } else { - joins.push( - `JOIN ${relatedNode.label} ${relatedTableAlias} ON ${tableAlias}.id = ${relatedTableAlias}.id AND ${relatedTableAlias}.relation = '${relString}'`, - ); - } - // Push count for the related node. - countFields.push(`COUNT(${relatedTableAlias}.id) as ${relatedTableAlias}_count`); - } - } - } - } - // Do not push duplicate count for base node for subsequent paths. - } else { - sqlQuery = `SELECT * FROM ${path.id}`; - } - }); - } - - // Special handling for average calculation logic. - if (logic && Array.isArray(logic) && logic[0] === '<' && Array.isArray(logic[2]) && logic[2][0] === 'Avg') { - // Extract field from left operand: e.g. '@p1.age' -> 'p1.age' - const leftOperand = logic[1]; - const field = String(leftOperand).startsWith('@') ? String(leftOperand).substring(1) : leftOperand; - const parts = String(field).split('.'); - const alias = `${parts[0]}_${parts[1]}_avg`; - // Build WITH clause using the base table and joins. - const withClause = `WITH (SELECT AVG(${field}) FROM ${baseTable} ${baseAlias || baseTable}${ - joins.length > 0 ? ' ' + joins.join(' ') : '' - }) AS ${alias}`; - // Prepend the WITH clause to the main query and append join clauses to main query. - sqlQuery = withClause + ' ' + sqlQuery + (joins.length > 0 ? ' ' + joins.join(' ') : ''); - sqlQuery += ` WHERE ${field} < ${alias}`; - if (limit) { - sqlQuery += ` LIMIT ${limit}`; - } - return { query: sqlQuery + ';', countQuery: '' }; - } - - if (logic) { - const whereClause = translateLogicToSQL(logic); - if (whereClause) { - whereConditions.push(whereClause); - } +function cleanStringOfNewLinesAndTabs(str: string): string { + if (!str) { + return ''; } + let trimmedSQL = str.replace(/\n/g, ' '); + trimmedSQL = trimmedSQL.replaceAll(/ {2,50}/g, ' '); + trimmedSQL = trimmedSQL.replace(/\t+/g, ''); + return trimmedSQL.trim(); +} - if (joins.length > 0) { - sqlQuery += ` ${joins.join(' ')}`; +function buildNodeCTE(node: NodeStruct | undefined): string[] { + if (!node || !node.label || !node.id) { + return []; } + const nodeParts: string[] = []; + const mainNode = ` + SELECT jsonb_build_object( + '_id', concat('${node.label}-', ${node.id}.id::text), + 'label', '${node.label}', + 'attributes', to_jsonb(${node.id}) - 'id' + ) AS node + FROM ${node.label} ${node.id}`; + nodeParts.push(cleanStringOfNewLinesAndTabs(mainNode)); + buildNodeCTE(node.relation?.node).forEach(part => nodeParts.push(part)); + return nodeParts; +} - if (whereConditions.length > 0) { - sqlQuery += ` WHERE ${whereConditions.join(' AND ').replace(/@/g, '').replace(/"/g, "'")}`; +function buildEdgesCTE(node: NodeStruct | undefined): string[] { + if (!node || !node.relation || !node.relation.node || !node.id || !node.relation.node.id || !node.relation.label) { + return []; } - if (limit) { - sqlQuery += ` LIMIT ${limit}`; - } + const edge = ` + SELECT jsonb_agg( + jsonb_build_object( + '_id', concat('${node.label}-', ${node.id}.id::text, '-${node.relation.node.label}-', ${node.relation.node.id}.id::text), + 'label', '${node.relation.label}', + 'attributes', '{}'::jsonb, + 'from', ${node.id}.id::text, + 'to', ${node.relation.node.id}.id::text + ) + ) AS edges + FROM ${node.label} ${node.id} + JOIN ${node.relation.node.label} ${node.relation.node.id} ON ${node.id}.${node.relation.label.split('.')[0]} = ${node.relation.node.id}.${ + node.relation.label.split('.')[1] + }`; + const ret = cleanStringOfNewLinesAndTabs(edge); + return [ret, ...buildEdgesCTE(node.relation.node)]; +} - if (countFields.length > 0) { - countQuery += countFields.join(', '); - countQuery += ` FROM ${baseTable} ${baseAlias}`; - // Append join clauses for count query - if (joins.length > 0) { - countQuery += ` ${joins.join(' ')}`; - } - return { query: sqlQuery + ';', countQuery: countQuery + ';' }; +function buildNodeCountsCTE(node: NodeStruct | undefined): string[] { + if (!node || !node.label || !node.id) { + return []; } - - return { query: sqlQuery, countQuery: '' }; + const nodeParts: string[] = []; + const mainNode = `SELECT '${node.label}' AS label FROM ${node.label}`; + nodeParts.push(cleanStringOfNewLinesAndTabs(mainNode)); + buildNodeCountsCTE(node.relation?.node).forEach(part => nodeParts.push(part)); + return nodeParts; } -function translateLogicToSQL(logic: Logic): string | null { - if (typeof logic === 'string' || typeof logic === 'number' || typeof logic === 'boolean') { - return String(logic); +function buildEdgeCountsCTE(node: NodeStruct | undefined): string[] { + if (!node || !node.relation || !node.relation.node || !node.id || !node.relation.node.id || !node.relation.label) { + return []; } - if (Array.isArray(logic)) { - const operator = logic[0]; - - switch (operator) { - case 'And': { - const operands = logic.slice(1) as Logic[]; - const sqlOperands = operands.map(translateLogicToSQL).filter(Boolean); - return sqlOperands.length > 0 ? `(${sqlOperands.join(' AND ')})` : null; - } - case 'Or': { - const operands = logic.slice(1) as Logic[]; - const sqlOperands = operands.map(translateLogicToSQL).filter(Boolean); - return sqlOperands.length > 0 ? `(${sqlOperands.join(' OR ')})` : null; - } - case 'Not': { - const operand = logic[1] as Logic; - const sqlOperand = translateLogicToSQL(operand); - return sqlOperand ? `NOT (${sqlOperand})` : null; - } - case '==': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} = ${right}` : null; - } - case '!=': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} <> ${right}` : null; - } - case '<': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} < ${right}` : null; - } - case '>': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} > ${right}` : null; - } - case '<=': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} <= ${right}` : null; - } - case '>=': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} >= ${right}` : null; - } - case '+': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} + ${right}` : null; - } - case '-': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `(${left} - ${right})` : null; - } - case '*': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} * ${right}` : null; - } - case '/': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} / ${right}` : null; - } - case 'Like': { - const left = translateLogicToSQL(logic[1] as Logic); - const right = translateLogicToSQL(logic[2] as Logic); - return left && right ? `${left} LIKE '%${right.replace(/"/g, '')}%'` : null; - } - case 'Lower': { - const operand = logic[1] as Logic; - const sqlOperand = translateLogicToSQL(operand); - return sqlOperand ? `LOWER(${sqlOperand})` : null; - } - case 'Upper': { - const operand = logic[1] as Logic; - const sqlOperand = translateLogicToSQL(operand); - return sqlOperand ? `UPPER(${sqlOperand}) IS NOT NULL` : null; - } - case 'Avg': { - const operand = logic[1] as Logic; - const sqlOperand = translateLogicToSQL(operand); - return sqlOperand ? `AVG(${sqlOperand})` : null; - } - case 'Count': { - const operand = logic[1] as Logic; - const sqlOperand = translateLogicToSQL(operand); - return sqlOperand ? `COUNT(${sqlOperand})` : null; - } - default: - return null; - } - } + const edge = ` + SELECT '${node.relation.label}' AS label, COUNT(*) AS count + FROM ${node.label} ${node.id} + JOIN ${node.relation.node.label} ${node.relation.node.id} + ON ${node.id}.${node.relation.label.split('.')[0]} = ${node.relation.node.id}.${node.relation.label.split('.')[1]} + GROUP BY label`; + const ret = cleanStringOfNewLinesAndTabs(edge); + return [ret, ...buildEdgeCountsCTE(node.relation.node)]; +} - if (typeof logic === 'string') { - return logic.startsWith('@') ? logic.substring(1).replace('.', '.') : `'${logic}'`; +function buildWhereClause(logic: Logic): string { + if (Array.isArray(logic) && logic.length === 3) { + const [operator, field, value] = logic; + const tableName = field.split('.')[0]; + const columnName = field.split('.')[1]; + return `${tableName}.${columnName} ${operator} ${value}`; } + return ''; +} - return null; +export function query2SQL(query: BackendQueryFormat): QueryText { + const nodesCTE = [...new Set(query.query.flatMap(path => buildNodeCTE(path.node)))]; + const edgesCTE = [...new Set(query.query.flatMap(path => buildEdgesCTE(path.node)))]; + const nodeCountsCTE = [...new Set(query.query.flatMap(path => buildNodeCountsCTE(path.node)))]; + const edgeCountsCTE = [...new Set(query.query.flatMap(path => buildEdgeCountsCTE(path.node)))]; + const whereClause = buildWhereClause(query.logic); + + return { + query: `WITH nodes_cte AS ( + SELECT jsonb_agg(node) AS nodes + FROM ( + ${nodesCTE.join(' UNION ALL ')} + ) sub + ), + edges_cte AS ( + ${edgesCTE.join(' UNION ALL ')} + ), + node_counts AS ( + SELECT label, COUNT(*) AS count + FROM ( + ${nodeCountsCTE.join(' UNION ALL ')} + ) t + GROUP BY label + ), + edge_counts AS ( + ${edgeCountsCTE.join(' UNION ALL ')} + ) + SELECT jsonb_build_object( + 'nodes', nodes_cte.nodes, + 'edges', COALESCE(edges_cte.edges, '[]'::jsonb), + 'nodeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM node_counts), '[]'::jsonb), + 'edgeCounts', COALESCE((SELECT jsonb_agg(jsonb_build_object('label', label, 'count', count)) + FROM edge_counts), '[]'::jsonb) + ) AS result FROM nodes_cte, edges_cte + ${whereClause ? `WHERE ${whereClause}` : ''} + ${query.limit ? `LIMIT ${query.limit}` : ''};`, + countQuery: '', + }; } diff --git a/src/readers/queryService.ts b/src/readers/queryService.ts index 46063bf..f58c3ca 100644 --- a/src/readers/queryService.ts +++ b/src/readers/queryService.ts @@ -3,7 +3,6 @@ import { type DbConnection, type QueryRequest } from 'ts-common'; import { rabbitMq, ums, type QueryExecutionTypes } from '../variables'; import { log } from '../logger'; import { QueryPublisher } from '../queryExecution/queryPublisher'; -import { query2Cypher } from '../queryExecution/cypher/converter'; import { formatTimeDifference } from 'ts-common/src/logger/logger'; import { Query2BackendQuery } from '../queryExecution/reactflow/query2backend'; -- GitLab