package cypher

import (
	"errors"
	"fmt"
	"log"
	"strings"

	"git.science.uu.nl/graphpolaris/query-conversion/entity"
)

// ConvertQuery takes the json from the visual query builder and converts it into Cypher.
//
// It returns an "Invalid query" error when checkForQueryCluster yields no
// cluster, the error of checkNoDeadEnds when that check does not pass, and
// otherwise whatever createCypher returns.
func (s *Service) ConvertQuery(totalJSONQuery *entity.IncomingQueryJSON) (*string, error) {
	// If you want to query the other cluster as well, remove the underscores
	query, _, _ := checkForQueryCluster(totalJSONQuery)
	if query == nil {
		return nil, errors.New("Invalid query")
	}

	ok, err := checkNoDeadEnds(query)
	if !ok {
		return nil, err
	}

	finalCypher, err := createCypher(query)
	if err != nil {
		return nil, err
	}
	return finalCypher, nil
}

// createCypher translates a cluster of nodes (query) to Cypher.
//
// The result is the concatenation of the per-part Cypher produced by formQuery
// and the return statement produced by createReturnStatement. Errors from
// createQueryHierarchy are passed through; errors from the other two steps are
// replaced by a fixed message.
func createCypher(JSONQuery *entity.IncomingQueryJSON) (*string, error) {
	// Create the hierarchy from the cluster
	hierarchy, err := createQueryHierarchy(JSONQuery)
	if err != nil {
		return nil, err
	}

	// Translate it to cypher in the right order, using the hierarchy
	cypher, err := formQuery(JSONQuery, hierarchy)
	if err != nil {
		return nil, errors.New("Creation of query Cypher failed")
	}

	// Create the return statement
	returnStatement, err := createReturnStatement(JSONQuery, hierarchy)
	if err != nil {
		return nil, errors.New("Creation of return Cypher failed")
	}

	finalCypher := *cypher + *returnStatement
	return &finalCypher, nil
}

// createReturnStatement creates the final return statement.
//
// parts is the ordered hierarchy. When its last part is a group by, the
// statement returns the two aliases of that group by and is marked ";table";
// otherwise it walks the parts from back to front, returning relations and
// their endpoints, and is marked ";nodelink". A "LIMIT" line using
// JSONQuery.Limit is always appended before the marker. An error is returned
// when parts is empty.
func createReturnStatement(JSONQuery *entity.IncomingQueryJSON, parts entity.Query) (*string, error) {
	var retStatement string
	var retType string // This is a marker attached to the end, for ease of parsing in the executor

	// First check to see if the return is a table (due to a groupby at the end) or if it is nodelink data
	numOfParts := len(parts)
	if numOfParts == 0 {
		return nil, errors.New("No parts found in return statement")
	}

	if parts[numOfParts-1].QType == "groupBy" {
		// Return is a table
		groupBy := JSONQuery.FindG(parts[numOfParts-1].QID)

		gName := fmt.Sprintf("%v_%v", groupBy.AppliedModifier, groupBy.GroupAttribute)
		by := fmt.Sprintf("%v%v.%v", string(groupBy.ByType[0]), groupBy.ByID, groupBy.ByAttribute)
		byName := strings.Replace(by, ".", "_", 1)

		retStatement = fmt.Sprintf("RETURN %v, %v", byName, gName)
		retType = ";table"
	} else {
		// Return is nodelink
		// Loop through the parts of the query from back to front
		retStatement = "RETURN "
		lineStart := ""

		for i := numOfParts - 1; i >= 0; i-- {
			part := parts[i]

			if part.QType == "relation" {
				rel := JSONQuery.FindR(part.QID)
				retStatement += fmt.Sprintf("%v r%v", lineStart, rel.ID)
				lineStart = ","

				if rel.FromID != -1 {
					if rel.FromType == "entity" {
						retStatement += fmt.Sprintf("%v e%v", lineStart, rel.FromID)
					} else {
						// Not an entity: the endpoint is looked up as a group by and named after its By id
						id := JSONQuery.FindG(rel.FromID).ByID
						retStatement += fmt.Sprintf("%v eg%v", lineStart, id)
					}
				}

				if rel.ToID != -1 {
					if rel.ToType == "entity" {
						retStatement += fmt.Sprintf("%v e%v", lineStart, rel.ToID)
					} else {
						id := JSONQuery.FindG(rel.ToID).ByID
						retStatement += fmt.Sprintf("%v eg%v", lineStart, id)
					}
				}
			} else if part.QType == "entity" {
				retStatement += fmt.Sprintf("%v e%v", lineStart, part.QID)
				// Probably ends with a break, since a single entity is always connected via an IN to a groupby?
				// (maybe not in case of ONLY having an entity as the entire query)
				break
			} else {
				// Then it is a groupby which must not be returned, thus the returns are done.
				break
			}
		}

		retType = ";nodelink"
	}

	retStatement = retStatement + "\n" + fmt.Sprintf("LIMIT %v", JSONQuery.Limit) + retType

	return &retStatement, nil
}

// createQueryHierarchy finds out what depends on what, then uses topological sort to create a hierarchy.
//
// Every relation, every group by, and every entity that has at least one
// constraint with an IN becomes a query part. The returned slice lists the
// parts so that a part comes after the parts it depends on. A
// "Cyclic query detected" error is returned when the dependency graph still
// contains a cycle after two-relation cycles have been broken.
func createQueryHierarchy(JSONQuery *entity.IncomingQueryJSON) (entity.Query, error) {
	var parts entity.Query
	IDctr := 0

	// Add relations all to query parts
	for _, rel := range JSONQuery.Relations {
		part := entity.QueryPart{
			QType:        "relation",
			QID:          rel.ID,
			PartID:       IDctr,
			Dependencies: make([]int, 0),
		}
		parts = append(parts, part)
		IDctr++
	}

	// Add the Groupby's
	for _, gb := range JSONQuery.GroupBys {
		part := entity.QueryPart{
			QType:        "groupBy",
			QID:          gb.ID,
			PartID:       IDctr,
			Dependencies: make([]int, 0),
		}
		parts = append(parts, part)
		IDctr++
	}

	// Add the entities, if they have an IN, otherwise they are not important
	for _, ent := range JSONQuery.Entities {
		skip := true
		for _, con := range ent.Constraints {
			if con.InID != -1 {
				skip = false
			}
		}
		if skip {
			continue
		}

		part := entity.QueryPart{
			QType:        "entity",
			QID:          ent.ID,
			PartID:       IDctr,
			Dependencies: make([]int, 0),
		}
		parts = append(parts, part)
		IDctr++
	}

	// Check dependencies in a nice O(n^2)
	for _, rel := range JSONQuery.Relations {
		// Check the dependencies From - To (only meaningful when this relation has a From)
		if rel.FromID != -1 {
			for _, rela := range JSONQuery.Relations {
				if rela.ToID == -1 {
					continue
				}

				if rel.FromID == rela.ToID && rel.FromType == rela.ToType {
					part := parts.Find(rel.ID, "relation")
					part.Dependencies = append(part.Dependencies, parts.Find(rela.ID, "relation").PartID)
				}
			}
		}

		// Now for connections to group by's it doesnt matter if the GB is attached to the from or the to
		// The GB always has priority.
		// This check deliberately runs even when one side of the relation is
		// unconnected (-1): the relation still needs the group by to come first.
		for _, gb := range JSONQuery.GroupBys {
			if (rel.FromID == gb.ID && rel.FromType == "groupBy") || (rel.ToID == gb.ID && rel.ToType == "groupBy") {
				part := parts.Find(rel.ID, "relation")
				gbID := parts.Find(gb.ID, "groupBy").PartID
				part.Dependencies = append(part.Dependencies, gbID)
			}
		}
	}

	// Same trick for group by's
	for _, gb := range JSONQuery.GroupBys {
		for _, rela := range JSONQuery.Relations {
			// Check if the gb is connected to the relation
			if (gb.ByID == rela.ID && gb.ByType == "relation") || // Is the By connected to a relation
				(gb.GroupID == rela.ID && gb.GroupType == "relation") || // Is the Group connected to a relation
				(gb.ByID == rela.FromID && gb.ByType == rela.FromType) || // Is the By connected to an entity connected to the "From" of a relation
				(gb.ByID == rela.ToID && gb.ByType == rela.ToType) || // Is the By connected to an entity connected to the "To" of a relation
				(gb.GroupID == rela.FromID && gb.GroupType == rela.FromType) || // Is the Group connected to an entity connected to the "From" of a relation
				(gb.GroupID == rela.ToID && gb.GroupType == rela.ToType) { // Is the Group connected to an entity connected to the "To" of a relation
				part := parts.Find(gb.ID, "groupBy")
				part.Dependencies = append(part.Dependencies, parts.Find(rela.ID, "relation").PartID)
			}
		}

		// Not sure if this is even possible, but hey who knows
		// Check to see if the gb is connected to another gb
		for _, grb := range JSONQuery.GroupBys {
			if gb.ID == grb.ID {
				continue
			}

			if (gb.GroupID == grb.ID && gb.GroupType == "groupBy") || (gb.ByID == grb.ID && gb.ByType == "groupBy") {
				part := parts.Find(gb.ID, "groupBy")
				part.Dependencies = append(part.Dependencies, parts.Find(grb.ID, "groupBy").PartID)
			}
		}
	}

	// Entities depend on whatever their IN constraints point at
	for _, ent := range JSONQuery.Entities {
		for _, con := range ent.Constraints {
			if con.InID != -1 {
				part := parts.Find(ent.ID, "entity") // Should always be groupBy
				part.Dependencies = append(part.Dependencies, parts.Find(con.InID, con.InType).PartID)
			}
		}
	}

	// Here comes a checker for (A)-->(B) and (B)-->(A). This is mitigated partly by ignoring it
	// Lets call it a small cycle. It wont catch bigger cycles (with 3 nodes for example)
	for _, p := range parts {
		// We only allow small cycles with relations
		if p.QType != "relation" {
			continue
		}

		for _, dep := range p.Dependencies {
			other := parts.SelectByID(dep)

			if other.QType != "relation" {
				continue
			}

			// Deleting from a slice while looping through it is an easy way to make mistakes, hence the workaround
			toRemove := -1
			for i, otherDep := range other.Dependencies {
				if otherDep == p.PartID {
					// Small cycle detected
					toRemove = i
				}
			}

			// Remove one of the two dependencies, does not really matter which, cypher knits it back together due to the query
			// using the same ID's, thus making it a cycle again later on.
			if toRemove != -1 {
				log.Println("Cycle detected and removed")
				// Swap-remove: order of the remaining dependencies is irrelevant
				last := len(other.Dependencies) - 1
				other.Dependencies[toRemove] = other.Dependencies[last]
				other.Dependencies = other.Dependencies[:last]
			}
		}
	}

	// Now we have a directed graph, meaning we can use some topological sort (Kahn's algorithm)
	var sortedQuery entity.Query
	incomingEdges := make(map[int]int)

	// Set all to 0
	for _, p := range parts {
		incomingEdges[p.PartID] = 0
	}

	// Count the incoming edges (dependencies)
	for _, p := range parts {
		for _, dp := range p.Dependencies {
			incomingEdges[dp]++
		}
	}

	for { // While there is a someone where incomingEdges[someone] == 0
		// Select a node with no incoming edges. The candidates are scanned in
		// the order of parts (not by ranging over the map) so that the chosen
		// order is the same on every run.
		selected := -1
		for i := range parts {
			if incomingEdges[parts[i].PartID] == 0 {
				selected = i
				break
			}
		}

		// Check to see if there are parts without incoming edges left
		if selected == -1 {
			break
		}
		part := parts[selected]

		// Remove it from the set
		incomingEdges[part.PartID] = -1
		sortedQuery = append(sortedQuery, part)

		// Decrease incoming edges of other parts
		for _, ID := range part.Dependencies {
			incomingEdges[ID]--
		}
	}

	// Now check for cycles in the graph: every part that was emitted is marked -1
	for _, edges := range incomingEdges {
		if edges != -1 {
			// Somehow there was a cycle in the query
			return nil, errors.New("Cyclic query detected")
		}
	}

	// Reverse the list, so that dependencies come before the parts that need them
	retQuery := make([]entity.QueryPart, len(sortedQuery))
	for i := 0; i < len(sortedQuery); i++ {
		retQuery[i] = sortedQuery[len(sortedQuery)-i-1]
	}

	return retQuery, nil
}

// formQuery uses the hierarchy to create cypher for each part of the query in the right order.
//
// The Cypher of every hierarchy entry is appended in order. An error from one
// of the per-part builders is returned as is; an entry with an unknown QType
// yields an "Invalid query pill type detected" error.
func formQuery(JSONQuery *entity.IncomingQueryJSON, hierarchy entity.Query) (*string, error) {
	// Traverse through the hierarchy and for every entry create a part like:
	// Match p0 = (l:Lorem)-[:Ipsum*1..1]-(d:Dolor)
	// Constraints on l and d
	// Unwind relationships(p0) as r0
	// Constraints on r0
	// With *
	var total strings.Builder

	for _, entry := range hierarchy {
		var cypher *string
		var err error

		switch entry.QType {
		case "relation":
			cypher, err = createRelationCypher(JSONQuery, entry)
		case "groupBy":
			cypher, err = createGroupByCypher(JSONQuery, entry)
		case "entity":
			// This would be in case of an IN or if there was only 1 entity in the query builder
			cypher, err = createInCypher(JSONQuery, entry)
		default:
			// Should never be reached
			return nil, errors.New("Invalid query pill type detected")
		}
		if err != nil {
			return nil, err
		}

		total.WriteString(*cypher)
	}

	totalQuery := total.String()
	return &totalQuery, nil
}

// createInCypher creates the cypher for an entity with an IN-clause.
//
// The result is a MATCH on the entity, one "IN" condition per constraint that
// has an InID, the remaining constraints rendered through
// createConstraintBoolExpression, and a closing "WITH *".
func createInCypher(JSONQuery *entity.IncomingQueryJSON, part entity.QueryPart) (*string, error) {
	ent := JSONQuery.FindE(part.QID)
	eName := fmt.Sprintf("e%v", ent.ID)

	match := fmt.Sprintf("MATCH (%v:%v)\n", eName, ent.Name)
	eConstraints := ""
	newLineStatement := "\tWHERE"

	// Find the IN
	for _, con := range ent.Constraints {
		if con.InID != -1 {
			gby := JSONQuery.FindG(con.InID) // Because this could only be on a groupby
			byName := fmt.Sprintf("%v%v", string(gby.ByType[0]), gby.ByID)
			eConstraints += fmt.Sprintf("%v %v.%v IN %v_%v\n", newLineStatement, eName, con.Attribute, byName, gby.ByAttribute)
			newLineStatement = "\tAND"
		}
	}

	// Attach other constraints (if any)
	for _, v := range ent.Constraints {
		if v.InID != -1 {
			continue
		}
		eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eName, false))
		// Every condition after the first one must be chained with AND
		newLineStatement = "\tAND"
	}

	with := "WITH *\n"
	retStatement := match + eConstraints + with
	return &retStatement, nil
}

// createRelationCypher takes the json and a query part, finds the necessary entities and converts it into cypher.
//
// The result consists of a MATCH on a path named after the part, the
// constraints of the connected entities (plus an IN condition for every side
// that is a group by), an UNWIND of the path's relationships, and the
// constraints on the relation itself.
//
// Errors are returned when the relation has neither a From nor a To, when a
// side is connected to a group by that groups by a relation, or when a side
// has a type other than "entity" or "groupBy".
func createRelationCypher(JSONQuery *entity.IncomingQueryJSON, part entity.QueryPart) (*string, error) {
	rel := JSONQuery.FindR(part.QID)

	if (rel.FromID == -1) && (rel.ToID == -1) {
		// Now there is only a relation, which we do not allow
		return nil, errors.New("Relation only queries are not supported")
	}

	var match, eConstraints, unwind, rConstraints string

	// There is some duplicate code here below that could be omitted with extra if-statements, but that is something to do
	// for a later time. Since this way it is easier to understand the flow of the code
	// Removing the duplicate code here, probably more than triples the if-statements and is a puzzle for a later time (TODO)
	if rel.ToID == -1 {
		// There is no To, only a From
		var eName string
		var ent *entity.QueryEntityStruct

		switch rel.FromType {
		case "entity":
			// The To side is -1 in this branch, so the entity must come from the From side
			ent = JSONQuery.FindE(rel.FromID)
			eName = fmt.Sprintf("e%v", ent.ID)
		case "groupBy":
			gb := JSONQuery.FindG(rel.FromID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			ent = JSONQuery.FindE(gb.ByID)
			eName = fmt.Sprintf("e%v", ent.ID)
		default:
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		match = fmt.Sprintf("MATCH p%v = (%v:%v)-[:%v*%v..%v]-()\n", part.PartID, eName, ent.Name, rel.Name, rel.Depth.Min, rel.Depth.Max)

		eConstraints = ""
		newLineStatement := "\tWHERE"
		for _, v := range ent.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eName, false))
			newLineStatement = "\tAND"
		}

		// Add an IN clause, connecting the relation to the output of the groupby
		// NOTE(review): the variable name ("e…") and the IN target
		// ("<modifier>_<byAttribute>") differ from what the two-sided branch and
		// createReturnStatement use — confirm the intended aliases.
		if rel.FromType == "groupBy" {
			gb := JSONQuery.FindG(rel.FromID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eName, gb.ByAttribute, gb.AppliedModifier, gb.ByAttribute)
			eConstraints += inConstraint
		}
	} else if rel.FromID == -1 {
		// There is no From, only a To
		var eName string
		var ent *entity.QueryEntityStruct

		switch rel.ToType {
		case "entity":
			ent = JSONQuery.FindE(rel.ToID)
			eName = fmt.Sprintf("e%v", ent.ID)
		case "groupBy":
			gb := JSONQuery.FindG(rel.ToID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			ent = JSONQuery.FindE(gb.ByID)
			eName = fmt.Sprintf("e%v", ent.ID)
		default:
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		match = fmt.Sprintf("MATCH p%v = ()-[:%v*%v..%v]-(%v:%v)\n", part.PartID, rel.Name, rel.Depth.Min, rel.Depth.Max, eName, ent.Name)

		eConstraints = ""
		newLineStatement := "\tWHERE"
		for _, v := range ent.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eName, false))
			newLineStatement = "\tAND"
		}

		// Add an IN clause, connecting the relation to the output of the groupby
		// NOTE(review): same naming question as in the From-only branch — confirm the intended aliases.
		if rel.ToType == "groupBy" {
			gb := JSONQuery.FindG(rel.ToID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eName, gb.ByAttribute, gb.AppliedModifier, gb.ByAttribute)
			eConstraints += inConstraint
		}
	} else {
		// Both a From and a To are connected
		var eTName string
		var entFrom *entity.QueryEntityStruct
		var eFName string
		var entTo *entity.QueryEntityStruct

		// Check of what type the To is
		switch rel.ToType {
		case "entity":
			entTo = JSONQuery.FindE(rel.ToID)
			eTName = fmt.Sprintf("e%v", entTo.ID)
		case "groupBy":
			gb := JSONQuery.FindG(rel.ToID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed.
			// It is named "eg…" like the From side, which is also the name
			// createReturnStatement emits for a group-by endpoint.
			entTo = JSONQuery.FindE(gb.ByID)
			eTName = fmt.Sprintf("eg%v", entTo.ID)
		default:
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		// Check of what type the From is
		switch rel.FromType {
		case "entity":
			entFrom = JSONQuery.FindE(rel.FromID)
			eFName = fmt.Sprintf("e%v", entFrom.ID)
		case "groupBy":
			gb := JSONQuery.FindG(rel.FromID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			entFrom = JSONQuery.FindE(gb.ByID)
			eFName = fmt.Sprintf("eg%v", entFrom.ID)
		default:
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		match = fmt.Sprintf("MATCH p%v = (%v:%v)-[:%v*%v..%v]-(%v:%v)\n", part.PartID, eFName, entFrom.Name, rel.Name, rel.Depth.Min, rel.Depth.Max, eTName, entTo.Name)

		eConstraints = ""
		newLineStatement := "\tWHERE"
		for _, v := range entFrom.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eFName, false))
			newLineStatement = "\tAND"
		}
		for _, v := range entTo.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eTName, false))
			newLineStatement = "\tAND"
		}

		// Add an IN clause, connecting the relation to the output of the groupby.
		// Dropping the "g" from the "eg…" name gives the "e<id>" prefix that
		// createGroupByCypher uses for the alias of an entity By.
		if rel.ToType == "groupBy" {
			gb := JSONQuery.FindG(rel.ToID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eTName, gb.ByAttribute, strings.Replace(eTName, "g", "", 1), gb.ByAttribute)
			eConstraints += inConstraint
			newLineStatement = "\tAND"
		}

		if rel.FromType == "groupBy" {
			gb := JSONQuery.FindG(rel.FromID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eFName, gb.ByAttribute, strings.Replace(eFName, "g", "", 1), gb.ByAttribute)
			eConstraints += inConstraint
		}
	}

	rName := fmt.Sprintf("r%v", part.QID)
	unwind = fmt.Sprintf("UNWIND relationships(p%v) as %v \nWITH *\n", part.PartID, rName)

	rConstraints = ""
	newLineStatement := "\tWHERE"
	for _, v := range rel.Constraints {
		rConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, rName, false))
		newLineStatement = "\tAND"
	}

	retString := match + eConstraints + unwind + rConstraints
	return &retString, nil
}

// createGroupByCypher takes the json and a query part, finds the group by and converts it into cypher.
//
// The result is a WITH clause that aliases the By attribute and the modifier
// applied to the Group attribute, followed by the constraints on the group by.
func createGroupByCypher(JSONQuery *entity.IncomingQueryJSON, part entity.QueryPart) (*string, error) {
	groupBy := JSONQuery.FindG(part.QID)

	gName := fmt.Sprintf("%v_%v", groupBy.AppliedModifier, groupBy.GroupAttribute)
	by := fmt.Sprintf("%v%v.%v", string(groupBy.ByType[0]), groupBy.ByID, groupBy.ByAttribute)
	byName := strings.Replace(by, ".", "_", 1)
	group := fmt.Sprintf("%v%v.%v", string(groupBy.GroupType[0]), groupBy.GroupID, groupBy.GroupAttribute)

	// If you do not use a *, then everything needs to be aliased
	with := fmt.Sprintf("WITH %v AS %v, %v(%v) AS %v \n", by, byName, groupBy.AppliedModifier, group, gName)

	gConstraints := ""
	newLineStatement := "\tWHERE"
	for _, v := range groupBy.Constraints {
		gConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, gName, true))
		newLineStatement = "\tAND"
	}

	retString := with + gConstraints
	return &retString, nil
}