// File: convertQuery.go

package cypher

import (
	"errors"
	"fmt"
	"log"
	"strings"

	"git.science.uu.nl/graphpolaris/query-conversion/entity"
)

// ConvertQuery takes the json from the visual query builder and converts it into Cypher
func (s *Service) ConvertQuery(totalJSONQuery *entity.IncomingQueryJSON) (*string, error) {
	var finalCypher *string

	queryJSON := totalJSONQuery

Joris's avatar
Joris committed
	// If you want to query the other cluster as well, remove the underscores
	query, _, _ := checkForQueryCluster(queryJSON)
Joris's avatar
Joris committed
	if query == nil {
		return nil, errors.New("Invalid query")
	}

	ok, err := checkNoDeadEnds(query)
	if !ok {
		return nil, err
	}

	finalCypher, err = createCypher(query)
	if err != nil {
		return nil, err
	}

	return finalCypher, nil
// createCypher translates a cluster of nodes (query) to Cypher
func createCypher(JSONQuery *entity.IncomingQueryJSON) (*string, error) {

	// create the hierarchy from the cluster
	hierarchy, err := createQueryHierarchy(JSONQuery)
	if err != nil {
		return nil, err
	}

	// translate it to cypher in the right order, using the hierarchy
	cypher, err := formQuery(JSONQuery, hierarchy)
	if err != nil {
		return nil, errors.New("Creation of query Cypher failed")
	}

	// create the return statement
	returnStatement, err := createReturnStatement(JSONQuery, hierarchy)
	if err != nil {
		return nil, errors.New("Creation of return Cypher failed")
	}

	finalCypher := *cypher + *returnStatement

	return &finalCypher, nil
}

// createReturnStatement creates the final return statement
func createReturnStatement(JSONQuery *entity.IncomingQueryJSON, parts entity.Query) (*string, error) {

	var retStatement string
Joris's avatar
Joris committed
	var retType string // This is a marker attached to the end, for ease of parsing in the executor

	// First check to see if the return is a table (due to a groupby at the end) or if it is nodelink data
	numOfParts := len(parts)
Joris's avatar
Joris committed
	if numOfParts == 0 {
		return nil, errors.New("No parts found in return statement")
	}

	if parts[numOfParts-1].QType == "groupBy" {
		// Return is a table
		groupBy := JSONQuery.FindG(parts[numOfParts-1].QID)

		gName := fmt.Sprintf("%v_%v", groupBy.AppliedModifier, groupBy.GroupAttribute)
		by := fmt.Sprintf("%v%v.%v", string(groupBy.ByType[0]), groupBy.ByID, groupBy.ByAttribute)
		byName := strings.Replace(by, ".", "_", 1)

		retStatement = fmt.Sprintf("RETURN %v, %v", byName, gName)
Joris's avatar
Joris committed
		retType = ";table"
	} else {
		// Return is nodelink
		// Loop through the parts of the query from back to front
		retStatement = "RETURN "
		lineStart := ""
		for i := numOfParts - 1; i >= 0; i-- {
			part := parts[i]
			if part.QType == "relation" {
				rel := JSONQuery.FindR(part.QID)
				retStatement += fmt.Sprintf("%v r%v", lineStart, rel.ID)
				lineStart = ","

				if rel.FromID != -1 {
					if rel.FromType == "entity" {

						retStatement += fmt.Sprintf("%v e%v", lineStart, rel.FromID)
					} else {
						id := JSONQuery.FindG(rel.FromID).ByID
						retStatement += fmt.Sprintf("%v eg%v", lineStart, id)
					}
				}

				if rel.ToID != -1 {
					if rel.ToType == "entity" {

						retStatement += fmt.Sprintf("%v e%v", lineStart, rel.ToID)
					} else {
						id := JSONQuery.FindG(rel.ToID).ByID
						retStatement += fmt.Sprintf("%v eg%v", lineStart, id)
					}
				}
			} else if part.QType == "entity" {
				retStatement += fmt.Sprintf("%v e%v", lineStart, part.QID)
				break

				// Probably ends with a break, since a single entity is always connected via an IN to a groupby? (maybe not in case of ONLY having an entity as the entire query)
			} else {
				// Then it is a groupby which must not be returned, thus the returns are done.
				break
			}
		}
Joris's avatar
Joris committed

		retType = ";nodelink"
Joris's avatar
Joris committed
	retStatement = retStatement + "\n" + fmt.Sprintf("LIMIT %v", JSONQuery.Limit) + retType

	return &retStatement, nil
}

// createQueryHierarchy finds out what depends on what, then uses topological sort to create a hierarchy
func createQueryHierarchy(JSONQuery *entity.IncomingQueryJSON) (entity.Query, error) {

	var parts entity.Query
	IDctr := 0

	// Add relations all to query parts
	for _, rel := range JSONQuery.Relations {
		part := entity.QueryPart{
			QType:        "relation",
			QID:          rel.ID,
			PartID:       IDctr,
			Dependencies: make([]int, 0),
		}
		parts = append(parts, part)

		IDctr++

	}

	// Add the Groupby's
	for _, gb := range JSONQuery.GroupBys {
		part := entity.QueryPart{
			QType:        "groupBy",
			QID:          gb.ID,
			PartID:       IDctr,
			Dependencies: make([]int, 0),
		}
		parts = append(parts, part)

		IDctr++

	}

	// Add the entities, if they have an IN, otherwise they are not important
	for _, ent := range JSONQuery.Entities {

		skip := true
		for _, con := range ent.Constraints {
			if con.InID != -1 {
				skip = false
			}
		}

		if skip {
			continue
		}

		part := entity.QueryPart{
			QType:        "entity",
			QID:          ent.ID,
			PartID:       IDctr,
			Dependencies: make([]int, 0),
		}
		parts = append(parts, part)

		IDctr++
	}

	// Check dependencies in a nice O(n^2)
	for _, rel := range JSONQuery.Relations {
		if rel.FromID == -1 {
			continue
		}

		// Check the dependencies From - To
		for _, rela := range JSONQuery.Relations {
			if rela.ToID == -1 {
				continue
			}

			if rel.FromID == rela.ToID && rel.FromType == rela.ToType {
				part := parts.Find(rel.ID, "relation")
				part.Dependencies = append(part.Dependencies, parts.Find(rela.ID, "relation").PartID)
			}
		}

		if rel.ToID == -1 {
			continue
		}

		// Now for connections to group by's it doesnt matter if the GB is attached to the from or the to
		// The GB always has priority
		for _, gb := range JSONQuery.GroupBys {
			if (rel.FromID == gb.ID && rel.FromType == "groupBy") || (rel.ToID == gb.ID && rel.ToType == "groupBy") {
				part := parts.Find(rel.ID, "relation")
				gbID := parts.Find(gb.ID, "groupBy").PartID
				part.Dependencies = append(part.Dependencies, gbID)
			}
		}
	}

	// Same trick for group by's
	for _, gb := range JSONQuery.GroupBys {
		for _, rela := range JSONQuery.Relations {
			// Check if the gb is connected to the relation
			if (gb.ByID == rela.ID && gb.ByType == "relation") || // Is the By connected to a relation
				(gb.GroupID == rela.ID && gb.GroupType == "relation") || // is the Group connected to a relation
				(gb.ByID == rela.FromID && gb.ByType == rela.FromType) || // Is the by connected to an entity connected to the "From" of a relation
				(gb.ByID == rela.ToID && gb.ByType == rela.ToType) || // Is the by connected to an entity connected to the "To" of a relation
				(gb.GroupID == rela.FromID && gb.GroupType == rela.FromType) || // Is the group connected to an entity connected to the "From" of arelation
				(gb.GroupID == rela.ToID && gb.GroupType == rela.ToType) { // Is the group connected to an entity connected to the "To" of a relation
				part := parts.Find(gb.ID, "groupBy")
				part.Dependencies = append(part.Dependencies, parts.Find(rela.ID, "relation").PartID)
			}
		}

		// Not sure if this is even possible, but hey who knows
		// Check to see if the gb is connected to another gb
		for _, grb := range JSONQuery.GroupBys {
			if gb.ID == grb.ID {
				continue
			}

			if (gb.GroupID == grb.ID && gb.GroupType == "groupBy") || (gb.ByID == grb.ID && gb.ByType == "groupBy") {
				part := parts.Find(gb.ID, "groupBy")
				part.Dependencies = append(part.Dependencies, parts.Find(grb.ID, "groupBy").PartID)
			}
		}
	}

	for _, ent := range JSONQuery.Entities {
		for _, con := range ent.Constraints {
			if con.InID != -1 {
				part := parts.Find(ent.ID, "entity") // Should always be groupBy
				part.Dependencies = append(part.Dependencies, parts.Find(con.InID, con.InType).PartID)
			}
		}

	}

	// Here comes a checker for (A)-->(B) and (B)-->(A). This is mitigated partly by ignoring it
	// Lets call it a small cycle. It wont catch bigger cycles (with 3 nodes for example)

	for _, p := range parts {
		// We only allow small cycles with relations
		if p.QType != "relation" {
			continue
		}

		for _, dep := range p.Dependencies {
			other := parts.SelectByID(dep)

			if other.QType != "relation" {
				continue
			}

			// Deleting from a slice while looping through it is an easy way to make mistakes, hence the workaround
			cycle := false
			toRemove := -1

			for i, otherDep := range other.Dependencies {
				if otherDep == p.PartID {
					// Small cycle detected
			// Remove one of the two dependencies, does not really matter which, cypher knits it back together due to the query
			// using the same ID's, thus making it a cycle again later on.
				log.Println("Cycle detected and removed")
				if len(other.Dependencies) == 0 {
					other.Dependencies = make([]int, 0)
				} else {
					other.Dependencies[toRemove] = other.Dependencies[len(other.Dependencies)-1]
					other.Dependencies = other.Dependencies[:len(other.Dependencies)-1]
				}

			}
		}
	}

	// Now we have a directed graph, meaning we can use some topological sort (Kahn's algorithm)
	var sortedQuery entity.Query
	incomingEdges := make(map[int]int)

	// Set all to 0
	for _, p := range parts {
		incomingEdges[p.PartID] = 0
	}

	// Count the incoming edges (dependencies)
	for _, p := range parts {
		for _, dp := range p.Dependencies {
			incomingEdges[dp]++
		}
	}

	for { // While there is a someone where incomingEdges[someone] == 0
		part := entity.QueryPart{PartID: -1}
		// Select a node with no incoming edges
		for ID, edges := range incomingEdges {
			if edges == 0 {
				part = *parts.SelectByID(ID)
			}
		}

		// Check to see if there are parts withouth incoming edges left
		if part.PartID == -1 {
			break
		}

		// Remove it from the set
		incomingEdges[part.PartID] = -1
		sortedQuery = append(sortedQuery, part)

		// Decrease incoming edges of other parts
		for _, ID := range part.Dependencies {
			incomingEdges[ID]--
		}
	}

	// Now check for cycles in the graph
	partRemaining := false
	for _, edges := range incomingEdges {
		if edges != -1 {
			partRemaining = true
		}
	}

	if partRemaining {
		// Somehow there was a cycle in the query,
		return nil, errors.New("Cyclic query detected")
	}

	// Reverse the list
	retQuery := make([]entity.QueryPart, len(sortedQuery))
	for i := 0; i < len(sortedQuery); i++ {
		retQuery[i] = sortedQuery[len(sortedQuery)-i-1]
	}

	return retQuery, nil
}

// formQuery uses the hierarchy to create cypher for each part of the query in the right order
func formQuery(JSONQuery *entity.IncomingQueryJSON, hierarchy entity.Query) (*string, error) {

	// Traverse through the hierarchy and for every entry create a part like:
	// Match p0 = (l:Lorem)-[:Ipsum*1..1]-(d:Dolor)
	// Constraints on l and d
	// Unwind relationships(p0) as r0
	// Constraints on r0
	// With *

	totalQuery := ""

	for _, entry := range hierarchy {
		var cypher *string
		var err error

		switch entry.QType {
		case "relation":
			cypher, err = createRelationCypher(JSONQuery, entry)
			if err != nil {
				return nil, err
			}
			break
		case "groupBy":
			cypher, err = createGroupByCypher(JSONQuery, entry)
			if err != nil {
				return nil, err
			}

			break
		case "entity":
			// This would be in case of an IN or if there was only 1 entity in the query builder
			cypher, err = createInCypher(JSONQuery, entry)
			if err != nil {
				return nil, err
			}

			break
		default:
			// Should never be reached
			return nil, errors.New("Invalid query pill type detected")
		}

		totalQuery += *cypher
	}

	return &totalQuery, nil
}

// createInCypher creates the cypher for an entity with an IN-clause
func createInCypher(JSONQuery *entity.IncomingQueryJSON, part entity.QueryPart) (*string, error) {
	ent := JSONQuery.FindE(part.QID)
	eName := fmt.Sprintf("e%v", ent.ID)

	match := fmt.Sprintf("MATCH (%v:%v)\n", eName, ent.Name)
	eConstraints := ""
	newLineStatement := "\tWHERE"

	// Find the IN
	for _, con := range ent.Constraints {
		if con.InID != -1 {
			gby := JSONQuery.FindG(con.InID) // Because this could only be on a groupby
			byName := fmt.Sprintf("%v%v", string(gby.ByType[0]), gby.ByID)
			eConstraints += fmt.Sprintf("%v %v.%v IN %v_%v\n", newLineStatement, eName, con.Attribute, byName, gby.ByAttribute)
			newLineStatement = "\tAND"
		}
	}

	// Attach other constraints (if any)
	for _, v := range ent.Constraints {
		if v.InID != -1 {
			continue
		}
		eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eName, false))
	}

	with := "WITH *\n"
	retStatement := match + eConstraints + with
	return &retStatement, nil

}

// createRelationCypher takes the json and a query part, finds the necessary entities and converts it into cypher
func createRelationCypher(JSONQuery *entity.IncomingQueryJSON, part entity.QueryPart) (*string, error) {

	rel := JSONQuery.FindR(part.QID)

	if (rel.FromID == -1) && (rel.ToID == -1) {
		// Now there is only a relation, which we do not allow
		return nil, errors.New("Relation only queries are not supported")
	}

	var match, eConstraints, unwind, rConstraints string

	// There is some duplicate code here below that could be omitted with extra if-statements, but that is something to do
	// for a later time. Since this way it is easier to understand the flow of the code
	// Removing the duplicate code here, probably more than triples the if-statements and is a puzzle for a later time (TODO)
	if rel.ToID == -1 {
		// There is no To, only a From
		var eName string
		var ent *entity.QueryEntityStruct

		if rel.FromType == "entity" {

			ent = JSONQuery.FindE(rel.ToID)
			eName = fmt.Sprintf("e%v", ent.ID)

		} else if rel.FromType == "groupBy" {
			gb := JSONQuery.FindG(rel.FromID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			ent = JSONQuery.FindE(gb.ByID)
			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			eName = fmt.Sprintf("e%v", ent.ID)
		} else {
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		match = fmt.Sprintf("MATCH p%v = (%v:%v)-[:%v*%v..%v]-()\n", part.PartID, eName, ent.Name, rel.Name, rel.Depth.Min, rel.Depth.Max)

		eConstraints = ""
		newLineStatement := "\tWHERE"
		for _, v := range ent.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eName, false))
			newLineStatement = "\tAND"
		}

		// Add an IN clause, connecting the relation to the output of the groupby
		if rel.FromType == "groupBy" {
			gb := JSONQuery.FindG(rel.FromID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eName, gb.ByAttribute, gb.AppliedModifier, gb.ByAttribute)
			eConstraints += inConstraint
		}

	} else if rel.FromID == -1 {
		var eName string
		var ent *entity.QueryEntityStruct

		if rel.ToType == "entity" {
			ent = JSONQuery.FindE(rel.ToID)
			eName = fmt.Sprintf("e%v", ent.ID)

		} else if rel.ToType == "groupBy" {
			gb := JSONQuery.FindG(rel.ToID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			ent = JSONQuery.FindE(gb.ByID)
			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			eName = fmt.Sprintf("e%v", ent.ID)
		} else {
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		match = fmt.Sprintf("MATCH p%v = ()-[:%v*%v..%v]-(%v:%v)\n", part.PartID, rel.Name, rel.Depth.Min, rel.Depth.Max, eName, ent.Name)

		eConstraints = ""
		newLineStatement := "\tWHERE"
		for _, v := range ent.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eName, false))
			newLineStatement = "\tAND"
		}

		// Add an IN clause, connecting the relation to the output of the groupby
		if rel.ToType == "groupBy" {
			gb := JSONQuery.FindG(rel.ToID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eName, gb.ByAttribute, gb.AppliedModifier, gb.ByAttribute)
			eConstraints += inConstraint
		}

	} else {
		var eTName string
		var entFrom *entity.QueryEntityStruct
		var eFName string
		var entTo *entity.QueryEntityStruct

		// Check of what type the To is
		if rel.ToType == "entity" {
			entTo = JSONQuery.FindE(rel.ToID)
			eTName = fmt.Sprintf("e%v", entTo.ID)

		} else if rel.ToType == "groupBy" {
			gb := JSONQuery.FindG(rel.ToID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			entTo = JSONQuery.FindE(gb.ByID)
			// this is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			eTName = fmt.Sprintf("e%v", entTo.ID)
		} else {
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		// Check of what type the From is
		if rel.FromType == "entity" {

			entFrom = JSONQuery.FindE(rel.FromID)
			eFName = fmt.Sprintf("e%v", entFrom.ID)

		} else if rel.FromType == "groupBy" {
			gb := JSONQuery.FindG(rel.FromID)
			if gb.ByType == "relation" {
				return nil, errors.New("Invalid query: cannot connect a relation to a group by that groups by another relation")
			}

			entFrom = JSONQuery.FindE(gb.ByID)
			// This is a sort of dummy variable, since it is not directly visible in the query, but it is definitely needed
			eFName = fmt.Sprintf("eg%v", entFrom.ID)
		} else {
			// Should never be reachable
			return nil, errors.New("Invalid connection type to relation")
		}

		match = fmt.Sprintf("MATCH p%v = (%v:%v)-[:%v*%v..%v]-(%v:%v)\n", part.PartID, eFName, entFrom.Name, rel.Name, rel.Depth.Min, rel.Depth.Max, eTName, entTo.Name)

		eConstraints = ""
		newLineStatement := "\tWHERE"
		for _, v := range entFrom.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eFName, false))
			newLineStatement = "\tAND"
		}
		for _, v := range entTo.Constraints {
			eConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, eTName, false))
			newLineStatement = "\tAND"
		}

		// Add an IN clause, connecting the relation to the output of the groupby
		if rel.ToType == "groupBy" {
			gb := JSONQuery.FindG(rel.ToID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eTName, gb.ByAttribute, strings.Replace(eFName, "g", "", 1), gb.ByAttribute)
			eConstraints += inConstraint
			newLineStatement = "\tAND"
		}

		if rel.FromType == "groupBy" {
			gb := JSONQuery.FindG(rel.FromID)
			inConstraint := fmt.Sprintf("%v %v.%v IN %v_%v \n", newLineStatement, eFName, gb.ByAttribute, strings.Replace(eFName, "g", "", 1), gb.ByAttribute)
			eConstraints += inConstraint
		}
	}

	rName := fmt.Sprintf("r%v", part.QID)
	unwind = fmt.Sprintf("UNWIND relationships(p%v) as %v \nWITH *\n", part.PartID, rName)

	rConstraints = ""
	newLineStatement := "\tWHERE"
	for _, v := range rel.Constraints {
		rConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, rName, false))
		newLineStatement = "\tAND"
	}

	retString := match + eConstraints + unwind + rConstraints
	return &retString, nil

}

// createGroupByCypher takes the json and a query part, finds the group by and converts it into cypher
func createGroupByCypher(JSONQuery *entity.IncomingQueryJSON, part entity.QueryPart) (*string, error) {
	groupBy := JSONQuery.FindG(part.QID)

	gName := fmt.Sprintf("%v_%v", groupBy.AppliedModifier, groupBy.GroupAttribute)
	by := fmt.Sprintf("%v%v.%v", string(groupBy.ByType[0]), groupBy.ByID, groupBy.ByAttribute)
	byName := strings.Replace(by, ".", "_", 1)
	group := fmt.Sprintf("%v%v.%v", string(groupBy.GroupType[0]), groupBy.GroupID, groupBy.GroupAttribute)

	// If you do not use a *, then everything needs to be aliased
	with := fmt.Sprintf("WITH %v AS %v, %v(%v) AS %v \n", by, byName, groupBy.AppliedModifier, group, gName)

	gConstraints := ""
	newLineStatement := "\tWHERE"
	for _, v := range groupBy.Constraints {
		gConstraints += fmt.Sprintf("%v %v \n", newLineStatement, *createConstraintBoolExpression(&v, gName, true))
		newLineStatement = "\tAND"
	}

	retString := with + gConstraints
	return &retString, nil
}