feat(neo4j): improve neo4j read query performance by specifying labels (#10593)

This commit is contained in:
deepgarg-visa 2024-05-29 08:43:46 +05:30 committed by GitHub
parent 9b247e1031
commit 36ae5afbb5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -377,20 +377,24 @@ public class Neo4jGraphService implements GraphService {
graphFilters.getAllowedEntityTypes(), direction),
"maxHops", maxHops));
final String entityType = entityUrn.getEntityType();
if (lineageFlags == null
|| (lineageFlags.getStartTimeMillis() == null && lineageFlags.getEndTimeMillis() == null)) {
// if no time filtering required, simply find all expansion paths to other nodes
final var statement =
"MATCH (a {urn: $urn}) "
+ "CALL apoc.path.spanningTree(a, { "
+ " relationshipFilter: $relationshipFilter, "
+ " labelFilter: $labelFilter, "
+ " minLevel: 1, "
+ " maxLevel: $maxHops "
+ "}) "
+ "YIELD path "
+ "WITH a, path AS path "
+ "RETURN a, path, last(nodes(path));";
String.format(
"MATCH (a:%s {urn: $urn}) "
+ "CALL apoc.path.spanningTree(a, { "
+ " relationshipFilter: $relationshipFilter, "
+ " labelFilter: $labelFilter, "
+ " minLevel: 1, "
+ " maxLevel: $maxHops "
+ "}) "
+ "YIELD path "
+ "WITH a, path AS path "
+ "RETURN a, path, last(nodes(path));",
entityType);
return Pair.of(statement, parameterMap);
} else {
// when needing time filtering, possibility on multiple paths between two
@ -413,26 +417,28 @@ public class Neo4jGraphService implements GraphService {
// exploration, not
// after path exploration is done)
final var statement =
"MATCH (a {urn: $urn}) "
+ "CALL apoc.path.subgraphNodes(a, { "
+ " relationshipFilter: $relationshipFilter, "
+ " labelFilter: $labelFilter, "
+ " minLevel: 1, "
+ " maxLevel: $maxHops "
+ "}) "
+ "YIELD node AS b "
+ "WITH a, b "
+ "MATCH path = shortestPath((a)"
+ relationshipPattern
+ "(b)) "
+ "WHERE a <> b "
+ " AND ALL(rt IN relationships(path) WHERE "
+ " (rt.source IS NOT NULL AND rt.source = 'UI') OR "
+ " (rt.createdOn IS NULL AND rt.updatedOn IS NULL) OR "
+ " ($startTimeMillis <= rt.createdOn <= $endTimeMillis OR "
+ " $startTimeMillis <= rt.updatedOn <= $endTimeMillis) "
+ " ) "
+ "RETURN a, path, b;";
String.format(
"MATCH (a:%s {urn: $urn}) "
+ "CALL apoc.path.subgraphNodes(a, { "
+ " relationshipFilter: $relationshipFilter, "
+ " labelFilter: $labelFilter, "
+ " minLevel: 1, "
+ " maxLevel: $maxHops "
+ "}) "
+ "YIELD node AS b "
+ "WITH a, b "
+ "MATCH path = shortestPath((a)"
+ relationshipPattern
+ "(b)) "
+ "WHERE a <> b "
+ " AND ALL(rt IN relationships(path) WHERE "
+ " (rt.source IS NOT NULL AND rt.source = 'UI') OR "
+ " (rt.createdOn IS NULL AND rt.updatedOn IS NULL) OR "
+ " ($startTimeMillis <= rt.createdOn <= $endTimeMillis OR "
+ " $startTimeMillis <= rt.updatedOn <= $endTimeMillis) "
+ " ) "
+ "RETURN a, path, b;",
entityType);
// provide dummy start/end time when not provided, so no need to
// format clause differently if either of them is missing
@ -487,10 +493,19 @@ public class Neo4jGraphService implements GraphService {
matchTemplate = "MATCH (src %s)-[r%s %s]->(dest %s)%s";
}
final String returnNodes =
String.format(
"RETURN dest, type(r)"); // Return both related entity and the relationship type.
final String returnCount = "RETURN count(*)"; // For getting the total results.
String srcNodeLabel = StringUtils.EMPTY;
// Parse the URN from the first source-filter criterion and use its entity type as the src node label. Only proceed if srcCriteria is not null or empty
if (StringUtils.isNotEmpty(srcCriteria)) {
final String urnValue =
sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString();
try {
final Urn urn = Urn.createFromString(urnValue);
srcNodeLabel = urn.getEntityType();
matchTemplate = matchTemplate.replace("(src ", "(src:%s ");
} catch (URISyntaxException e) {
log.error("Failed to parse URN: {} ", urnValue, e);
}
}
String relationshipTypeFilter = "";
if (!relationshipTypes.isEmpty()) {
@ -500,17 +515,34 @@ public class Neo4jGraphService implements GraphService {
String whereClause = computeEntityTypeWhereClause(sourceTypes, destinationTypes);
// Build Statement strings
String baseStatementString =
String.format(
matchTemplate,
srcCriteria,
relationshipTypeFilter,
edgeCriteria,
destCriteria,
whereClause);
String baseStatementString;
if (StringUtils.isNotEmpty(srcNodeLabel)) {
baseStatementString =
String.format(
matchTemplate,
srcNodeLabel,
srcCriteria,
relationshipTypeFilter,
edgeCriteria,
destCriteria,
whereClause);
} else {
baseStatementString =
String.format(
matchTemplate,
srcCriteria,
relationshipTypeFilter,
edgeCriteria,
destCriteria,
whereClause);
}
log.info(baseStatementString);
final String returnNodes =
"RETURN dest, type(r)"; // Return both related entity and the relationship type.
final String returnCount = "RETURN count(*)"; // For getting the total results.
final String resultStatementString =
String.format("%s %s SKIP $offset LIMIT $count", baseStatementString, returnNodes);
final String countStatementString = String.format("%s %s", baseStatementString, returnCount);
@ -571,10 +603,11 @@ public class Neo4jGraphService implements GraphService {
public void removeNode(@Nonnull final Urn urn) {
log.debug(String.format("Removing Neo4j node with urn: %s", urn));
final String srcNodeLabel = urn.getEntityType();
// also delete any relationship going to or from it
final String matchTemplate = "MATCH (node {urn: $urn}) DETACH DELETE node";
final String statement = String.format(matchTemplate);
final String matchTemplate = "MATCH (node:%s {urn: $urn}) DETACH DELETE node";
final String statement = String.format(matchTemplate, srcNodeLabel);
final Map<String, Object> params = new HashMap<>();
params.put("urn", urn.toString());
@ -605,19 +638,20 @@ public class Neo4jGraphService implements GraphService {
// also delete any relationship going to or from it
final RelationshipDirection relationshipDirection = relationshipFilter.getDirection();
final String srcNodeLabel = urn.getEntityType();
String matchTemplate = "MATCH (src {urn: $urn})-[r%s]-(dest) RETURN type(r), dest, 2";
String matchTemplate = "MATCH (src:%s {urn: $urn})-[r%s]-(dest) RETURN type(r), dest, 2";
if (relationshipDirection == RelationshipDirection.INCOMING) {
matchTemplate = "MATCH (src {urn: $urn})<-[r%s]-(dest) RETURN type(r), dest, 0";
matchTemplate = "MATCH (src:%s {urn: $urn})<-[r%s]-(dest) RETURN type(r), dest, 0";
} else if (relationshipDirection == RelationshipDirection.OUTGOING) {
matchTemplate = "MATCH (src {urn: $urn})-[r%s]->(dest) RETURN type(r), dest, 1";
matchTemplate = "MATCH (src:%s {urn: $urn})-[r%s]->(dest) RETURN type(r), dest, 1";
}
String relationshipTypeFilter = "";
if (!relationshipTypes.isEmpty()) {
relationshipTypeFilter = ":" + StringUtils.join(relationshipTypes, "|");
}
final String statement = String.format(matchTemplate, relationshipTypeFilter);
final String statement = String.format(matchTemplate, srcNodeLabel, relationshipTypeFilter);
final Map<String, Object> params = new HashMap<>();
params.put("urn", urn.toString());
@ -626,7 +660,7 @@ public class Neo4jGraphService implements GraphService {
if (!neo4jResult.isEmpty()) {
String removeMode = neo4jResult.get(0).values().get(2).toString();
if (removeMode.equals("2")) {
final String matchDeleteTemplate = "MATCH (src {urn: $urn})-[r%s]-(dest) DELETE r";
final String matchDeleteTemplate = "MATCH (src:%s {urn: $urn})-[r%s]-(dest) DELETE r";
relationshipTypeFilter = "";
if (!relationshipTypes.isEmpty()) {
relationshipTypeFilter =
@ -636,7 +670,7 @@ public class Neo4jGraphService implements GraphService {
+ StringUtils.join(relationshipTypes, "|r_");
}
final String statementNoDirection =
String.format(matchDeleteTemplate, relationshipTypeFilter);
String.format(matchDeleteTemplate, srcNodeLabel, relationshipTypeFilter);
runQuery(buildStatement(statementNoDirection, params)).consume();
} else {
for (Record typeDest : neo4jResult) {