fix(reasoner): fix KgGraph split bug (#145)

This commit is contained in:
wangshaofei 2024-03-13 14:26:25 +08:00 committed by GitHub
parent e4602264a5
commit 8869df8c8c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 107 additions and 81 deletions

View File

@ -173,8 +173,22 @@ public class KgGraphImpl implements KgGraph<IVertexId>, Serializable {
Long limit) {
Set<String> vertexAliases = Sets.newHashSet(inputVertexAliases);
// remove alias that vertex set size is 1
vertexAliases.removeIf(vertexAlias -> 1 == this.alias2VertexMap.get(vertexAlias).size());
// remove aliases whose vertex set has size 1 and whose related edge sets all have size 1
Map<String, Set<String>> vertexAliasToEdgeAliasMap =
staticParameters.getVertexAliasToEdgeAliasMap();
vertexAliases.removeIf(
vertexAlias -> {
Boolean isSingleVertex = 1 == this.alias2VertexMap.get(vertexAlias).size();
Set<String> relatedEdgeAliasSet = vertexAliasToEdgeAliasMap.get(vertexAlias);
Boolean isRelatedEdgeSingle = true;
for (String edgeAlias : relatedEdgeAliasSet) {
isRelatedEdgeSingle = 1 == this.alias2EdgeMap.get(edgeAlias).size();
if (!isRelatedEdgeSingle) {
break;
}
}
return isSingleVertex && isRelatedEdgeSingle;
});
if (vertexAliases.isEmpty()) {
// no split needed
if (null != filter && !filter.test(this)) {
@ -201,8 +215,7 @@ public class KgGraphImpl implements KgGraph<IVertexId>, Serializable {
return resultList;
}
Map<Bytes, KgGraph<IVertexId>> resultMap = new HashMap<>();
List<String> vertexAliasList = Lists.newArrayList(vertexAliases);
List<KgGraph<IVertexId>> result = new ArrayList<>();
EdgeCombinationIterator it =
new EdgeCombinationIterator(
staticParameters.getEdgeIterateInfoList(),
@ -218,15 +231,13 @@ public class KgGraphImpl implements KgGraph<IVertexId>, Serializable {
if (null != filter && !filter.test(path)) {
continue;
}
Bytes key = getKgGraphKeyBySplitVertex(vertexAliasList, path);
KgGraph<IVertexId> kgGraph = resultMap.computeIfAbsent(key, k -> new KgGraphImpl());
kgGraph.merge(Lists.newArrayList(path), schema);
if (null != limit && resultMap.size() >= limit) {
result.add(path);
if (null != limit && result.size() >= limit) {
// reach max path limit
break;
}
}
return Lists.newArrayList(resultMap.values());
return result;
}
/**
@ -419,8 +430,7 @@ public class KgGraphImpl implements KgGraph<IVertexId>, Serializable {
alias2VertexMap,
alias2EdgeMap);
it.setScope(limit);
List<String> sortedVertexAliasList = Lists.newArrayList(vertexAliases);
Map<KgGraphKey, KgGraph<IVertexId>> result = new HashMap<>();
List<KgGraph<IVertexId>> result = new ArrayList<>();
long count = 0;
while (it.hasNext()) {
KgGraph<IVertexId> kgg = it.next();
@ -436,29 +446,12 @@ public class KgGraphImpl implements KgGraph<IVertexId>, Serializable {
if (kgGraph.checkDuplicateVertex()) {
continue;
}
KgGraphKey kgGraphKey = getKgGraphKey(kgGraph, sortedVertexAliasList);
KgGraph<IVertexId> oldKgGraph = result.get(kgGraphKey);
if (null == oldKgGraph) {
result.put(kgGraphKey, kgGraph);
} else {
oldKgGraph.merge(Lists.newArrayList(kgGraph), schema);
}
result.add(kgGraph);
if (null != limit && count >= limit) {
break;
}
}
return Lists.newArrayList(result.values());
}
private KgGraphKey getKgGraphKey(KgGraph<IVertexId> kgGraph, List<String> vertexAliases) {
IVertexId[] vertexIds = new IVertexId[vertexAliases.size()];
for (int i = 0; i < vertexAliases.size(); ++i) {
String alias = vertexAliases.get(i);
IVertexId vertexId = kgGraph.getVertex(alias).get(0).getId();
vertexIds[i] = vertexId;
}
return new KgGraphKey(vertexIds);
return result;
}
/**

View File

@ -1,39 +0,0 @@
/*
* Copyright 2023 OpenSPG Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied.
*/
package com.antgroup.openspg.reasoner.kggraph.impl;
import com.antgroup.openspg.reasoner.common.graph.vertex.IVertexId;
import java.util.Arrays;
/**
 * Map key backed by an array of vertex ids. Two keys are equal when their arrays have the same
 * length and pairwise-equal elements in the same order.
 */
public class KgGraphKey {
  /** The wrapped ids; the array passed to the constructor is stored as-is, not copied. */
  private final IVertexId[] vertexIds;

  /**
   * Creates a key over the given vertex ids.
   *
   * @param vertexIds ids that make up this key
   */
  public KgGraphKey(IVertexId[] vertexIds) {
    this.vertexIds = vertexIds;
  }

  /** Element-wise comparison of the wrapped arrays; any non-KgGraphKey argument is unequal. */
  @Override
  public boolean equals(Object obj) {
    return obj instanceof KgGraphKey && Arrays.equals(this.vertexIds, ((KgGraphKey) obj).vertexIds);
  }

  /** Hash derived from the array contents, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return Arrays.hashCode(this.vertexIds);
  }
}

View File

@ -56,10 +56,13 @@ public class KgGraphSplitStaticParameters implements Serializable {
private final List<EdgeIterateInfo> subEdgeIterateInfoList;
private final Map<String, Integer> subEdgeIterateOrderMap;
/** 将KgGraph中重复调用的逻辑抽出来在初始化中一次完成 */
private final Map<String, Set<String>> vertexAliasToEdgeAliasMap;
// Abstract the logic of repeated calls in KgGraph and do it all at once in initialization
public KgGraphSplitStaticParameters(Set<String> splitVertexAliases, Pattern schema) {
this.edgeIterateInfoList = initEdgeIterateInfo(schema);
this.edgeIterateOrderMap = initEdgeIterateOrderMap(this.edgeIterateInfoList);
this.vertexAliasToEdgeAliasMap = parseVertexAliasToEdgeAliasMap(schema);
if (CollectionUtils.isNotEmpty(splitVertexAliases)) {
this.needSplitEdgeSet = getNeedSplitEdgeSet(splitVertexAliases, schema);
@ -96,6 +99,23 @@ public class KgGraphSplitStaticParameters implements Serializable {
}
}
/**
 * Builds a lookup from vertex alias to the aliases of all connections that have that vertex as
 * their source or target.
 *
 * @param schema pattern whose topology is scanned
 * @return map from vertex alias to the set of connection aliases touching it
 */
private static Map<String, Set<String>> parseVertexAliasToEdgeAliasMap(Pattern schema) {
  Map<String, Set<String>> edgeAliasesByVertex = new HashMap<>();
  for (String topologyKey : JavaConversions.setAsJavaSet(schema.topology().keySet())) {
    scala.collection.Iterator<Connection> connections =
        schema.topology().get(topologyKey).get().iterator();
    while (connections.hasNext()) {
      Connection conn = connections.next();
      // register the connection alias under both of its endpoints
      edgeAliasesByVertex.computeIfAbsent(conn.source(), k -> new HashSet<>()).add(conn.alias());
      edgeAliasesByVertex.computeIfAbsent(conn.target(), k -> new HashSet<>()).add(conn.alias());
    }
  }
  return edgeAliasesByVertex;
}
/**
* Getter method for property <tt>needSplitEdgeSet</tt>.
*
@ -168,6 +188,15 @@ public class KgGraphSplitStaticParameters implements Serializable {
return subEdgeIterateOrderMap;
}
/**
 * Returns the <tt>vertexAliasToEdgeAliasMap</tt> property.
 *
 * @return the stored map itself (no copy is made)
 */
public Map<String, Set<String>> getVertexAliasToEdgeAliasMap() {
  return this.vertexAliasToEdgeAliasMap;
}
public boolean canDoSampleSplit(Map<String, Set<IVertex<IVertexId, IProperty>>> alias2VertexMap) {
if (null == this.neighborAliasSet) {
return false;

View File

@ -17,34 +17,75 @@ import com.antgroup.openspg.reasoner.common.graph.edge.IEdge;
import com.antgroup.openspg.reasoner.common.graph.property.IProperty;
import com.antgroup.openspg.reasoner.common.graph.vertex.IVertexId;
import com.antgroup.openspg.reasoner.kggraph.KgGraph;
import com.antgroup.openspg.reasoner.lube.common.pattern.Connection;
import com.antgroup.openspg.reasoner.lube.common.pattern.Pattern;
import com.antgroup.openspg.reasoner.utils.RunnerUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class UnfoldReduceDuplicateImpl implements KgGraphListProcess {
private final Pattern schema;
private final String edgeAlias;
private final List<String> edgeAliasList = new ArrayList<>();
/**
 * Stores the pattern and the given edge alias, and collects the alias of every other connection
 * of the pattern into {@code edgeAliasList}.
 *
 * @param schema pattern whose connections are scanned
 * @param edgeAlias edge alias that is kept separately and left out of {@code edgeAliasList}
 */
public UnfoldReduceDuplicateImpl(Pattern schema, String edgeAlias) {
  this.schema = schema;
  this.edgeAlias = edgeAlias;
  for (Connection conn : RunnerUtil.getConnectionSet(schema)) {
    if (!conn.alias().equals(this.edgeAlias)) {
      this.edgeAliasList.add(conn.alias());
    }
  }
}
@Override
public List<KgGraph<IVertexId>> reduce(Collection<KgGraph<IVertexId>> kgGraphs) {
Set<IEdge<IVertexId, IProperty>> pathEdgeSet = new HashSet<>();
Set<PathDuplicateKey> pathEdgeKeySet = new HashSet<>();
List<KgGraph<IVertexId>> result = new ArrayList<>();
for (KgGraph<IVertexId> kgGraph : kgGraphs) {
IEdge<IVertexId, IProperty> edge = kgGraph.getEdge(edgeAlias).get(0);
if (pathEdgeSet.contains(edge)) {
PathDuplicateKey key = new PathDuplicateKey(kgGraph);
if (pathEdgeKeySet.contains(key)) {
continue;
}
pathEdgeSet.add(edge);
pathEdgeKeySet.add(key);
result.add(kgGraph);
}
return result;
}
/**
 * De-duplication key for one KgGraph: the first edge under the enclosing instance's
 * {@code edgeAlias}, followed by the first edge under each alias in {@code edgeAliasList}. Two
 * keys are equal when these edge lists are equal element by element.
 */
private class PathDuplicateKey {
  private final List<IEdge<IVertexId, IProperty>> edgeList = new ArrayList<>();

  /**
   * Builds the key from the given graph.
   *
   * @param kgGraph graph whose edges form the key
   */
  public PathDuplicateKey(KgGraph<IVertexId> kgGraph) {
    addEdges(kgGraph);
  }

  /**
   * Appends the graph's first edge for the enclosing {@code edgeAlias} and then for every alias
   * in {@code edgeAliasList}, in that order.
   *
   * @param kgGraph graph to read edges from
   */
  public void addEdges(KgGraph<IVertexId> kgGraph) {
    this.edgeList.add(kgGraph.getEdge(edgeAlias).get(0));
    // distinct loop-variable name so it no longer shadows the enclosing edgeAlias field
    for (String otherEdgeAlias : edgeAliasList) {
      this.edgeList.add(kgGraph.getEdge(otherEdgeAlias).get(0));
    }
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    PathDuplicateKey that = (PathDuplicateKey) o;
    // List.equals is the same ordered element-wise comparison, without copying to arrays
    return this.edgeList.equals(that.edgeList);
  }

  @Override
  public int hashCode() {
    // List.hashCode is specified with the same formula as Arrays.hashCode over the elements
    return this.edgeList.hashCode();
  }
}
}

View File

@ -46,6 +46,7 @@ public class KgGraphSimpleSplitTest {
Pattern schema = pattern1();
KgGraph<IVertexId> kgGraph = kgGraph1();
// B,C,D size is 1, but its adjacent edge is more than 1
Set<String> splitVertexAliasSet = Sets.newHashSet("B", "C", "D");
List<KgGraph<IVertexId>> rst1 =
kgGraph.split(
@ -54,9 +55,9 @@ public class KgGraphSimpleSplitTest {
new KgGraphSplitStaticParameters(splitVertexAliasSet, schema),
null,
null);
Assert.assertEquals(1, rst1.size());
Assert.assertEquals(rst1.get(0), kgGraph);
Assert.assertEquals(2, rst1.size());
// D size is 1 and its adjacent edge is 1
Set<String> splitVertexAliasSet2 = Sets.newHashSet("D");
List<KgGraph<IVertexId>> rst2 =
kgGraph.split(
@ -207,11 +208,11 @@ public class KgGraphSimpleSplitTest {
new KgGraphSplitStaticParameters(splitVertexAliasSet3, schema),
null,
null);
Assert.assertEquals(2, rst3.size());
Assert.assertEquals(4, rst3.size());
rst3.stream()
.forEach(
iVertexIdKgGraph -> {
Assert.assertEquals(2, iVertexIdKgGraph.getVertex("A").size());
Assert.assertEquals(1, iVertexIdKgGraph.getVertex("A").size());
Assert.assertEquals(1, iVertexIdKgGraph.getVertex("B").size());
Assert.assertEquals(1, iVertexIdKgGraph.getVertex("C").size());
Assert.assertEquals(1, iVertexIdKgGraph.getVertex("D").size());

View File

@ -98,7 +98,8 @@ public class KgGraphTest {
new KgGraphSplitStaticParameters(splitVertexAliasSet2, schemaB),
null,
null);
Assert.assertTrue(splitRst.size() == kgGraph.getAlias2VertexMap().get("B").size());
// B size is 1, but its adjacent edge size is more than 1
Assert.assertTrue(splitRst.size() == 2);
splitRst.stream()
.map(kgGraph -> ((KgGraphImpl) kgGraph))
.forEach(kgGraph -> Assert.assertTrue(kgGraph.getAlias2VertexMap().get("B").size() == 1));