test(GraphService): Thorough graph service tests (#3011)

This commit is contained in:
Enrico Minack 2021-09-17 22:54:24 +02:00 committed by GitHub
parent a2b3890fa1
commit c0aa3ecb4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1701 additions and 114 deletions

View File

@ -9,8 +9,58 @@ import javax.annotation.Nullable;
public interface GraphService {
/**
* Adds an edge to the graph. This creates the source and destination nodes, if they do not exist.
*
* @param edge the edge to add
*/
void addEdge(final Edge edge);
/**
* Find related entities (nodes) connected to a source entity via edges of given relationship types. Related entities
* can be filtered by source and destination type (use `null` for any type), by source and destination entity filter
* and relationship filter. Pagination of the result is controlled via `offset` and `count`.
*
* Starting from a node as the source entity, determined by `sourceType` and `sourceEntityFilter`,
* related entities are found along the direction of edges (`RelationshipDirection.OUTGOING`) or in opposite
* direction of edges (`RelationshipDirection.INCOMING`). The destination entities are further filtered by `destinationType`
* and `destinationEntityFilter`, and then returned as related entities.
*
* This does not return duplicate related entities, even if entities are connected to source entities via multiple edges.
* An empty list of relationship types returns an empty result.
*
* In other words, the source and destination entities are not to be understood as the source and destination of the edge,
* but as the source and destination of "finding related entities", where the destination entities are always the ones returned.
* This distinction matters for `RelationshipDirection.INCOMING`: the origin of the edge becomes
* the destination entity, and the source entity is the node the edge points to.
*
* Example I:
* dataset one --DownstreamOf-> dataset two --DownstreamOf-> dataset three
*
* findRelatedEntities(null, EMPTY_FILTER, null, EMPTY_FILTER, ["DownstreamOf"], RelationshipFilter.setDirection(RelationshipDirection.OUTGOING), 0, 100)
* - RelatedEntity("DownstreamOf", "dataset two")
* - RelatedEntity("DownstreamOf", "dataset three")
*
* findRelatedEntities(null, EMPTY_FILTER, null, EMPTY_FILTER, ["DownstreamOf"], RelationshipFilter.setDirection(RelationshipDirection.INCOMING), 0, 100)
* - RelatedEntity("DownstreamOf", "dataset one")
* - RelatedEntity("DownstreamOf", "dataset two")
*
* Example II:
* dataset one --HasOwner-> user one
*
* findRelatedEntities(null, EMPTY_FILTER, null, EMPTY_FILTER, ["HasOwner"], RelationshipFilter.setDirection(RelationshipDirection.OUTGOING), 0, 100)
* - RelatedEntity("HasOwner", "user one")
*
* findRelatedEntities(null, EMPTY_FILTER, null, EMPTY_FILTER, ["HasOwner"], RelationshipFilter.setDirection(RelationshipDirection.INCOMING), 0, 100)
* - RelatedEntity("HasOwner", "dataset one")
*
* Calling this method with {@link com.linkedin.metadata.query.RelationshipDirection} `UNDIRECTED` in `relationshipFilter`
* is equivalent to the union of `OUTGOING` and `INCOMING` (without duplicates).
*
* Example III:
* findRelatedEntities(null, EMPTY_FILTER, null, EMPTY_FILTER, ["DownstreamOf"], RelationshipFilter.setDirection(RelationshipDirection.UNDIRECTED), 0, 100)
* - RelatedEntity("DownstreamOf", "dataset one")
* - RelatedEntity("DownstreamOf", "dataset two")
* - RelatedEntity("DownstreamOf", "dataset three")
*/
@Nonnull
RelatedEntitiesResult findRelatedEntities(
@Nullable final String sourceType,
@ -22,8 +72,19 @@ public interface GraphService {
final int offset,
final int count);
/**
* Removes the given node (if it exists) as well as all edges (incoming and outgoing) of the node.
*
* @param urn the URN of the node to remove; must not be null
*/
void removeNode(@Nonnull final Urn urn);
/**
* Removes edges of the given relationship types from the given node after applying the relationship filter.
*
* An empty list of relationship types removes nothing from the node.
*
* Calling this method with a {@link com.linkedin.metadata.query.RelationshipDirection} `UNDIRECTED` in `relationshipFilter`
* is equivalent to the union of `OUTGOING` and `INCOMING` (without duplicates).
*/
void removeEdgesFromNode(
@Nonnull final Urn urn,
@Nonnull final List<String> relationshipTypes,
@ -31,5 +92,8 @@ public interface GraphService {
void configure();
/**
* Removes all edges and nodes from the graph.
*
* Takes no arguments and returns nothing; the whole graph is affected rather than a single node or relationship type.
*/
void clear();
}

View File

@ -114,7 +114,7 @@ public class Neo4jGraphService implements GraphService {
matchTemplate = "MATCH (src%s %s)-[r%s %s]->(dest%s %s)";
}
final String returnNodes = "RETURN dest, type(r)"; // Return both related entity and the relationship type.
final String returnNodes = String.format("RETURN dest%s, type(r)", destinationType); // Return both related entity and the relationship type.
final String returnCount = "RETURN count(*)"; // For getting the total results.
String relationshipTypeFilter = "";

View File

@ -1,9 +1,13 @@
package com.linkedin.metadata.graph;
import com.linkedin.common.urn.Urn;
import com.linkedin.metadata.ElasticSearchTestUtils;
import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO;
import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO;
import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService;
import com.linkedin.metadata.query.Filter;
import com.linkedin.metadata.query.RelationshipDirection;
import com.linkedin.metadata.query.RelationshipFilter;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
import org.apache.http.HttpHost;
@ -12,11 +16,18 @@ import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.testcontainers.elasticsearch.ElasticsearchContainer;
import org.testng.SkipException;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import javax.annotation.Nonnull;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import static org.testng.Assert.assertEquals;
import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME;
@ -31,12 +42,6 @@ public class ElasticSearchGraphServiceTest extends GraphServiceTestBase {
private static final String IMAGE_NAME = "docker.elastic.co/elasticsearch/elasticsearch:7.9.3";
private static final int HTTP_PORT = 9200;
/**
* Runs before each test method: clears the graph through the client and then calls {@code syncAfterWrite()}.
*
* NOTE(review): the sync is presumably needed so that the cleared state is visible to the next query - confirm.
*/
@BeforeMethod
public void wipe() throws Exception {
_client.clear();
syncAfterWrite();
}
@BeforeTest
public void setup() {
_elasticsearchContainer = new ElasticsearchContainer(IMAGE_NAME);
@ -46,6 +51,12 @@ public class ElasticSearchGraphServiceTest extends GraphServiceTestBase {
_client.configure();
}
/**
* Runs before each test method: clears the graph through the client and then calls {@code syncAfterWrite()}.
*
* NOTE(review): the sync is presumably needed so that the cleared state is visible to the next query - confirm.
*/
@BeforeMethod
public void wipe() throws Exception {
_client.clear();
syncAfterWrite();
}
@Nonnull
private RestHighLevelClient buildRestClient() {
final RestClientBuilder builder =
@ -81,4 +92,134 @@ public class ElasticSearchGraphServiceTest extends GraphServiceTestBase {
ElasticSearchTestUtils.syncAfterWrite(_searchClient, _indexName);
}
/**
* Asserts that {@code actual} matches {@code expected}, comparing only the {@code start} field and the entities.
*
* The entities are compared through the list-based {@code assertEqualsAnyOrder} overload.
*
* @param actual the result produced by the graph service
* @param expected the result the test expects
*/
@Override
protected void assertEqualsAnyOrder(RelatedEntitiesResult actual, RelatedEntitiesResult expected) {
// https://github.com/linkedin/datahub/issues/3115
// ElasticSearchGraphService produces duplicates, which are ignored here until fixed;
// for the same reason actual.count and actual.total are not compared
assertEquals(actual.start, expected.start);
assertEqualsAnyOrder(actual.entities, expected.entities, RELATED_ENTITY_COMPARATOR);
}
/**
 * Asserts that both lists hold the same distinct elements, regardless of order and of how often an element occurs.
 *
 * @param actual the elements produced by the graph service
 * @param expected the elements the test expects
 * @param comparator not used by this override
 */
@Override
protected <T> void assertEqualsAnyOrder(List<T> actual, List<T> expected, Comparator<T> comparator) {
  // https://github.com/linkedin/datahub/issues/3115
  // Collapsing both sides into sets hides the duplicates ElasticSearchGraphService produces, until that is fixed.
  final HashSet<T> distinctActual = new HashSet<>(actual);
  final HashSet<T> distinctExpected = new HashSet<>(expected);
  assertEquals(distinctActual, distinctExpected);
}
/**
 * Delegates to the base-class test, except that cases using an UNDIRECTED relationship filter are skipped.
 */
@Override
public void testFindRelatedEntitiesSourceEntityFilter(Filter sourceEntityFilter,
    List<String> relationshipTypes,
    RelationshipFilter relationships,
    List<RelatedEntity> expectedRelatedEntities) throws Exception {
  final boolean undirected = relationships.getDirection() == RelationshipDirection.UNDIRECTED;
  if (undirected) {
    // https://github.com/linkedin/datahub/issues/3114
    throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
  }

  super.testFindRelatedEntitiesSourceEntityFilter(
      sourceEntityFilter, relationshipTypes, relationships, expectedRelatedEntities);
}
/**
 * Delegates to the base-class test, except that cases using an UNDIRECTED relationship filter are skipped.
 */
@Override
public void testFindRelatedEntitiesDestinationEntityFilter(Filter destinationEntityFilter,
    List<String> relationshipTypes,
    RelationshipFilter relationships,
    List<RelatedEntity> expectedRelatedEntities) throws Exception {
  final boolean undirected = relationships.getDirection() == RelationshipDirection.UNDIRECTED;
  if (undirected) {
    // https://github.com/linkedin/datahub/issues/3114
    throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
  }

  super.testFindRelatedEntitiesDestinationEntityFilter(
      destinationEntityFilter, relationshipTypes, relationships, expectedRelatedEntities);
}
/**
 * Delegates to the base-class test, skipping cases that use an UNDIRECTED relationship filter
 * or an empty (but non-null) source type.
 */
@Override
public void testFindRelatedEntitiesSourceType(String datasetType,
    List<String> relationshipTypes,
    RelationshipFilter relationships,
    List<RelatedEntity> expectedRelatedEntities) throws Exception {
  final boolean undirected = relationships.getDirection() == RelationshipDirection.UNDIRECTED;
  if (undirected) {
    // https://github.com/linkedin/datahub/issues/3114
    throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
  }

  // "".equals(..) is false for null, so a null type still reaches the base-class test.
  final boolean emptyType = "".equals(datasetType);
  if (emptyType) {
    // https://github.com/linkedin/datahub/issues/3116
    throw new SkipException("ElasticSearchGraphService does not support empty source type");
  }

  super.testFindRelatedEntitiesSourceType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
}
/**
 * Delegates to the base-class test, skipping cases that use an UNDIRECTED relationship filter
 * or an empty (but non-null) destination type.
 */
@Override
public void testFindRelatedEntitiesDestinationType(String datasetType,
    List<String> relationshipTypes,
    RelationshipFilter relationships,
    List<RelatedEntity> expectedRelatedEntities) throws Exception {
  final boolean undirected = relationships.getDirection() == RelationshipDirection.UNDIRECTED;
  if (undirected) {
    // https://github.com/linkedin/datahub/issues/3114
    throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
  }

  // "".equals(..) is false for null, so a null type still reaches the base-class test.
  final boolean emptyType = "".equals(datasetType);
  if (emptyType) {
    // https://github.com/linkedin/datahub/issues/3116
    throw new SkipException("ElasticSearchGraphService does not support empty destination type");
  }

  super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
}
/**
 * Always skipped for ElasticSearchGraphService; see the linked issue.
 */
@Test
@Override
public void testFindRelatedEntitiesNoRelationshipTypes() {
  // https://github.com/linkedin/datahub/issues/3117
  final String skipReason = "ElasticSearchGraphService does not support empty list of relationship types";
  throw new SkipException(skipReason);
}
/**
 * Delegates to the base-class test, except that cases using an UNDIRECTED relationship filter are skipped.
 */
@Override
public void testRemoveEdgesFromNode(@Nonnull Urn nodeToRemoveFrom,
    @Nonnull List<String> relationTypes,
    @Nonnull RelationshipFilter relationshipFilter,
    List<RelatedEntity> expectedOutgoingRelatedUrnsBeforeRemove,
    List<RelatedEntity> expectedIncomingRelatedUrnsBeforeRemove,
    List<RelatedEntity> expectedOutgoingRelatedUrnsAfterRemove,
    List<RelatedEntity> expectedIncomingRelatedUrnsAfterRemove) throws Exception {
  final boolean undirected = relationshipFilter.getDirection() == RelationshipDirection.UNDIRECTED;
  if (undirected) {
    // https://github.com/linkedin/datahub/issues/3114
    throw new SkipException("ElasticSearchGraphService does not implement UNDIRECTED relationship filter");
  }

  super.testRemoveEdgesFromNode(
      nodeToRemoveFrom,
      relationTypes,
      relationshipFilter,
      expectedOutgoingRelatedUrnsBeforeRemove,
      expectedIncomingRelatedUrnsBeforeRemove,
      expectedOutgoingRelatedUrnsAfterRemove,
      expectedIncomingRelatedUrnsAfterRemove);
}
/**
 * Always skipped for ElasticSearchGraphService; see the linked issue.
 */
@Test
@Override
public void testRemoveEdgesFromNodeNoRelationshipTypes() {
  // https://github.com/linkedin/datahub/issues/3117
  final String skipReason = "ElasticSearchGraphService does not support empty list of relationship types";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for ElasticSearchGraphService because the test is reported as flaky; see the linked issue.
 */
@Test
@Override
public void testConcurrentAddEdge() {
  // https://github.com/linkedin/datahub/issues/3124
  final String skipReason =
      "This test is flaky for ElasticSearchGraphService, ~5% of the runs fail on a race condition";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for ElasticSearchGraphService; see the linked issue.
 */
@Test
@Override
public void testConcurrentRemoveEdgesFromNode() {
  // https://github.com/linkedin/datahub/issues/3118
  final String skipReason = "ElasticSearchGraphService produces duplicates";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for ElasticSearchGraphService; see the linked issue.
 */
@Test
@Override
public void testConcurrentRemoveNodes() {
  // https://github.com/linkedin/datahub/issues/3118
  final String skipReason = "ElasticSearchGraphService produces duplicates";
  throw new SkipException(skipReason);
}
}

View File

@ -1,11 +1,19 @@
package com.linkedin.metadata.graph;
import com.linkedin.metadata.query.RelationshipFilter;
import org.neo4j.driver.Driver;
import org.neo4j.driver.GraphDatabase;
import org.testng.SkipException;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import javax.annotation.Nonnull;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import static org.testng.Assert.assertEquals;
public class Neo4jGraphServiceTest extends GraphServiceTestBase {
@ -28,11 +36,115 @@ public class Neo4jGraphServiceTest extends GraphServiceTestBase {
}
@Override
protected @Nonnull GraphService getGraphService() {
protected @Nonnull
GraphService getGraphService() {
return _client;
}
@Override
protected void syncAfterWrite() { }
protected void syncAfterWrite() {
}
/**
* Asserts that {@code actual} matches {@code expected}, comparing only the {@code start} field and the entities.
*
* The entities are compared through the list-based {@code assertEqualsAnyOrder} overload.
*
* @param actual the result produced by the graph service
* @param expected the result the test expects
*/
@Override
protected void assertEqualsAnyOrder(RelatedEntitiesResult actual, RelatedEntitiesResult expected) {
// https://github.com/linkedin/datahub/issues/3118
// Neo4jGraphService produces duplicates, which are ignored here until fixed;
// for the same reason actual.count and actual.total are not compared
assertEquals(actual.start, expected.start);
assertEqualsAnyOrder(actual.entities, expected.entities, RELATED_ENTITY_COMPARATOR);
}
/**
 * Asserts that both lists hold the same distinct elements, regardless of order and of how often an element occurs.
 *
 * @param actual the elements produced by the graph service
 * @param expected the elements the test expects
 * @param comparator not used by this override
 */
@Override
protected <T> void assertEqualsAnyOrder(List<T> actual, List<T> expected, Comparator<T> comparator) {
  // https://github.com/linkedin/datahub/issues/3118
  // Collapsing both sides into sets hides the duplicates Neo4jGraphService produces, until that is fixed.
  final HashSet<T> distinctActual = new HashSet<>(actual);
  final HashSet<T> distinctExpected = new HashSet<>(expected);
  assertEquals(distinctActual, distinctExpected);
}
/**
 * Delegates to the base-class test, skipping cases whose source type is empty or equals
 * {@code GraphServiceTestBase.userType}. A null type is passed through to the base-class test.
 */
@Override
public void testFindRelatedEntitiesSourceType(String datasetType,
    List<String> relationshipTypes,
    RelationshipFilter relationships,
    List<RelatedEntity> expectedRelatedEntities) throws Exception {
  if (datasetType != null) {
    if (datasetType.isEmpty()) {
      // https://github.com/linkedin/datahub/issues/3119
      throw new SkipException("Neo4jGraphService does not support empty source type");
    }
    if (datasetType.equals(GraphServiceTestBase.userType)) {
      // https://github.com/linkedin/datahub/issues/3123
      // only test cases with "user" type fail due to this bug
      throw new SkipException("Neo4jGraphService does not apply source / destination types");
    }
  }

  super.testFindRelatedEntitiesSourceType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
}
/**
 * Delegates to the base-class test, skipping cases whose destination type is empty (but non-null)
 * or whose relationship types contain {@code hasOwner}.
 */
@Override
public void testFindRelatedEntitiesDestinationType(String datasetType,
    List<String> relationshipTypes,
    RelationshipFilter relationships,
    List<RelatedEntity> expectedRelatedEntities) throws Exception {
  // "".equals(..) is false for null, so a null type is not skipped by this check.
  final boolean emptyType = "".equals(datasetType);
  if (emptyType) {
    // https://github.com/linkedin/datahub/issues/3119
    throw new SkipException("Neo4jGraphService does not support empty destination type");
  }

  final boolean usesHasOwner = relationshipTypes.contains(hasOwner);
  if (usesHasOwner) {
    // https://github.com/linkedin/datahub/issues/3123
    // only test cases with "HasOwner" relationship fail due to this bug
    throw new SkipException("Neo4jGraphService does not apply source / destination types");
  }

  super.testFindRelatedEntitiesDestinationType(datasetType, relationshipTypes, relationships, expectedRelatedEntities);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testFindRelatedEntitiesNullSourceType() throws Exception {
  // https://github.com/linkedin/datahub/issues/3121
  final String skipReason = "Neo4jGraphService does not support 'null' entity type string";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testFindRelatedEntitiesNullDestinationType() throws Exception {
  // https://github.com/linkedin/datahub/issues/3121
  final String skipReason = "Neo4jGraphService does not support 'null' entity type string";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testFindRelatedEntitiesNoRelationshipTypes() {
  // https://github.com/linkedin/datahub/issues/3120
  final String skipReason = "Neo4jGraphService does not support empty list of relationship types";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testRemoveEdgesFromNodeNoRelationshipTypes() {
  // https://github.com/linkedin/datahub/issues/3120
  final String skipReason = "Neo4jGraphService does not support empty list of relationship types";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testConcurrentAddEdge() {
  // https://github.com/linkedin/datahub/issues/3141
  final String skipReason = "Neo4jGraphService does not manage to add all edges added concurrently";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testConcurrentRemoveEdgesFromNode() {
  // https://github.com/linkedin/datahub/issues/3118
  final String skipReason = "Neo4jGraphService produces duplicates";
  throw new SkipException(skipReason);
}
/**
 * Always skipped for Neo4jGraphService; see the linked issue.
 */
@Test
@Override
public void testConcurrentRemoveNodes() {
  // https://github.com/linkedin/datahub/issues/3118
  final String skipReason = "Neo4jGraphService produces duplicates";
  throw new SkipException(skipReason);
}
}