refactor(extractor): Move extractors to entity-registry (#4307)

* Move extractors to entity-registry

* Fix test

* Fix checkstyle
This commit is contained in:
Dexter Lee 2022-03-03 16:57:32 -08:00 committed by GitHub
parent d96241c4e1
commit daab31d6ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 47 additions and 76 deletions

View File

@ -0,0 +1,38 @@
package com.linkedin.metadata.models.extractor;
import com.datahub.util.ModelUtils;
import com.linkedin.data.schema.RecordDataSchema;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.models.FieldSpec;
import com.linkedin.metadata.models.annotation.AspectAnnotation;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
/**
* Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}.
*/
@Slf4j
public class AspectExtractor {
private AspectExtractor() {
}
public static Map<String, RecordTemplate> extractAspectRecords(RecordTemplate snapshot) {
return ModelUtils.getAspectsFromSnapshot(snapshot)
.stream()
.collect(Collectors.toMap(record -> getAspectNameFromSchema(record.schema()), Function.identity()));
}
private static String getAspectNameFromSchema(final RecordDataSchema aspectSchema) {
final Object aspectAnnotationObj = aspectSchema.getProperties().get(AspectAnnotation.ANNOTATION_NAME);
if (aspectAnnotationObj != null) {
return AspectAnnotation.fromSchemaProperty(aspectAnnotationObj, aspectSchema.getFullName()).getName();
}
log.error(String.format("Failed to extract aspect name from provided schema %s", aspectSchema.getName()));
throw new IllegalArgumentException(
String.format("Failed to extract aspect name from provided schema %s", aspectSchema.getName()));
}
}

View File

@ -1,4 +1,4 @@
package com.linkedin.metadata.extractor;
package com.linkedin.metadata.models.extractor;
import com.linkedin.data.schema.PathSpec;
import com.linkedin.data.template.RecordTemplate;

View File

@ -1,58 +0,0 @@
package com.linkedin.metadata.extractor;
import com.datahub.util.ModelUtils;
import com.linkedin.data.element.DataElement;
import com.linkedin.data.it.IterationOrder;
import com.linkedin.data.it.ObjectIterator;
import com.linkedin.data.schema.PathSpec;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.models.FieldSpec;
import com.linkedin.metadata.utils.PegasusUtils;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
* Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}.
*/
public class AspectExtractor {
public static final String ASPECT_FIELD = "aspects";
private AspectExtractor() {
}
/**
* Function to extract the fields that match the input fieldSpecs
*/
public static Map<String, DataElement> extractAspects(RecordTemplate snapshot) {
final ObjectIterator iterator = new ObjectIterator(snapshot.data(), snapshot.schema(), IterationOrder.PRE_ORDER);
final Map<String, DataElement> aspectsByName = new HashMap<>();
for (DataElement dataElement = iterator.next(); dataElement != null; dataElement = iterator.next()) {
if (dataElement.getSchemaPathSpec() == null) {
continue;
}
final PathSpec pathSpec = dataElement.getSchemaPathSpec();
List<String> pathComponents = pathSpec.getPathComponents();
// three components representing /aspect/*/<aspectClassName>
if (pathComponents.size() != 3) {
continue;
}
String aspectName = PegasusUtils.getAspectNameFromFullyQualifiedName(pathComponents.get(2));
aspectsByName.put(aspectName, dataElement);
}
return aspectsByName;
}
public static Map<String, RecordTemplate> extractAspectRecords(RecordTemplate snapshot) {
return ModelUtils.getAspectsFromSnapshot(snapshot)
.stream()
.collect(
Collectors.toMap(record -> PegasusUtils.getAspectNameFromSchema(record.schema()), Function.identity()));
}
}

View File

@ -7,7 +7,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.extractor.FieldExtractor;
import com.linkedin.metadata.models.extractor.FieldExtractor;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.SearchableFieldSpec;

View File

@ -12,7 +12,7 @@ import com.linkedin.data.DataMap;
import com.linkedin.data.schema.ArrayDataSchema;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.extractor.FieldExtractor;
import com.linkedin.metadata.models.extractor.FieldExtractor;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec;
import com.linkedin.metadata.models.TimeseriesFieldSpec;

View File

@ -5,38 +5,28 @@ import com.datahub.test.TestEntityAspectArray;
import com.datahub.test.TestEntityInfo;
import com.datahub.test.TestEntityKey;
import com.datahub.test.TestEntitySnapshot;
import com.google.common.collect.ImmutableMap;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.element.DataElement;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.TestEntityUtil;
import com.linkedin.metadata.models.extractor.AspectExtractor;
import java.util.Map;
import org.testng.annotations.Test;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertTrue;
public class AspectExtractorTest {
@Test
public void testExtractor() {
TestEntitySnapshot snapshot = new TestEntitySnapshot();
// Empty snapshot should return empty map
assertEquals(AspectExtractor.extractAspects(new TestEntitySnapshot()), ImmutableMap.of());
Urn urn = TestEntityUtil.getTestEntityUrn();
TestEntityKey testEntityKey = TestEntityUtil.getTestEntityKey(urn);
TestEntityInfo testEntityInfo = TestEntityUtil.getTestEntityInfo(urn);
snapshot.setAspects(
new TestEntityAspectArray(TestEntityAspect.create(testEntityKey), TestEntityAspect.create(testEntityInfo)));
Map<String, DataElement> result = AspectExtractor.extractAspects(snapshot);
Map<String, RecordTemplate> result = AspectExtractor.extractAspectRecords(snapshot);
assertEquals(result.size(), 2);
assertTrue(result.containsKey("testEntityKey"));
assertTrue(result.containsKey("testEntityInfo"));
Map<String, RecordTemplate> result2 = AspectExtractor.extractAspectRecords(snapshot);
assertEquals(result2.size(), 2);
assertEquals(result2.get("testEntityKey"), testEntityKey);
assertEquals(result2.get("testEntityInfo"), testEntityInfo);
assertEquals(result.get("testEntityKey"), testEntityKey);
assertEquals(result.get("testEntityInfo"), testEntityInfo);
}
}

View File

@ -7,6 +7,7 @@ import com.linkedin.metadata.TestEntityUtil;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.SearchableFieldSpec;
import com.linkedin.metadata.models.extractor.FieldExtractor;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

View File

@ -11,7 +11,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory;
import com.linkedin.gms.factory.search.EntitySearchServiceFactory;
import com.linkedin.gms.factory.search.SearchDocumentTransformerFactory;
import com.linkedin.gms.factory.timeseries.TimeseriesAspectServiceFactory;
import com.linkedin.metadata.extractor.FieldExtractor;
import com.linkedin.metadata.models.extractor.FieldExtractor;
import com.linkedin.metadata.graph.Edge;
import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.models.AspectSpec;