mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-17 13:45:54 +00:00
refactor(extractor): Move extractors to entity-registry (#4307)
* Move extractors to entity-registry * Fix test * Fix checkstyle
This commit is contained in:
parent
d96241c4e1
commit
daab31d6ef
@ -0,0 +1,38 @@
|
||||
package com.linkedin.metadata.models.extractor;
|
||||
|
||||
import com.datahub.util.ModelUtils;
|
||||
import com.linkedin.data.schema.RecordDataSchema;
|
||||
import com.linkedin.data.template.RecordTemplate;
|
||||
import com.linkedin.metadata.models.FieldSpec;
|
||||
import com.linkedin.metadata.models.annotation.AspectAnnotation;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
|
||||
/**
|
||||
* Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}.
|
||||
*/
|
||||
@Slf4j
|
||||
public class AspectExtractor {
|
||||
|
||||
private AspectExtractor() {
|
||||
}
|
||||
|
||||
public static Map<String, RecordTemplate> extractAspectRecords(RecordTemplate snapshot) {
|
||||
return ModelUtils.getAspectsFromSnapshot(snapshot)
|
||||
.stream()
|
||||
.collect(Collectors.toMap(record -> getAspectNameFromSchema(record.schema()), Function.identity()));
|
||||
}
|
||||
|
||||
private static String getAspectNameFromSchema(final RecordDataSchema aspectSchema) {
|
||||
final Object aspectAnnotationObj = aspectSchema.getProperties().get(AspectAnnotation.ANNOTATION_NAME);
|
||||
if (aspectAnnotationObj != null) {
|
||||
return AspectAnnotation.fromSchemaProperty(aspectAnnotationObj, aspectSchema.getFullName()).getName();
|
||||
}
|
||||
log.error(String.format("Failed to extract aspect name from provided schema %s", aspectSchema.getName()));
|
||||
throw new IllegalArgumentException(
|
||||
String.format("Failed to extract aspect name from provided schema %s", aspectSchema.getName()));
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package com.linkedin.metadata.extractor;
|
||||
package com.linkedin.metadata.models.extractor;
|
||||
|
||||
import com.linkedin.data.schema.PathSpec;
|
||||
import com.linkedin.data.template.RecordTemplate;
|
@ -1,58 +0,0 @@
|
||||
package com.linkedin.metadata.extractor;
|
||||
|
||||
import com.datahub.util.ModelUtils;
|
||||
import com.linkedin.data.element.DataElement;
|
||||
import com.linkedin.data.it.IterationOrder;
|
||||
import com.linkedin.data.it.ObjectIterator;
|
||||
import com.linkedin.data.schema.PathSpec;
|
||||
import com.linkedin.data.template.RecordTemplate;
|
||||
import com.linkedin.metadata.models.FieldSpec;
|
||||
import com.linkedin.metadata.utils.PegasusUtils;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
/**
|
||||
* Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}.
|
||||
*/
|
||||
public class AspectExtractor {
|
||||
public static final String ASPECT_FIELD = "aspects";
|
||||
|
||||
private AspectExtractor() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Function to extract the fields that match the input fieldSpecs
|
||||
*/
|
||||
public static Map<String, DataElement> extractAspects(RecordTemplate snapshot) {
|
||||
|
||||
final ObjectIterator iterator = new ObjectIterator(snapshot.data(), snapshot.schema(), IterationOrder.PRE_ORDER);
|
||||
final Map<String, DataElement> aspectsByName = new HashMap<>();
|
||||
|
||||
for (DataElement dataElement = iterator.next(); dataElement != null; dataElement = iterator.next()) {
|
||||
if (dataElement.getSchemaPathSpec() == null) {
|
||||
continue;
|
||||
}
|
||||
final PathSpec pathSpec = dataElement.getSchemaPathSpec();
|
||||
List<String> pathComponents = pathSpec.getPathComponents();
|
||||
// three components representing /aspect/*/<aspectClassName>
|
||||
if (pathComponents.size() != 3) {
|
||||
continue;
|
||||
}
|
||||
String aspectName = PegasusUtils.getAspectNameFromFullyQualifiedName(pathComponents.get(2));
|
||||
aspectsByName.put(aspectName, dataElement);
|
||||
}
|
||||
|
||||
return aspectsByName;
|
||||
}
|
||||
|
||||
public static Map<String, RecordTemplate> extractAspectRecords(RecordTemplate snapshot) {
|
||||
return ModelUtils.getAspectsFromSnapshot(snapshot)
|
||||
.stream()
|
||||
.collect(
|
||||
Collectors.toMap(record -> PegasusUtils.getAspectNameFromSchema(record.schema()), Function.identity()));
|
||||
}
|
||||
}
|
@ -7,7 +7,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.linkedin.common.urn.Urn;
|
||||
import com.linkedin.data.schema.DataSchema;
|
||||
import com.linkedin.data.template.RecordTemplate;
|
||||
import com.linkedin.metadata.extractor.FieldExtractor;
|
||||
import com.linkedin.metadata.models.extractor.FieldExtractor;
|
||||
import com.linkedin.metadata.models.AspectSpec;
|
||||
import com.linkedin.metadata.models.EntitySpec;
|
||||
import com.linkedin.metadata.models.SearchableFieldSpec;
|
||||
|
@ -12,7 +12,7 @@ import com.linkedin.data.DataMap;
|
||||
import com.linkedin.data.schema.ArrayDataSchema;
|
||||
import com.linkedin.data.schema.DataSchema;
|
||||
import com.linkedin.data.template.RecordTemplate;
|
||||
import com.linkedin.metadata.extractor.FieldExtractor;
|
||||
import com.linkedin.metadata.models.extractor.FieldExtractor;
|
||||
import com.linkedin.metadata.models.AspectSpec;
|
||||
import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec;
|
||||
import com.linkedin.metadata.models.TimeseriesFieldSpec;
|
||||
|
@ -5,38 +5,28 @@ import com.datahub.test.TestEntityAspectArray;
|
||||
import com.datahub.test.TestEntityInfo;
|
||||
import com.datahub.test.TestEntityKey;
|
||||
import com.datahub.test.TestEntitySnapshot;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.linkedin.common.urn.Urn;
|
||||
import com.linkedin.data.element.DataElement;
|
||||
import com.linkedin.data.template.RecordTemplate;
|
||||
import com.linkedin.metadata.TestEntityUtil;
|
||||
import com.linkedin.metadata.models.extractor.AspectExtractor;
|
||||
import java.util.Map;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.testng.AssertJUnit.assertEquals;
|
||||
import static org.testng.AssertJUnit.assertTrue;
|
||||
|
||||
|
||||
public class AspectExtractorTest {
|
||||
@Test
|
||||
public void testExtractor() {
|
||||
TestEntitySnapshot snapshot = new TestEntitySnapshot();
|
||||
// Empty snapshot should return empty map
|
||||
assertEquals(AspectExtractor.extractAspects(new TestEntitySnapshot()), ImmutableMap.of());
|
||||
|
||||
Urn urn = TestEntityUtil.getTestEntityUrn();
|
||||
TestEntityKey testEntityKey = TestEntityUtil.getTestEntityKey(urn);
|
||||
TestEntityInfo testEntityInfo = TestEntityUtil.getTestEntityInfo(urn);
|
||||
snapshot.setAspects(
|
||||
new TestEntityAspectArray(TestEntityAspect.create(testEntityKey), TestEntityAspect.create(testEntityInfo)));
|
||||
Map<String, DataElement> result = AspectExtractor.extractAspects(snapshot);
|
||||
Map<String, RecordTemplate> result = AspectExtractor.extractAspectRecords(snapshot);
|
||||
assertEquals(result.size(), 2);
|
||||
assertTrue(result.containsKey("testEntityKey"));
|
||||
assertTrue(result.containsKey("testEntityInfo"));
|
||||
|
||||
Map<String, RecordTemplate> result2 = AspectExtractor.extractAspectRecords(snapshot);
|
||||
assertEquals(result2.size(), 2);
|
||||
assertEquals(result2.get("testEntityKey"), testEntityKey);
|
||||
assertEquals(result2.get("testEntityInfo"), testEntityInfo);
|
||||
assertEquals(result.get("testEntityKey"), testEntityKey);
|
||||
assertEquals(result.get("testEntityInfo"), testEntityInfo);
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ import com.linkedin.metadata.TestEntityUtil;
|
||||
import com.linkedin.metadata.models.AspectSpec;
|
||||
import com.linkedin.metadata.models.EntitySpec;
|
||||
import com.linkedin.metadata.models.SearchableFieldSpec;
|
||||
import com.linkedin.metadata.models.extractor.FieldExtractor;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
|
@ -11,7 +11,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory;
|
||||
import com.linkedin.gms.factory.search.EntitySearchServiceFactory;
|
||||
import com.linkedin.gms.factory.search.SearchDocumentTransformerFactory;
|
||||
import com.linkedin.gms.factory.timeseries.TimeseriesAspectServiceFactory;
|
||||
import com.linkedin.metadata.extractor.FieldExtractor;
|
||||
import com.linkedin.metadata.models.extractor.FieldExtractor;
|
||||
import com.linkedin.metadata.graph.Edge;
|
||||
import com.linkedin.metadata.graph.GraphService;
|
||||
import com.linkedin.metadata.models.AspectSpec;
|
||||
|
Loading…
x
Reference in New Issue
Block a user