diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/AspectExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/AspectExtractor.java new file mode 100644 index 0000000000..720eb87ec5 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/AspectExtractor.java @@ -0,0 +1,38 @@ +package com.linkedin.metadata.models.extractor; + +import com.datahub.util.ModelUtils; +import com.linkedin.data.schema.RecordDataSchema; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.models.FieldSpec; +import com.linkedin.metadata.models.annotation.AspectAnnotation; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; + + +/** + * Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}. + */ +@Slf4j +public class AspectExtractor { + + private AspectExtractor() { + } + + public static Map extractAspectRecords(RecordTemplate snapshot) { + return ModelUtils.getAspectsFromSnapshot(snapshot) + .stream() + .collect(Collectors.toMap(record -> getAspectNameFromSchema(record.schema()), Function.identity())); + } + + private static String getAspectNameFromSchema(final RecordDataSchema aspectSchema) { + final Object aspectAnnotationObj = aspectSchema.getProperties().get(AspectAnnotation.ANNOTATION_NAME); + if (aspectAnnotationObj != null) { + return AspectAnnotation.fromSchemaProperty(aspectAnnotationObj, aspectSchema.getFullName()).getName(); + } + log.error(String.format("Failed to extract aspect name from provided schema %s", aspectSchema.getName())); + throw new IllegalArgumentException( + String.format("Failed to extract aspect name from provided schema %s", aspectSchema.getName())); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java similarity index 98% rename from metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java rename to entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java index abe00fcc58..bac2ec5698 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.extractor; +package com.linkedin.metadata.models.extractor; import com.linkedin.data.schema.PathSpec; import com.linkedin.data.template.RecordTemplate; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/extractor/AspectExtractor.java b/metadata-io/src/main/java/com/linkedin/metadata/extractor/AspectExtractor.java deleted file mode 100644 index 5719c90c59..0000000000 --- a/metadata-io/src/main/java/com/linkedin/metadata/extractor/AspectExtractor.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.linkedin.metadata.extractor; - -import com.datahub.util.ModelUtils; -import com.linkedin.data.element.DataElement; -import com.linkedin.data.it.IterationOrder; -import com.linkedin.data.it.ObjectIterator; -import com.linkedin.data.schema.PathSpec; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.models.FieldSpec; -import com.linkedin.metadata.utils.PegasusUtils; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; - - -/** - * Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}. - */ -public class AspectExtractor { - public static final String ASPECT_FIELD = "aspects"; - - private AspectExtractor() { - } - - /** - * Function to extract the fields that match the input fieldSpecs - */ - public static Map extractAspects(RecordTemplate snapshot) { - - final ObjectIterator iterator = new ObjectIterator(snapshot.data(), snapshot.schema(), IterationOrder.PRE_ORDER); - final Map aspectsByName = new HashMap<>(); - - for (DataElement dataElement = iterator.next(); dataElement != null; dataElement = iterator.next()) { - if (dataElement.getSchemaPathSpec() == null) { - continue; - } - final PathSpec pathSpec = dataElement.getSchemaPathSpec(); - List pathComponents = pathSpec.getPathComponents(); - // three components representing /aspect/*/ - if (pathComponents.size() != 3) { - continue; - } - String aspectName = PegasusUtils.getAspectNameFromFullyQualifiedName(pathComponents.get(2)); - aspectsByName.put(aspectName, dataElement); - } - - return aspectsByName; - } - - public static Map extractAspectRecords(RecordTemplate snapshot) { - return ModelUtils.getAspectsFromSnapshot(snapshot) - .stream() - .collect( - Collectors.toMap(record -> PegasusUtils.getAspectNameFromSchema(record.schema()), Function.identity())); - } -} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index a61d5622e3..966556bdf1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -7,7 +7,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.DataSchema; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.extractor.FieldExtractor; +import com.linkedin.metadata.models.extractor.FieldExtractor; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java index b64a0883e6..723be5e0b9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/transformer/TimeseriesAspectTransformer.java @@ -12,7 +12,7 @@ import com.linkedin.data.DataMap; import com.linkedin.data.schema.ArrayDataSchema; import com.linkedin.data.schema.DataSchema; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.extractor.FieldExtractor; +import com.linkedin.metadata.models.extractor.FieldExtractor; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; import com.linkedin.metadata.models.TimeseriesFieldSpec; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java index 0153a4abb4..c7ab24e87a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/extractor/AspectExtractorTest.java @@ -5,38 +5,28 @@ import com.datahub.test.TestEntityAspectArray; import com.datahub.test.TestEntityInfo; import com.datahub.test.TestEntityKey; import com.datahub.test.TestEntitySnapshot; -import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; -import com.linkedin.data.element.DataElement; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.TestEntityUtil; +import com.linkedin.metadata.models.extractor.AspectExtractor; import java.util.Map; import org.testng.annotations.Test; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertTrue; public class AspectExtractorTest { @Test public void testExtractor() { TestEntitySnapshot snapshot = new TestEntitySnapshot(); - // Empty snapshot should return empty map - assertEquals(AspectExtractor.extractAspects(new TestEntitySnapshot()), ImmutableMap.of()); - Urn urn = TestEntityUtil.getTestEntityUrn(); TestEntityKey testEntityKey = TestEntityUtil.getTestEntityKey(urn); TestEntityInfo testEntityInfo = TestEntityUtil.getTestEntityInfo(urn); snapshot.setAspects( new TestEntityAspectArray(TestEntityAspect.create(testEntityKey), TestEntityAspect.create(testEntityInfo))); - Map result = AspectExtractor.extractAspects(snapshot); + Map result = AspectExtractor.extractAspectRecords(snapshot); assertEquals(result.size(), 2); - assertTrue(result.containsKey("testEntityKey")); - assertTrue(result.containsKey("testEntityInfo")); - - Map result2 = AspectExtractor.extractAspectRecords(snapshot); - assertEquals(result2.size(), 2); - assertEquals(result2.get("testEntityKey"), testEntityKey); - assertEquals(result2.get("testEntityInfo"), testEntityInfo); + assertEquals(result.get("testEntityKey"), testEntityKey); + assertEquals(result.get("testEntityInfo"), testEntityInfo); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java index 33ef9db172..47089f1e2f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.TestEntityUtil; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.extractor.FieldExtractor; import java.util.List; import java.util.Map; import java.util.function.Function; diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java index 4da233610f..daf483b3d6 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java @@ -11,7 +11,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.EntitySearchServiceFactory; import com.linkedin.gms.factory.search.SearchDocumentTransformerFactory; import com.linkedin.gms.factory.timeseries.TimeseriesAspectServiceFactory; -import com.linkedin.metadata.extractor.FieldExtractor; +import com.linkedin.metadata.models.extractor.FieldExtractor; import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.models.AspectSpec;