From 5fd2110b7f1f5f30b4f8af12421e997a00d37a1a Mon Sep 17 00:00:00 2001 From: John Plaisted Date: Wed, 28 Oct 2020 11:32:18 -0700 Subject: [PATCH] Revert "refactor search index builder to store urn parts efficiently (#1937)" (#1970) This broke MAE processor because not all documents have urns now. This reverts commit 5fca512a07cea75480633188e634ba07f49cdb28. --- .../search/DataProcessIndexBuilder.java | 17 +++++++++------ .../builders/search/DatasetIndexBuilder.java | 21 +++++++++---------- .../search/DataProcessIndexBuilderTest.java | 5 +++-- .../search/DatasetIndexBuilderTest.java | 10 ++++----- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java b/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java index b4b0f4f190..a2c33d602a 100644 --- a/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java +++ b/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.builders.search; import com.linkedin.common.Ownership; +import com.linkedin.common.Status; import com.linkedin.common.urn.DataProcessUrn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; @@ -41,7 +42,7 @@ public class DataProcessIndexBuilder extends BaseIndexBuilder getDocumentsToUpdateFromSnapshotType(@Nonnull DataProcessSnapshot dataProcessSnapshot) { - final DataProcessUrn urn = dataProcessSnapshot.getUrn(); - final List documents = dataProcessSnapshot.getAspects().stream().map(aspect -> { + DataProcessUrn urn = dataProcessSnapshot.getUrn(); + return dataProcessSnapshot.getAspects().stream().map(aspect -> { if (aspect.isDataProcessInfo()) { return getDocumentToUpdateFromAspect(urn, aspect.getDataProcessInfo()); } else if (aspect.isOwnership()) { @@ -72,8 +79,6 @@ public class DataProcessIndexBuilder extends BaseIndexBuilder { @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Ownership ownership) { final StringArray owners = BuilderUtils.getCorpUserOwners(ownership); - return new DatasetDocument() + return setUrnDerivedFields(urn) .setHasOwners(!owners.isEmpty()) .setOwners(owners); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Status status) { - return new DatasetDocument() + return setUrnDerivedFields(urn) .setRemoved(status.isRemoved()); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetDeprecation deprecation) { - return new DatasetDocument().setDeprecated(deprecation.isDeprecated()); + return setUrnDerivedFields(urn).setDeprecated(deprecation.isDeprecated()); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetProperties datasetProperties) { - final DatasetDocument doc = new DatasetDocument(); - if (datasetProperties.getDescription() != null) { + final DatasetDocument doc = setUrnDerivedFields(urn); + if (datasetProperties.hasDescription()) { doc.setDescription(datasetProperties.getDescription()); + } else { + doc.setDescription(""); } return doc; } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) { - return new DatasetDocument() + return setUrnDerivedFields(urn) .setHasSchema(true); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull UpstreamLineage upstreamLineage) { - return new DatasetDocument() + return setUrnDerivedFields(urn) .setUpstreams(new DatasetUrnArray( upstreamLineage.getUpstreams().stream().map(upstream -> upstream.getDataset()).collect(Collectors.toList()) )); @@ -93,7 +95,7 @@ public class DatasetIndexBuilder extends BaseIndexBuilder { @Nonnull private List getDocumentsToUpdateFromSnapshotType(@Nonnull DatasetSnapshot datasetSnapshot) { final DatasetUrn urn = datasetSnapshot.getUrn(); - final List documents = datasetSnapshot.getAspects().stream().map(aspect -> { + return datasetSnapshot.getAspects().stream().map(aspect -> { if (aspect.isDatasetDeprecation()) { return getDocumentToUpdateFromAspect(urn, aspect.getDatasetDeprecation()); } else if (aspect.isDatasetProperties()) { @@ -109,8 +111,6 @@ public class DatasetIndexBuilder extends BaseIndexBuilder { } return null; }).filter(Objects::nonNull).collect(Collectors.toList()); - documents.add(setUrnDerivedFields(urn)); - return documents; } @Override @@ -123,7 +123,6 @@ public class DatasetIndexBuilder extends BaseIndexBuilder { } @Override - @Nonnull public Class getDocumentType() { return DatasetDocument.class; } diff --git a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java index 8ba9597420..e84374cf06 100644 --- a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java +++ b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java @@ -39,9 +39,10 @@ public class DataProcessIndexBuilderTest { new DataProcessSnapshot().setUrn(dataProcessUrn).setAspects(dataProcessAspectArray); List actualDocs = new DataProcessIndexBuilder().getDocumentsToUpdate(dataProcessSnapshot); - assertEquals(actualDocs.size(), 2); + assertEquals(actualDocs.size(), 1); + assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn); assertEquals(actualDocs.get(0).getInputs().get(0), inputDatasetUrn); assertEquals(actualDocs.get(0).getOutputs().get(0), outputDatasetUrn); - assertEquals(actualDocs.get(1).getUrn(), dataProcessUrn); + } } diff --git a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java index 8155c4f9f1..0202000745 100644 --- a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java +++ b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java @@ -25,16 +25,16 @@ public class DatasetIndexBuilderTest { DatasetSnapshot datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn, Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties))); List actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot); - assertEquals(actualDocs.size(), 2); + assertEquals(actualDocs.size(), 1); + assertEquals(actualDocs.get(0).getUrn(), datasetUrn); assertEquals(actualDocs.get(0).getDescription(), "baz"); - assertEquals(actualDocs.get(1).getUrn(), datasetUrn); datasetProperties = new DatasetProperties(); datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn, Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties))); actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot); - assertEquals(actualDocs.size(), 2); - assertNull(actualDocs.get(0).getDescription()); - assertEquals(actualDocs.get(1).getUrn(), datasetUrn); + assertEquals(actualDocs.size(), 1); + assertEquals(actualDocs.get(0).getUrn(), datasetUrn); + assertEquals(actualDocs.get(0).getDescription(), ""); } } \ No newline at end of file