mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-24 00:57:59 +00:00
fix(dataProduct): reduce write fan-out for unset side effect (#11951)
This commit is contained in:
parent
90fe14aadb
commit
32ef389440
@ -27,6 +27,7 @@ import com.linkedin.metadata.search.utils.QueryUtils;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
@ -70,6 +71,7 @@ public class DataProductUnsetSideEffect extends MCPSideEffect {
|
|||||||
log.error("Unable to process data product properties for urn: {}", mclItem.getUrn());
|
log.error("Unable to process data product properties for urn: {}", mclItem.getUrn());
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
|
Map<String, List<GenericJsonPatch.PatchOp>> patchOpMap = new HashMap<>();
|
||||||
for (DataProductAssociation dataProductAssociation :
|
for (DataProductAssociation dataProductAssociation :
|
||||||
Optional.ofNullable(dataProductProperties.getAssets())
|
Optional.ofNullable(dataProductProperties.getAssets())
|
||||||
.orElse(new DataProductAssociationArray())) {
|
.orElse(new DataProductAssociationArray())) {
|
||||||
@ -93,40 +95,45 @@ public class DataProductUnsetSideEffect extends MCPSideEffect {
|
|||||||
if (!result.getEntities().isEmpty()) {
|
if (!result.getEntities().isEmpty()) {
|
||||||
for (RelatedEntities entity : result.getEntities()) {
|
for (RelatedEntities entity : result.getEntities()) {
|
||||||
if (!mclItem.getUrn().equals(UrnUtils.getUrn(entity.getSourceUrn()))) {
|
if (!mclItem.getUrn().equals(UrnUtils.getUrn(entity.getSourceUrn()))) {
|
||||||
EntitySpec entitySpec =
|
|
||||||
retrieverContext
|
|
||||||
.getAspectRetriever()
|
|
||||||
.getEntityRegistry()
|
|
||||||
.getEntitySpec(DATA_PRODUCT_ENTITY_NAME);
|
|
||||||
GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp();
|
GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp();
|
||||||
patchOp.setOp(PatchOperationType.REMOVE.getValue());
|
patchOp.setOp(PatchOperationType.REMOVE.getValue());
|
||||||
patchOp.setPath(String.format("/assets/%s", entity.getDestinationUrn()));
|
patchOp.setPath(String.format("/assets/%s", entity.getDestinationUrn()));
|
||||||
mcpItems.add(
|
patchOpMap
|
||||||
PatchItemImpl.builder()
|
.computeIfAbsent(entity.getSourceUrn(), urn -> new ArrayList<>())
|
||||||
.urn(UrnUtils.getUrn(entity.getSourceUrn()))
|
.add(patchOp);
|
||||||
.entitySpec(
|
|
||||||
retrieverContext
|
|
||||||
.getAspectRetriever()
|
|
||||||
.getEntityRegistry()
|
|
||||||
.getEntitySpec(DATA_PRODUCT_ENTITY_NAME))
|
|
||||||
.aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)
|
|
||||||
.aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME))
|
|
||||||
.patch(
|
|
||||||
GenericJsonPatch.builder()
|
|
||||||
.arrayPrimaryKeys(
|
|
||||||
Map.of(
|
|
||||||
DataProductPropertiesTemplate.ASSETS_FIELD_NAME,
|
|
||||||
List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME)))
|
|
||||||
.patch(List.of(patchOp))
|
|
||||||
.build()
|
|
||||||
.getJsonPatch())
|
|
||||||
.auditStamp(mclItem.getAuditStamp())
|
|
||||||
.systemMetadata(mclItem.getSystemMetadata())
|
|
||||||
.build(retrieverContext.getAspectRetriever().getEntityRegistry()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (String urn : patchOpMap.keySet()) {
|
||||||
|
EntitySpec entitySpec =
|
||||||
|
retrieverContext
|
||||||
|
.getAspectRetriever()
|
||||||
|
.getEntityRegistry()
|
||||||
|
.getEntitySpec(DATA_PRODUCT_ENTITY_NAME);
|
||||||
|
mcpItems.add(
|
||||||
|
PatchItemImpl.builder()
|
||||||
|
.urn(UrnUtils.getUrn(urn))
|
||||||
|
.entitySpec(
|
||||||
|
retrieverContext
|
||||||
|
.getAspectRetriever()
|
||||||
|
.getEntityRegistry()
|
||||||
|
.getEntitySpec(DATA_PRODUCT_ENTITY_NAME))
|
||||||
|
.aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)
|
||||||
|
.aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME))
|
||||||
|
.patch(
|
||||||
|
GenericJsonPatch.builder()
|
||||||
|
.arrayPrimaryKeys(
|
||||||
|
Map.of(
|
||||||
|
DataProductPropertiesTemplate.ASSETS_FIELD_NAME,
|
||||||
|
List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME)))
|
||||||
|
.patch(patchOpMap.get(urn))
|
||||||
|
.build()
|
||||||
|
.getJsonPatch())
|
||||||
|
.auditStamp(mclItem.getAuditStamp())
|
||||||
|
.systemMetadata(mclItem.getSystemMetadata())
|
||||||
|
.build(retrieverContext.getAspectRetriever().getEntityRegistry()));
|
||||||
|
}
|
||||||
return mcpItems.stream();
|
return mcpItems.stream();
|
||||||
}
|
}
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
|
@ -34,6 +34,8 @@ import com.linkedin.metadata.search.utils.QueryUtils;
|
|||||||
import com.linkedin.metadata.utils.AuditStampUtils;
|
import com.linkedin.metadata.utils.AuditStampUtils;
|
||||||
import com.linkedin.test.metadata.aspect.TestEntityRegistry;
|
import com.linkedin.test.metadata.aspect.TestEntityRegistry;
|
||||||
import io.datahubproject.metadata.context.RetrieverContext;
|
import io.datahubproject.metadata.context.RetrieverContext;
|
||||||
|
import jakarta.json.JsonArray;
|
||||||
|
import jakarta.json.JsonObject;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -251,6 +253,111 @@ public class DataProductUnsetSideEffectTest {
|
|||||||
.build(mockAspectRetriever.getEntityRegistry())));
|
.build(mockAspectRetriever.getEntityRegistry())));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBulkAssetMove() {
|
||||||
|
DataProductUnsetSideEffect test = new DataProductUnsetSideEffect();
|
||||||
|
test.setConfig(TEST_PLUGIN_CONFIG);
|
||||||
|
|
||||||
|
// Create 100 dataset URNs and set up their existing relationships
|
||||||
|
List<Urn> datasetUrns = new ArrayList<>();
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
Urn datasetUrn =
|
||||||
|
UrnUtils.getUrn(
|
||||||
|
String.format("urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_%d,PROD)", i));
|
||||||
|
datasetUrns.add(datasetUrn);
|
||||||
|
|
||||||
|
// Mock the existing relationship for each dataset with the old data product
|
||||||
|
RelatedEntities relatedEntities =
|
||||||
|
new RelatedEntities(
|
||||||
|
"DataProductContains",
|
||||||
|
TEST_PRODUCT_URN_2.toString(), // Old data product
|
||||||
|
datasetUrn.toString(),
|
||||||
|
RelationshipDirection.INCOMING,
|
||||||
|
null);
|
||||||
|
|
||||||
|
List<RelatedEntities> relatedEntitiesList = new ArrayList<>();
|
||||||
|
relatedEntitiesList.add(relatedEntities);
|
||||||
|
RelatedEntitiesScrollResult relatedEntitiesScrollResult =
|
||||||
|
new RelatedEntitiesScrollResult(1, 10, null, relatedEntitiesList);
|
||||||
|
|
||||||
|
when(retrieverContext
|
||||||
|
.getGraphRetriever()
|
||||||
|
.scrollRelatedEntities(
|
||||||
|
eq(null),
|
||||||
|
eq(QueryUtils.newFilter("urn", datasetUrn.toString())),
|
||||||
|
eq(null),
|
||||||
|
eq(EMPTY_FILTER),
|
||||||
|
eq(ImmutableList.of("DataProductContains")),
|
||||||
|
eq(
|
||||||
|
QueryUtils.newRelationshipFilter(
|
||||||
|
EMPTY_FILTER, RelationshipDirection.INCOMING)),
|
||||||
|
eq(Collections.emptyList()),
|
||||||
|
eq(null),
|
||||||
|
eq(10),
|
||||||
|
eq(null),
|
||||||
|
eq(null)))
|
||||||
|
.thenReturn(relatedEntitiesScrollResult);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create data product properties with all 100 assets
|
||||||
|
DataProductProperties dataProductProperties = new DataProductProperties();
|
||||||
|
DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray();
|
||||||
|
for (Urn datasetUrn : datasetUrns) {
|
||||||
|
DataProductAssociation association = new DataProductAssociation();
|
||||||
|
association.setDestinationUrn(datasetUrn);
|
||||||
|
dataProductAssociations.add(association);
|
||||||
|
}
|
||||||
|
dataProductProperties.setAssets(dataProductAssociations);
|
||||||
|
|
||||||
|
// Run test
|
||||||
|
ChangeItemImpl dataProductPropertiesChangeItem =
|
||||||
|
ChangeItemImpl.builder()
|
||||||
|
.urn(TEST_PRODUCT_URN) // New data product
|
||||||
|
.aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)
|
||||||
|
.changeType(ChangeType.UPSERT)
|
||||||
|
.entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME))
|
||||||
|
.aspectSpec(
|
||||||
|
TEST_REGISTRY
|
||||||
|
.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)
|
||||||
|
.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME))
|
||||||
|
.recordTemplate(dataProductProperties)
|
||||||
|
.auditStamp(AuditStampUtils.createDefaultAuditStamp())
|
||||||
|
.build(mockAspectRetriever);
|
||||||
|
|
||||||
|
List<MCPItem> testOutput =
|
||||||
|
test.postMCPSideEffect(
|
||||||
|
List.of(
|
||||||
|
MCLItemImpl.builder()
|
||||||
|
.build(
|
||||||
|
dataProductPropertiesChangeItem,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
retrieverContext.getAspectRetriever())),
|
||||||
|
retrieverContext)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
// Verify test
|
||||||
|
assertEquals(testOutput.size(), 1, "Expected one patch to remove assets from old data product");
|
||||||
|
|
||||||
|
MCPItem patchItem = testOutput.get(0);
|
||||||
|
assertEquals(
|
||||||
|
patchItem.getUrn(), TEST_PRODUCT_URN_2, "Patch should target the old data product");
|
||||||
|
assertEquals(patchItem.getAspectName(), DATA_PRODUCT_PROPERTIES_ASPECT_NAME);
|
||||||
|
|
||||||
|
// Verify the patch contains remove operations for all 100 assets
|
||||||
|
JsonArray patchArray = ((PatchItemImpl) patchItem).getPatch().toJsonArray();
|
||||||
|
assertEquals(patchArray.size(), 100, "Should have 100 remove operations");
|
||||||
|
|
||||||
|
// Verify each remove operation
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
JsonObject op = patchArray.getJsonObject(i);
|
||||||
|
assertEquals(op.getString("op"), PatchOperationType.REMOVE.getValue());
|
||||||
|
assertEquals(
|
||||||
|
op.getString("path"),
|
||||||
|
String.format("/assets/urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_%d,PROD)", i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static DataProductProperties getTestDataProductProperties(Urn destinationUrn) {
|
private static DataProductProperties getTestDataProductProperties(Urn destinationUrn) {
|
||||||
DataProductProperties dataProductProperties = new DataProductProperties();
|
DataProductProperties dataProductProperties = new DataProductProperties();
|
||||||
DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray();
|
DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user