feat(patch): refactor cll patch (#9922)

Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
RyanHolstien 2024-02-27 12:00:44 -06:00 committed by GitHub
parent 336d0543c1
commit ddf0b7d2cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 415 additions and 657 deletions

View File

@ -9,12 +9,11 @@ import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.Urn;
import com.linkedin.dataset.DatasetLineageType; import com.linkedin.dataset.DatasetLineageType;
import com.linkedin.dataset.FineGrainedLineageDownstreamType;
import com.linkedin.dataset.FineGrainedLineageUpstreamType;
import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.metadata.aspect.patch.PatchOperationType;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import lombok.ToString; import lombok.ToString;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutableTriple; import org.apache.commons.lang3.tuple.ImmutableTriple;
@ToString @ToString
@ -52,120 +51,52 @@ public class UpstreamLineagePatchBuilder
return this; return this;
} }
/**
* Method for adding an upstream FineGrained Dataset
*
* @param datasetUrn dataset to be set as upstream
* @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for
* full confidence
* @param transformationOperation string operation type that describes the transformation
* operation happening in the lineage edge
* @return this builder
*/
public UpstreamLineagePatchBuilder addFineGrainedUpstreamDataset(
@Nonnull DatasetUrn datasetUrn,
@Nullable Float confidenceScore,
@Nonnull String transformationOperation) {
Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore);
pathValues.add(
ImmutableTriple.of(
PatchOperationType.ADD.getValue(),
FINE_GRAINED_PATH_START
+ transformationOperation
+ "/"
+ "upstreamType"
+ "/"
+ "DATASET"
+ "/"
+ datasetUrn,
instance.numberNode(finalConfidenceScore)));
return this;
}
/** /**
* Adds a field as a fine grained upstream * Adds a field as a fine grained upstream
* *
* @param schemaFieldUrn a schema field to be marked as upstream, format: * @param upstreamSchemaField a schema field to be marked as upstream, format:
* urn:li:schemaField(DATASET_URN, COLUMN NAME) * urn:li:schemaField(DATASET_URN, COLUMN NAME)
* @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for * @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for
* full confidence * full confidence
* @param transformationOperation string operation type that describes the transformation * @param transformationOperation string operation type that describes the transformation
* operation happening in the lineage edge * operation happening in the lineage edge
* @param type the upstream lineage type, either Field or Field Set * @param downstreamSchemaField the downstream schema field this upstream is derived from, format:
* urn:li:schemaField(DATASET_URN, COLUMN NAME)
* @param queryUrn query urn the relationship is derived from
* @return this builder * @return this builder
*/ */
public UpstreamLineagePatchBuilder addFineGrainedUpstreamField( public UpstreamLineagePatchBuilder addFineGrainedUpstreamField(
@Nonnull Urn schemaFieldUrn, @Nonnull Urn upstreamSchemaField,
@Nullable Float confidenceScore, @Nullable Float confidenceScore,
@Nonnull String transformationOperation, @Nonnull String transformationOperation,
@Nullable FineGrainedLineageUpstreamType type) { @Nonnull Urn downstreamSchemaField,
@Nullable Urn queryUrn) {
Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore); Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore);
String finalType; String finalQueryUrn;
if (type == null) { if (queryUrn == null || StringUtils.isBlank(queryUrn.toString())) {
// Default to set of fields if not explicitly a single field finalQueryUrn = "NONE";
finalType = FineGrainedLineageUpstreamType.FIELD_SET.toString();
} else { } else {
finalType = type.toString(); finalQueryUrn = queryUrn.toString();
} }
ObjectNode fineGrainedLineageNode = instance.objectNode();
fineGrainedLineageNode.put("confidenceScore", instance.numberNode(finalConfidenceScore));
pathValues.add( pathValues.add(
ImmutableTriple.of( ImmutableTriple.of(
PatchOperationType.ADD.getValue(), PatchOperationType.ADD.getValue(),
FINE_GRAINED_PATH_START FINE_GRAINED_PATH_START
+ transformationOperation + transformationOperation
+ "/" + "/"
+ "upstreamType" + downstreamSchemaField
+ "/" + "/"
+ finalType + finalQueryUrn
+ "/" + "/"
+ schemaFieldUrn, + upstreamSchemaField,
instance.numberNode(finalConfidenceScore))); fineGrainedLineageNode));
return this; return this;
} }
/**
* Adds a field as a fine grained downstream
*
* @param schemaFieldUrn a schema field to be marked as downstream, format:
* urn:li:schemaField(DATASET_URN, COLUMN NAME)
* @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for
* full confidence
* @param transformationOperation string operation type that describes the transformation
* operation happening in the lineage edge
* @param type the downstream lineage type, either Field or Field Set
* @return this builder
*/
public UpstreamLineagePatchBuilder addFineGrainedDownstreamField(
@Nonnull Urn schemaFieldUrn,
@Nullable Float confidenceScore,
@Nonnull String transformationOperation,
@Nullable FineGrainedLineageDownstreamType type) {
Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore);
String finalType;
if (type == null) {
// Default to set of fields if not explicitly a single field
finalType = FineGrainedLineageDownstreamType.FIELD_SET.toString();
} else {
finalType = type.toString();
}
pathValues.add(
ImmutableTriple.of(
PatchOperationType.ADD.getValue(),
FINE_GRAINED_PATH_START
+ transformationOperation
+ "/"
+ "downstreamType"
+ "/"
+ finalType
+ "/"
+ schemaFieldUrn,
instance.numberNode(finalConfidenceScore)));
return this;
}
private Float getConfidenceScoreOrDefault(@Nullable Float confidenceScore) { private Float getConfidenceScoreOrDefault(@Nullable Float confidenceScore) {
float finalConfidenceScore; float finalConfidenceScore;
if (confidenceScore != null && confidenceScore > 0 && confidenceScore <= 1.0f) { if (confidenceScore != null && confidenceScore > 0 && confidenceScore <= 1.0f) {
@ -180,96 +111,43 @@ public class UpstreamLineagePatchBuilder
/** /**
* Removes a field as a fine grained upstream * Removes a field as a fine grained upstream
* *
* @param schemaFieldUrn a schema field to be marked as upstream, format: * @param upstreamSchemaFieldUrn a schema field to be marked as upstream, format:
* urn:li:schemaField(DATASET_URN, COLUMN NAME) * urn:li:schemaField(DATASET_URN, COLUMN NAME)
* @param transformationOperation string operation type that describes the transformation * @param transformationOperation string operation type that describes the transformation
* operation happening in the lineage edge * operation happening in the lineage edge
* @param type the upstream lineage type, either Field or Field Set * @param downstreamSchemaField the downstream schema field this upstream is derived from, format:
* urn:li:schemaField(DATASET_URN, COLUMN NAME)
* @param queryUrn query urn the relationship is derived from
* @return this builder * @return this builder
*/ */
public UpstreamLineagePatchBuilder removeFineGrainedUpstreamField( public UpstreamLineagePatchBuilder removeFineGrainedUpstreamField(
@Nonnull Urn schemaFieldUrn, @Nonnull Urn upstreamSchemaFieldUrn,
@Nonnull String transformationOperation, @Nonnull String transformationOperation,
@Nullable FineGrainedLineageUpstreamType type) { @Nonnull Urn downstreamSchemaField,
String finalType; @Nullable Urn queryUrn) {
if (type == null) {
// Default to set of fields if not explicitly a single field String finalQueryUrn;
finalType = FineGrainedLineageUpstreamType.FIELD_SET.toString(); if (queryUrn == null || StringUtils.isBlank(queryUrn.toString())) {
finalQueryUrn = "NONE";
} else { } else {
finalType = type.toString(); finalQueryUrn = queryUrn.toString();
} }
pathValues.add( pathValues.add(
ImmutableTriple.of( ImmutableTriple.of(
PatchOperationType.REMOVE.getValue(), PatchOperationType.REMOVE.getValue(),
FINE_GRAINED_PATH_START FINE_GRAINED_PATH_START
+ transformationOperation + transformationOperation
+ "/" + "/"
+ "upstreamType" + downstreamSchemaField
+ "/" + "/"
+ finalType + finalQueryUrn
+ "/" + "/"
+ schemaFieldUrn, + upstreamSchemaFieldUrn,
null)); null));
return this; return this;
} }
public UpstreamLineagePatchBuilder removeFineGrainedUpstreamDataset(
@Nonnull DatasetUrn datasetUrn, @Nonnull String transformationOperation) {
pathValues.add(
ImmutableTriple.of(
PatchOperationType.REMOVE.getValue(),
FINE_GRAINED_PATH_START
+ transformationOperation
+ "/"
+ "upstreamType"
+ "/"
+ "DATASET"
+ "/"
+ datasetUrn,
null));
return this;
}
/**
* Adds a field as a fine grained downstream
*
* @param schemaFieldUrn a schema field to be marked as downstream, format:
* urn:li:schemaField(DATASET_URN, COLUMN NAME)
* @param transformationOperation string operation type that describes the transformation
* operation happening in the lineage edge
* @param type the downstream lineage type, either Field or Field Set
* @return this builder
*/
public UpstreamLineagePatchBuilder removeFineGrainedDownstreamField(
@Nonnull Urn schemaFieldUrn,
@Nonnull String transformationOperation,
@Nullable FineGrainedLineageDownstreamType type) {
String finalType;
if (type == null) {
// Default to set of fields if not explicitly a single field
finalType = FineGrainedLineageDownstreamType.FIELD_SET.toString();
} else {
finalType = type.toString();
}
pathValues.add(
ImmutableTriple.of(
PatchOperationType.REMOVE.getValue(),
FINE_GRAINED_PATH_START
+ transformationOperation
+ "/"
+ "downstreamType"
+ "/"
+ finalType
+ "/"
+ schemaFieldUrn,
null));
return this;
}
@Override @Override
protected String getAspectName() { protected String getAspectName() {
return UPSTREAM_LINEAGE_ASPECT_NAME; return UPSTREAM_LINEAGE_ASPECT_NAME;

View File

@ -1,10 +1,7 @@
package com.linkedin.metadata.aspect.patch.template.dataset; package com.linkedin.metadata.aspect.patch.template.dataset;
import static com.fasterxml.jackson.databind.node.JsonNodeFactory.instance; import static com.fasterxml.jackson.databind.node.JsonNodeFactory.instance;
import static com.linkedin.metadata.Constants.FINE_GRAINED_LINEAGE_DATASET_TYPE; import static com.linkedin.metadata.Constants.*;
import static com.linkedin.metadata.Constants.FINE_GRAINED_LINEAGE_FIELD_SET_TYPE;
import static com.linkedin.metadata.Constants.FINE_GRAINED_LINEAGE_FIELD_TYPE;
import static com.linkedin.metadata.Constants.SCHEMA_FIELD_ENTITY_NAME;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ArrayNode;
@ -22,6 +19,7 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import org.codehaus.plexus.util.StringUtils;
public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage> { public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage> {
@ -35,10 +33,12 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage
private static final String FINE_GRAINED_DOWNSTREAMS = "downstreams"; private static final String FINE_GRAINED_DOWNSTREAMS = "downstreams";
private static final String FINE_GRAINED_TRANSFORMATION_OPERATION = "transformOperation"; private static final String FINE_GRAINED_TRANSFORMATION_OPERATION = "transformOperation";
private static final String FINE_GRAINED_CONFIDENCE_SCORE = "confidenceScore"; private static final String FINE_GRAINED_CONFIDENCE_SCORE = "confidenceScore";
private static final String FINE_GRAINED_QUERY_ID = "query";
// Template support // Template support
private static final String NONE_TRANSFORMATION_TYPE = "NONE"; private static final String NONE_TRANSFORMATION_TYPE = "NONE";
private static final Float DEFAULT_CONFIDENCE_SCORE = 1.0f; private static final Float DEFAULT_CONFIDENCE_SCORE = 1.0f;
private static final String DEFAULT_QUERY_ID = "NONE";
@Override @Override
public UpstreamLineage getSubtype(RecordTemplate recordTemplate) throws ClassCastException { public UpstreamLineage getSubtype(RecordTemplate recordTemplate) throws ClassCastException {
@ -94,7 +94,7 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage
/** /**
* Combines fine grained lineage array into a map using upstream and downstream types as keys, * Combines fine grained lineage array into a map using upstream and downstream types as keys,
* defaulting when not present. Due to this construction, patches will look like: path: * defaulting when not present. Due to this construction, patches will look like: path:
* /fineGrainedLineages/TRANSFORMATION_OPERATION/(upstreamType || downstreamType)/TYPE/FIELD_URN, * /fineGrainedLineages/TRANSFORMATION_OPERATION/DOWNSTREAM_FIELD_URN/QUERY_ID/UPSTREAM_FIELD_URN,
* op: ADD/REMOVE, value: float (confidenceScore) Due to the way FineGrainedLineage was designed * op: ADD/REMOVE, value: float (confidenceScore) Due to the way FineGrainedLineage was designed
* it doesn't necessarily have a consistent key we can reference, so this specialized method * it doesn't necessarily have a consistent key we can reference, so this specialized method
* mimics the arrayFieldToMap of the super class with the specialization that it does not put the * mimics the arrayFieldToMap of the super class with the specialization that it does not put the
@ -128,6 +128,18 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage
ObjectNode transformationOperationNode = ObjectNode transformationOperationNode =
(ObjectNode) mapNode.get(transformationOperation); (ObjectNode) mapNode.get(transformationOperation);
ArrayNode downstreams =
nodeClone.has(FINE_GRAINED_DOWNSTREAMS)
? (ArrayNode) nodeClone.get(FINE_GRAINED_DOWNSTREAMS)
: null;
if (downstreams == null || downstreams.size() != 1) {
throw new UnsupportedOperationException(
"Patching not supported on fine grained lineages with not"
+ " exactly one downstream. Current fine grained lineage implementation is downstream derived and "
+ "patches are keyed on the root of this derivation.");
}
Float confidenceScore = Float confidenceScore =
nodeClone.has(FINE_GRAINED_CONFIDENCE_SCORE) nodeClone.has(FINE_GRAINED_CONFIDENCE_SCORE)
? nodeClone.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue() ? nodeClone.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue()
@ -145,69 +157,73 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage
nodeClone.has(FINE_GRAINED_UPSTREAMS) nodeClone.has(FINE_GRAINED_UPSTREAMS)
? (ArrayNode) nodeClone.get(FINE_GRAINED_UPSTREAMS) ? (ArrayNode) nodeClone.get(FINE_GRAINED_UPSTREAMS)
: null; : null;
ArrayNode downstreams =
nodeClone.has(FINE_GRAINED_DOWNSTREAMS)
? (ArrayNode) nodeClone.get(FINE_GRAINED_DOWNSTREAMS)
: null;
// Handle upstreams String queryId =
nodeClone.has(FINE_GRAINED_QUERY_ID)
? nodeClone.get(FINE_GRAINED_QUERY_ID).asText()
: DEFAULT_QUERY_ID;
if (upstreamType == null) { if (upstreamType == null) {
// Determine default type // Determine default type
Urn upstreamUrn = Urn upstreamUrn =
upstreams != null ? UrnUtils.getUrn(upstreams.get(0).asText()) : null; upstreams != null ? UrnUtils.getUrn(upstreams.get(0).asText()) : null;
if (upstreamUrn != null if (upstreamUrn != null
&& SCHEMA_FIELD_ENTITY_NAME.equals(upstreamUrn.getEntityType())) { && DATASET_ENTITY_NAME.equals(upstreamUrn.getEntityType())) {
upstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE;
} else {
upstreamType = FINE_GRAINED_LINEAGE_DATASET_TYPE; upstreamType = FINE_GRAINED_LINEAGE_DATASET_TYPE;
} else {
upstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE;
} }
} }
if (!transformationOperationNode.has(FINE_GRAINED_UPSTREAM_TYPE)) {
transformationOperationNode.set(FINE_GRAINED_UPSTREAM_TYPE, instance.objectNode());
}
ObjectNode upstreamTypeNode =
(ObjectNode) transformationOperationNode.get(FINE_GRAINED_UPSTREAM_TYPE);
if (!upstreamTypeNode.has(upstreamType)) {
upstreamTypeNode.set(upstreamType, instance.objectNode());
}
if (upstreams != null) {
addUrnsToSubType(upstreamTypeNode, upstreams, upstreamType, confidenceScore);
}
// Handle downstreams
if (downstreamType == null) { if (downstreamType == null) {
// Determine default type // Always use FIELD type, only support patches for single field downstream
if (downstreams != null && downstreams.size() > 1) { downstreamType = FINE_GRAINED_LINEAGE_FIELD_TYPE;
downstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE;
} else {
downstreamType = FINE_GRAINED_LINEAGE_FIELD_TYPE;
}
} }
if (!transformationOperationNode.has(FINE_GRAINED_DOWNSTREAM_TYPE)) {
transformationOperationNode.set( String downstreamRoot = downstreams.get(0).asText();
FINE_GRAINED_DOWNSTREAM_TYPE, instance.objectNode()); if (!transformationOperationNode.has(downstreamRoot)) {
transformationOperationNode.set(downstreamRoot, instance.objectNode());
} }
ObjectNode downstreamTypeNode = ObjectNode downstreamRootNode =
(ObjectNode) transformationOperationNode.get(FINE_GRAINED_DOWNSTREAM_TYPE); (ObjectNode) transformationOperationNode.get(downstreamRoot);
if (!downstreamTypeNode.has(downstreamType)) { if (!downstreamRootNode.has(queryId)) {
downstreamTypeNode.set(downstreamType, instance.objectNode()); downstreamRootNode.set(queryId, instance.objectNode());
} }
if (downstreams != null) { ObjectNode queryNode = (ObjectNode) downstreamRootNode.get(queryId);
addUrnsToSubType(downstreamTypeNode, downstreams, downstreamType, confidenceScore); if (upstreams != null) {
addUrnsToParent(
queryNode, upstreams, confidenceScore, upstreamType, downstreamType);
} }
}); });
return mapNode; return mapNode;
} }
private void addUrnsToSubType( private void addUrnsToParent(
JsonNode superType, ArrayNode urnsList, String subType, Float confidenceScore) { JsonNode parentNode,
ObjectNode upstreamSubTypeNode = (ObjectNode) superType.get(subType); ArrayNode urnsList,
Float confidenceScore,
String upstreamType,
String downstreamType) {
// Will overwrite repeat urns with different confidence scores with the most recently seen // Will overwrite repeat urns with different confidence scores with the most recently seen
upstreamSubTypeNode.setAll( ((ObjectNode) parentNode)
Streams.stream(urnsList.elements()) .setAll(
.map(JsonNode::asText) Streams.stream(urnsList.elements())
.distinct() .map(JsonNode::asText)
.collect(Collectors.toMap(urn -> urn, urn -> instance.numberNode(confidenceScore)))); .distinct()
.collect(
Collectors.toMap(
urn -> urn,
urn ->
mapToLineageValueNode(confidenceScore, upstreamType, downstreamType))));
}
private JsonNode mapToLineageValueNode(
Float confidenceScore, String upstreamType, String downstreamType) {
ObjectNode objectNode = instance.objectNode();
objectNode.set(FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(confidenceScore));
objectNode.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType));
objectNode.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType));
return objectNode;
} }
/** /**
@ -225,7 +241,7 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage
return (ArrayNode) transformedFineGrainedLineages; return (ArrayNode) transformedFineGrainedLineages;
} }
ObjectNode mapNode = (ObjectNode) transformedFineGrainedLineages; ObjectNode mapNode = (ObjectNode) transformedFineGrainedLineages;
ArrayNode arrayNode = instance.arrayNode(); ArrayNode fineGrainedLineages = instance.arrayNode();
mapNode mapNode
.fieldNames() .fieldNames()
@ -233,83 +249,95 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage
transformationOperation -> { transformationOperation -> {
final ObjectNode transformationOperationNode = final ObjectNode transformationOperationNode =
(ObjectNode) mapNode.get(transformationOperation); (ObjectNode) mapNode.get(transformationOperation);
final ObjectNode upstreamType = transformationOperationNode
transformationOperationNode.has(FINE_GRAINED_UPSTREAM_TYPE) .fieldNames()
? (ObjectNode) transformationOperationNode.get(FINE_GRAINED_UPSTREAM_TYPE) .forEachRemaining(
: instance.objectNode(); downstreamName -> {
final ObjectNode downstreamType = final ObjectNode downstreamNode =
transformationOperationNode.has(FINE_GRAINED_DOWNSTREAM_TYPE) (ObjectNode) transformationOperationNode.get(downstreamName);
? (ObjectNode) transformationOperationNode.get(FINE_GRAINED_DOWNSTREAM_TYPE) downstreamNode
: instance.objectNode(); .fieldNames()
.forEachRemaining(
// Handle upstreams queryId ->
if (!upstreamType.isEmpty()) { buildFineGrainedLineage(
populateTypeNode( downstreamName,
upstreamType, downstreamNode,
transformationOperation, queryId,
FINE_GRAINED_UPSTREAM_TYPE, transformationOperation,
FINE_GRAINED_UPSTREAMS, fineGrainedLineages));
FINE_GRAINED_DOWNSTREAM_TYPE, });
arrayNode);
}
// Handle downstreams
if (!downstreamType.isEmpty()) {
populateTypeNode(
downstreamType,
transformationOperation,
FINE_GRAINED_DOWNSTREAM_TYPE,
FINE_GRAINED_DOWNSTREAMS,
FINE_GRAINED_UPSTREAM_TYPE,
arrayNode);
}
}); });
return arrayNode; return fineGrainedLineages;
} }
private void populateTypeNode( private void buildFineGrainedLineage(
JsonNode typeNode, final String downstreamName,
String transformationOperation, final ObjectNode downstreamNode,
String typeName, final String queryId,
String arrayTypeName, final String transformationOperation,
String defaultTypeName, final ArrayNode fineGrainedLineages) {
ArrayNode arrayNode) { final ObjectNode fineGrainedLineage = instance.objectNode();
typeNode final ObjectNode queryNode = (ObjectNode) downstreamNode.get(queryId);
if (queryNode.isEmpty()) {
// Short circuit if no upstreams left
return;
}
ArrayNode downstream = instance.arrayNode();
downstream.add(instance.textNode(downstreamName));
// Set defaults, if found in sub nodes override, for confidenceScore take lowest
AtomicReference<Float> minimumConfidenceScore = new AtomicReference<>(DEFAULT_CONFIDENCE_SCORE);
AtomicReference<String> upstreamType =
new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_SET_TYPE);
AtomicReference<String> downstreamType = new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_TYPE);
ArrayNode upstreams = instance.arrayNode();
queryNode
.fieldNames() .fieldNames()
.forEachRemaining( .forEachRemaining(
subTypeName -> { upstream ->
ObjectNode subType = (ObjectNode) typeNode.get(subTypeName); processUpstream(
if (!subType.isEmpty()) { queryNode,
ObjectNode fineGrainedLineage = instance.objectNode(); upstream,
AtomicReference<Float> minimumConfidenceScore = new AtomicReference<>(1.0f); minimumConfidenceScore,
upstreamType,
downstreamType,
upstreams));
fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAMS, downstream);
fineGrainedLineage.set(FINE_GRAINED_UPSTREAMS, upstreams);
if (StringUtils.isNotBlank(queryId) && !DEFAULT_QUERY_ID.equals(queryId)) {
fineGrainedLineage.set(FINE_GRAINED_QUERY_ID, instance.textNode(queryId));
}
fineGrainedLineage.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType.get()));
fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType.get()));
fineGrainedLineage.set(
FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(minimumConfidenceScore.get()));
fineGrainedLineage.set(
FINE_GRAINED_TRANSFORMATION_OPERATION, instance.textNode(transformationOperation));
fineGrainedLineages.add(fineGrainedLineage);
}
fineGrainedLineage.put(typeName, subTypeName); private void processUpstream(
fineGrainedLineage.put( final ObjectNode queryNode,
FINE_GRAINED_TRANSFORMATION_OPERATION, transformationOperation); final String upstream,
// Array to actually be filled out final AtomicReference<Float> minimumConfidenceScore,
fineGrainedLineage.set(arrayTypeName, instance.arrayNode()); final AtomicReference<String> upstreamType,
// Added to pass model validation, because we have no way of appropriately pairing final AtomicReference<String> downstreamType,
// upstreams and downstreams final ArrayNode upstreams) {
// within fine grained lineages consistently due to being able to have multiple final ObjectNode upstreamNode = (ObjectNode) queryNode.get(upstream);
// downstream types paired with a single if (upstreamNode.has(FINE_GRAINED_CONFIDENCE_SCORE)) {
// transform operation, we just set a default type because it's a required property Float scoreValue = upstreamNode.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue();
fineGrainedLineage.put(defaultTypeName, FINE_GRAINED_LINEAGE_FIELD_SET_TYPE); if (scoreValue <= minimumConfidenceScore.get()) {
subType minimumConfidenceScore.set(scoreValue);
.fieldNames() }
.forEachRemaining( }
subTypeKey -> { // Set types to last encountered, should never change, but this at least tries to support
((ArrayNode) fineGrainedLineage.get(arrayTypeName)).add(subTypeKey); // other types being specified.
Float scoreValue = subType.get(subTypeKey).floatValue(); if (upstreamNode.has(FINE_GRAINED_UPSTREAM_TYPE)) {
if (scoreValue <= minimumConfidenceScore.get()) { upstreamType.set(upstreamNode.get(FINE_GRAINED_UPSTREAM_TYPE).asText());
minimumConfidenceScore.set(scoreValue); }
fineGrainedLineage.set( if (upstreamNode.has(FINE_GRAINED_DOWNSTREAM_TYPE)) {
FINE_GRAINED_CONFIDENCE_SCORE, downstreamType.set(upstreamNode.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText());
instance.numberNode(minimumConfidenceScore.get())); }
} upstreams.add(instance.textNode(upstream));
});
arrayNode.add(fineGrainedLineage);
}
});
} }
} }

View File

@ -3,6 +3,7 @@ package com.linkedin.metadata.aspect.patch.template;
import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*;
import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.NumericNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.github.fge.jackson.jsonpointer.JsonPointer; import com.github.fge.jackson.jsonpointer.JsonPointer;
import com.github.fge.jsonpatch.AddOperation; import com.github.fge.jsonpatch.AddOperation;
import com.github.fge.jsonpatch.JsonPatch; import com.github.fge.jsonpatch.JsonPatch;
@ -28,12 +29,14 @@ public class UpstreamLineageTemplateTest {
UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate(); UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate();
UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault(); UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault();
List<JsonPatchOperation> patchOperations = new ArrayList<>(); List<JsonPatchOperation> patchOperations = new ArrayList<>();
ObjectNode fineGrainedLineageNode = instance.objectNode();
NumericNode upstreamConfidenceScore = instance.numberNode(1.0f); NumericNode upstreamConfidenceScore = instance.numberNode(1.0f);
fineGrainedLineageNode.set("confidenceScore", upstreamConfidenceScore);
JsonPatchOperation operation = JsonPatchOperation operation =
new AddOperation( new AddOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"), "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"),
upstreamConfidenceScore); fineGrainedLineageNode);
patchOperations.add(operation); patchOperations.add(operation);
JsonPatch jsonPatch = new JsonPatch(patchOperations); JsonPatch jsonPatch = new JsonPatch(patchOperations);
@ -48,24 +51,42 @@ public class UpstreamLineageTemplateTest {
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"); "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)");
urns.add(urn1); urns.add(urn1);
fineGrainedLineage.setUpstreams(urns); UrnArray upstreams = new UrnArray();
Urn upstreamUrn =
UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)");
upstreams.add(upstreamUrn);
fineGrainedLineage.setDownstreams(urns);
fineGrainedLineage.setUpstreams(upstreams);
fineGrainedLineage.setTransformOperation("CREATE"); fineGrainedLineage.setTransformOperation("CREATE");
fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET); fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
Assert.assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage); Assert.assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage);
// Test non-overwrite upstreams and correct confidence score // Test non-overwrite upstreams and correct confidence score and types w/ overwrite
ObjectNode finegrainedLineageNode2 = instance.objectNode();
finegrainedLineageNode2.set(
"upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name()));
finegrainedLineageNode2.set("confidenceScore", upstreamConfidenceScore);
finegrainedLineageNode2.set(
"downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name()));
JsonPatchOperation operation2 = JsonPatchOperation operation2 =
new AddOperation( new AddOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"),
upstreamConfidenceScore); finegrainedLineageNode2);
NumericNode upstreamConfidenceScore2 = instance.numberNode(0.1f); NumericNode upstreamConfidenceScore2 = instance.numberNode(0.1f);
ObjectNode finegrainedLineageNode3 = instance.objectNode();
finegrainedLineageNode3.set(
"upstreamType", instance.textNode(FineGrainedLineageUpstreamType.DATASET.name()));
finegrainedLineageNode3.set("confidenceScore", upstreamConfidenceScore2);
finegrainedLineageNode3.set(
"downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD_SET.name()));
JsonPatchOperation operation3 = JsonPatchOperation operation3 =
new AddOperation( new AddOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"),
upstreamConfidenceScore2); finegrainedLineageNode3);
List<JsonPatchOperation> patchOperations2 = new ArrayList<>(); List<JsonPatchOperation> patchOperations2 = new ArrayList<>();
patchOperations2.add(operation2); patchOperations2.add(operation2);
patchOperations2.add(operation3); patchOperations2.add(operation3);
@ -79,20 +100,32 @@ public class UpstreamLineageTemplateTest {
Urn urn2 = Urn urn2 =
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
urns2.add(urn1);
urns2.add(urn2); urns2.add(urn2);
Urn downstreamUrn2 =
UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)");
UrnArray downstreams2 = new UrnArray();
downstreams2.add(downstreamUrn2);
fineGrainedLineage2.setUpstreams(urns2); fineGrainedLineage2.setUpstreams(urns2);
fineGrainedLineage2.setDownstreams(downstreams2);
fineGrainedLineage2.setTransformOperation("CREATE"); fineGrainedLineage2.setTransformOperation("CREATE");
fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.DATASET);
fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET); fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
Assert.assertEquals(result2.getFineGrainedLineages().get(0), fineGrainedLineage2); fineGrainedLineage2.setQuery(UrnUtils.getUrn("urn:li:query:someQuery"));
Assert.assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2);
// Check different upstream types // Check different queries
ObjectNode finegrainedLineageNode4 = instance.objectNode();
finegrainedLineageNode4.set(
"upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name()));
finegrainedLineageNode4.set("confidenceScore", upstreamConfidenceScore);
finegrainedLineageNode4.set(
"downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name()));
JsonPatchOperation operation4 = JsonPatchOperation operation4 =
new AddOperation( new AddOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/DATASET/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD)"), "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"),
upstreamConfidenceScore); finegrainedLineageNode4);
List<JsonPatchOperation> patchOperations3 = new ArrayList<>(); List<JsonPatchOperation> patchOperations3 = new ArrayList<>();
patchOperations3.add(operation4); patchOperations3.add(operation4);
JsonPatch jsonPatch3 = new JsonPatch(patchOperations3); JsonPatch jsonPatch3 = new JsonPatch(patchOperations3);
@ -103,163 +136,36 @@ public class UpstreamLineageTemplateTest {
FineGrainedLineage fineGrainedLineage3 = new FineGrainedLineage(dataMap3); FineGrainedLineage fineGrainedLineage3 = new FineGrainedLineage(dataMap3);
UrnArray urns3 = new UrnArray(); UrnArray urns3 = new UrnArray();
Urn urn3 = Urn urn3 =
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD)");
urns3.add(urn3);
fineGrainedLineage3.setUpstreams(urns3);
fineGrainedLineage3.setTransformOperation("CREATE");
fineGrainedLineage3.setUpstreamType(FineGrainedLineageUpstreamType.DATASET);
fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
// Splits into two for different types
Assert.assertEquals(result3.getFineGrainedLineages().get(1), fineGrainedLineage3);
// Check different transform types
JsonPatchOperation operation5 =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/TRANSFORM/upstreamType/DATASET/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD)"),
upstreamConfidenceScore);
List<JsonPatchOperation> patchOperations4 = new ArrayList<>();
patchOperations4.add(operation5);
JsonPatch jsonPatch4 = new JsonPatch(patchOperations4);
UpstreamLineage result4 = upstreamLineageTemplate.applyPatch(result3, jsonPatch4);
// Hack because Jackson parses values to doubles instead of floats
DataMap dataMap4 = new DataMap();
dataMap4.put("confidenceScore", 1.0);
FineGrainedLineage fineGrainedLineage4 = new FineGrainedLineage(dataMap4);
UrnArray urns4 = new UrnArray();
Urn urn4 =
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD)");
urns4.add(urn4);
fineGrainedLineage4.setUpstreams(urns4);
fineGrainedLineage4.setTransformOperation("TRANSFORM");
fineGrainedLineage4.setUpstreamType(FineGrainedLineageUpstreamType.DATASET);
fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
// New entry in array because of new transformation type
Assert.assertEquals(result4.getFineGrainedLineages().get(2), fineGrainedLineage4);
// Remove
JsonPatchOperation removeOperation =
new RemoveOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"));
JsonPatchOperation removeOperation2 =
new RemoveOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"));
JsonPatchOperation removeOperation3 =
new RemoveOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/DATASET/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD)"));
JsonPatchOperation removeOperation4 =
new RemoveOperation(
new JsonPointer(
"/fineGrainedLineages/TRANSFORM/upstreamType/DATASET/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD)"));
List<JsonPatchOperation> removeOperations = new ArrayList<>();
removeOperations.add(removeOperation);
removeOperations.add(removeOperation2);
removeOperations.add(removeOperation3);
removeOperations.add(removeOperation4);
JsonPatch removePatch = new JsonPatch(removeOperations);
UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch);
Assert.assertEquals(upstreamLineageTemplate.getDefault(), finalResult);
}
@Test
public void testPatchDownstream() throws Exception {
UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate();
UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault();
List<JsonPatchOperation> patchOperations = new ArrayList<>();
NumericNode downstreamConfidenceScore = instance.numberNode(1.0f);
JsonPatchOperation operation =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"),
downstreamConfidenceScore);
patchOperations.add(operation);
JsonPatch jsonPatch = new JsonPatch(patchOperations);
// Initial population test
UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch);
// Hack because Jackson parses values to doubles instead of floats
DataMap dataMap = new DataMap();
dataMap.put("confidenceScore", 1.0);
FineGrainedLineage fineGrainedLineage = new FineGrainedLineage(dataMap);
UrnArray urns = new UrnArray();
Urn urn1 =
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"); "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)");
urns.add(urn1); urns3.add(urn3);
fineGrainedLineage.setDownstreams(urns);
fineGrainedLineage.setTransformOperation("CREATE");
fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
Assert.assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage);
// Test non-overwrite downstreams and correct confidence score Urn upstreamUrn3 =
JsonPatchOperation operation2 =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"),
downstreamConfidenceScore);
NumericNode downstreamConfidenceScore2 = instance.numberNode(0.1f);
JsonPatchOperation operation3 =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"),
downstreamConfidenceScore2);
List<JsonPatchOperation> patchOperations2 = new ArrayList<>();
patchOperations2.add(operation2);
patchOperations2.add(operation3);
JsonPatch jsonPatch2 = new JsonPatch(patchOperations2);
UpstreamLineage result2 = upstreamLineageTemplate.applyPatch(result, jsonPatch2);
// Hack because Jackson parses values to doubles instead of floats
DataMap dataMap2 = new DataMap();
dataMap2.put("confidenceScore", 0.1);
FineGrainedLineage fineGrainedLineage2 = new FineGrainedLineage(dataMap2);
UrnArray urns2 = new UrnArray();
Urn urn2 =
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
urns2.add(urn1); UrnArray upstreamUrns3 = new UrnArray();
urns2.add(urn2); upstreamUrns3.add(upstreamUrn3);
fineGrainedLineage2.setDownstreams(urns2);
fineGrainedLineage2.setTransformOperation("CREATE");
fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
Assert.assertEquals(result2.getFineGrainedLineages().get(0), fineGrainedLineage2);
// Check different downstream types
JsonPatchOperation operation4 =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD)"),
downstreamConfidenceScore);
List<JsonPatchOperation> patchOperations3 = new ArrayList<>();
patchOperations3.add(operation4);
JsonPatch jsonPatch3 = new JsonPatch(patchOperations3);
UpstreamLineage result3 = upstreamLineageTemplate.applyPatch(result2, jsonPatch3);
// Hack because Jackson parses values to doubles instead of floats
DataMap dataMap3 = new DataMap();
dataMap3.put("confidenceScore", 1.0);
FineGrainedLineage fineGrainedLineage3 = new FineGrainedLineage(dataMap3);
UrnArray urns3 = new UrnArray();
Urn urn3 =
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD)");
urns3.add(urn3);
fineGrainedLineage3.setDownstreams(urns3); fineGrainedLineage3.setDownstreams(urns3);
fineGrainedLineage3.setUpstreams(upstreamUrns3);
fineGrainedLineage3.setTransformOperation("CREATE"); fineGrainedLineage3.setTransformOperation("CREATE");
fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
fineGrainedLineage3.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); fineGrainedLineage3.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
fineGrainedLineage3.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery"));
// Splits into two for different types // Splits into two for different types
Assert.assertEquals(result3.getFineGrainedLineages().get(1), fineGrainedLineage3); Assert.assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3);
// Check different transform types // Check different transform types
ObjectNode finegrainedLineageNode5 = instance.objectNode();
finegrainedLineageNode5.set(
"upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name()));
finegrainedLineageNode5.set("confidenceScore", upstreamConfidenceScore);
finegrainedLineageNode5.set(
"downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name()));
JsonPatchOperation operation5 = JsonPatchOperation operation5 =
new AddOperation( new AddOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/TRANSFORM/downstreamType/FIELD/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD)"), "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"),
downstreamConfidenceScore); finegrainedLineageNode5);
List<JsonPatchOperation> patchOperations4 = new ArrayList<>(); List<JsonPatchOperation> patchOperations4 = new ArrayList<>();
patchOperations4.add(operation5); patchOperations4.add(operation5);
JsonPatch jsonPatch4 = new JsonPatch(patchOperations4); JsonPatch jsonPatch4 = new JsonPatch(patchOperations4);
@ -268,34 +174,32 @@ public class UpstreamLineageTemplateTest {
DataMap dataMap4 = new DataMap(); DataMap dataMap4 = new DataMap();
dataMap4.put("confidenceScore", 1.0); dataMap4.put("confidenceScore", 1.0);
FineGrainedLineage fineGrainedLineage4 = new FineGrainedLineage(dataMap4); FineGrainedLineage fineGrainedLineage4 = new FineGrainedLineage(dataMap4);
UrnArray urns4 = new UrnArray(); fineGrainedLineage4.setUpstreams(upstreamUrns3);
Urn urn4 = fineGrainedLineage4.setDownstreams(urns3);
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD)");
urns4.add(urn4);
fineGrainedLineage4.setDownstreams(urns4);
fineGrainedLineage4.setTransformOperation("TRANSFORM"); fineGrainedLineage4.setTransformOperation("TRANSFORM");
fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
fineGrainedLineage4.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); fineGrainedLineage4.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
fineGrainedLineage4.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery"));
// New entry in array because of new transformation type // New entry in array because of new transformation type
Assert.assertEquals(result4.getFineGrainedLineages().get(2), fineGrainedLineage4); Assert.assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4);
// Remove // Remove
JsonPatchOperation removeOperation = JsonPatchOperation removeOperation =
new RemoveOperation( new RemoveOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)")); "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"));
JsonPatchOperation removeOperation2 = JsonPatchOperation removeOperation2 =
new RemoveOperation( new RemoveOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"));
JsonPatchOperation removeOperation3 = JsonPatchOperation removeOperation3 =
new RemoveOperation( new RemoveOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD)")); "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"));
JsonPatchOperation removeOperation4 = JsonPatchOperation removeOperation4 =
new RemoveOperation( new RemoveOperation(
new JsonPointer( new JsonPointer(
"/fineGrainedLineages/TRANSFORM/downstreamType/FIELD/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD)")); "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"));
List<JsonPatchOperation> removeOperations = new ArrayList<>(); List<JsonPatchOperation> removeOperations = new ArrayList<>();
removeOperations.add(removeOperation); removeOperations.add(removeOperation);
@ -304,56 +208,6 @@ public class UpstreamLineageTemplateTest {
removeOperations.add(removeOperation4); removeOperations.add(removeOperation4);
JsonPatch removePatch = new JsonPatch(removeOperations); JsonPatch removePatch = new JsonPatch(removeOperations);
UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch); UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch);
Assert.assertEquals(upstreamLineageTemplate.getDefault(), finalResult); Assert.assertEquals(finalResult, upstreamLineageTemplate.getDefault());
}
@Test
public void testUpAndDown() throws Exception {
UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate();
UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault();
List<JsonPatchOperation> patchOperations = new ArrayList<>();
NumericNode downstreamConfidenceScore = instance.numberNode(1.0f);
JsonPatchOperation operation =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/downstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"),
downstreamConfidenceScore);
patchOperations.add(operation);
NumericNode upstreamConfidenceScore = instance.numberNode(1.0f);
JsonPatchOperation operation2 =
new AddOperation(
new JsonPointer(
"/fineGrainedLineages/CREATE/upstreamType/FIELD_SET/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"),
upstreamConfidenceScore);
patchOperations.add(operation2);
JsonPatch jsonPatch = new JsonPatch(patchOperations);
// Initial population test
UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch);
// Hack because Jackson parses values to doubles instead of floats
DataMap dataMap = new DataMap();
dataMap.put("confidenceScore", 1.0);
FineGrainedLineage fineGrainedLineage = new FineGrainedLineage(dataMap);
UrnArray urns = new UrnArray();
Urn urn1 =
UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)");
urns.add(urn1);
fineGrainedLineage.setTransformOperation("CREATE");
fineGrainedLineage.setUpstreams(urns);
fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
fineGrainedLineage.setDownstreams(urns);
// Hack because Jackson parses values to doubles instead of floats
DataMap dataMap2 = new DataMap();
dataMap2.put("confidenceScore", 1.0);
FineGrainedLineage fineGrainedLineage2 = new FineGrainedLineage(dataMap2);
fineGrainedLineage2.setTransformOperation("CREATE");
fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
fineGrainedLineage2.setDownstreams(urns);
Assert.assertEquals(result.getFineGrainedLineages().get(1), fineGrainedLineage2);
} }
} }

View File

@ -58,8 +58,8 @@ class MetadataPatchProposal:
self.patches = defaultdict(list) self.patches = defaultdict(list)
# Json Patch quoting based on https://jsonpatch.com/#json-pointer # Json Patch quoting based on https://jsonpatch.com/#json-pointer
@staticmethod @classmethod
def quote(value: str) -> str: def quote(cls, value: str) -> str:
return value.replace("~", "~0").replace("/", "~1") return value.replace("~", "~0").replace("/", "~1")
def _add_patch(self, aspect_name: str, op: str, path: str, value: Any) -> None: def _add_patch(self, aspect_name: str, op: str, path: str, value: Any) -> None:

View File

@ -6,8 +6,6 @@ from datahub.metadata.schema_classes import (
EditableDatasetPropertiesClass as EditableDatasetProperties, EditableDatasetPropertiesClass as EditableDatasetProperties,
EditableSchemaMetadataClass as EditableSchemaMetadata, EditableSchemaMetadataClass as EditableSchemaMetadata,
FineGrainedLineageClass as FineGrainedLineage, FineGrainedLineageClass as FineGrainedLineage,
FineGrainedLineageDownstreamTypeClass as FineGrainedLineageDownstreamType,
FineGrainedLineageUpstreamTypeClass as FineGrainedLineageUpstreamType,
GlobalTagsClass as GlobalTags, GlobalTagsClass as GlobalTags,
GlossaryTermAssociationClass as Term, GlossaryTermAssociationClass as Term,
GlossaryTermsClass as GlossaryTerms, GlossaryTermsClass as GlossaryTerms,
@ -126,7 +124,7 @@ class DatasetPatchBuilder(MetadataPatchProposal):
self._add_patch( self._add_patch(
UpstreamLineage.ASPECT_NAME, UpstreamLineage.ASPECT_NAME,
"add", "add",
path=f"/upstreams/{MetadataPatchProposal.quote(upstream.dataset)}", path=f"/upstreams/{self.quote(upstream.dataset)}",
value=upstream, value=upstream,
) )
return self return self
@ -153,26 +151,17 @@ class DatasetPatchBuilder(MetadataPatchProposal):
) -> "DatasetPatchBuilder": ) -> "DatasetPatchBuilder":
( (
transform_op, transform_op,
upstream_type, downstream_urn,
downstream_type, query_id,
) = DatasetPatchBuilder.get_fine_grained_key(fine_grained_lineage) ) = DatasetPatchBuilder.get_fine_grained_key(fine_grained_lineage)
for upstream_urn in fine_grained_lineage.upstreams or []: for upstream_urn in fine_grained_lineage.upstreams or []:
self._add_patch( self._add_patch(
UpstreamLineage.ASPECT_NAME, UpstreamLineage.ASPECT_NAME,
"add", "add",
path=DatasetPatchBuilder.quote_fine_grained_upstream_path( path=DatasetPatchBuilder.quote_fine_grained_path(
transform_op, upstream_type, upstream_urn transform_op, downstream_urn, query_id, upstream_urn
), ),
value=fine_grained_lineage.confidenceScore, value={"confidenceScore": fine_grained_lineage.confidenceScore},
)
for downstream_urn in fine_grained_lineage.downstreams or []:
self._add_patch(
UpstreamLineage.ASPECT_NAME,
"add",
path=DatasetPatchBuilder.quote_fine_grained_downstream_path(
transform_op, downstream_type, downstream_urn
),
value=fine_grained_lineage.confidenceScore,
) )
return self return self
@ -180,35 +169,21 @@ class DatasetPatchBuilder(MetadataPatchProposal):
def get_fine_grained_key( def get_fine_grained_key(
fine_grained_lineage: FineGrainedLineage, fine_grained_lineage: FineGrainedLineage,
) -> Tuple[str, str, str]: ) -> Tuple[str, str, str]:
downstreams = fine_grained_lineage.downstreams or []
if len(downstreams) != 1:
raise TypeError("Cannot patch with more or less than one downstream.")
transform_op = fine_grained_lineage.transformOperation or "NONE" transform_op = fine_grained_lineage.transformOperation or "NONE"
upstream_type = ( downstream_urn = downstreams[0]
fine_grained_lineage.upstreamType query_id = fine_grained_lineage.query or "NONE"
if isinstance(fine_grained_lineage.upstreamType, str) return transform_op, downstream_urn, query_id
else FineGrainedLineageUpstreamType.FIELD_SET
)
downstream_type = (
fine_grained_lineage.downstreamType
if isinstance(fine_grained_lineage.downstreamType, str)
else FineGrainedLineageDownstreamType.FIELD_SET
)
return transform_op, upstream_type, downstream_type
@staticmethod @classmethod
def quote_fine_grained_downstream_path( def quote_fine_grained_path(
transform_op: str, downstream_type: str, downstream_urn: str cls, transform_op: str, downstream_urn: str, query_id: str, upstream_urn: str
) -> str: ) -> str:
return ( return (
f"/fineGrainedLineages/{MetadataPatchProposal.quote(transform_op)}/downstreamType/" f"/fineGrainedLineages/{cls.quote(transform_op)}/"
f"{MetadataPatchProposal.quote(downstream_type)}/{MetadataPatchProposal.quote(downstream_urn)}" f"{cls.quote(downstream_urn)}/{cls.quote(query_id)}/{cls.quote(upstream_urn)}"
)
@staticmethod
def quote_fine_grained_upstream_path(
transform_op: str, upstream_type: str, upstream_urn: str
) -> str:
return (
f"/fineGrainedLineages/{MetadataPatchProposal.quote(transform_op)}/upstreamType/"
f"{MetadataPatchProposal.quote(upstream_type)}/{MetadataPatchProposal.quote(upstream_urn)}"
) )
def remove_fine_grained_upstream_lineage( def remove_fine_grained_upstream_lineage(
@ -216,24 +191,15 @@ class DatasetPatchBuilder(MetadataPatchProposal):
) -> "DatasetPatchBuilder": ) -> "DatasetPatchBuilder":
( (
transform_op, transform_op,
upstream_type, downstream_urn,
downstream_type, query_id,
) = DatasetPatchBuilder.get_fine_grained_key(fine_grained_lineage) ) = DatasetPatchBuilder.get_fine_grained_key(fine_grained_lineage)
for upstream_urn in fine_grained_lineage.upstreams or []: for upstream_urn in fine_grained_lineage.upstreams or []:
self._add_patch( self._add_patch(
UpstreamLineage.ASPECT_NAME, UpstreamLineage.ASPECT_NAME,
"remove", "remove",
path=DatasetPatchBuilder.quote_fine_grained_upstream_path( path=DatasetPatchBuilder.quote_fine_grained_path(
transform_op, upstream_type, upstream_urn transform_op, downstream_urn, query_id, upstream_urn
),
value={},
)
for downstream_urn in fine_grained_lineage.downstreams or []:
self._add_patch(
UpstreamLineage.ASPECT_NAME,
"remove",
path=DatasetPatchBuilder.quote_fine_grained_downstream_path(
transform_op, downstream_type, downstream_urn
), ),
value={}, value={},
) )

View File

@ -1,105 +1,109 @@
[ [
{ {
"entityType": "dataset", "entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
"changeType": "PATCH", "changeType": "PATCH",
"aspectName": "datasetProperties", "aspectName": "datasetProperties",
"aspect": { "aspect": {
"json": [ "json": [
{ {
"op": "add", "op": "add",
"path": "/description", "path": "/description",
"value": "test description" "value": "test description"
}, },
{ {
"op": "add", "op": "add",
"path": "/customProperties/test_key_1", "path": "/customProperties/test_key_1",
"value": "test_value_1" "value": "test_value_1"
}, },
{ {
"op": "add", "op": "add",
"path": "/customProperties/test_key_2", "path": "/customProperties/test_key_2",
"value": "test_value_2" "value": "test_value_2"
} }
] ]
} }
}, },
{ {
"entityType": "dataset", "entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
"changeType": "PATCH", "changeType": "PATCH",
"aspectName": "globalTags", "aspectName": "globalTags",
"aspect": { "aspect": {
"json": [ "json": [
{ {
"op": "add", "op": "add",
"path": "/tags/urn:li:tag:test_tag", "path": "/tags/urn:li:tag:test_tag",
"value": { "value": {
"tag": "urn:li:tag:test_tag" "tag": "urn:li:tag:test_tag"
} }
} }
] ]
} }
}, },
{ {
"entityType": "dataset", "entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
"changeType": "PATCH", "changeType": "PATCH",
"aspectName": "upstreamLineage", "aspectName": "upstreamLineage",
"aspect": { "aspect": {
"json": [ "json": [
{ {
"op": "add", "op": "add",
"path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)", "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)",
"value": { "value": {
"auditStamp": { "auditStamp": {
"time": 0, "time": 0,
"actor": "urn:li:corpuser:unknown" "actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)",
"type": "TRANSFORMED"
}
}, },
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)", {
"type": "TRANSFORMED" "op": "add",
} "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket~1my-folder~1my-file.txt,PROD)",
}, "value": {
{ "auditStamp": {
"op": "add", "time": 0,
"path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket~1my-folder~1my-file.txt,PROD)", "actor": "urn:li:corpuser:unknown"
"value": { },
"auditStamp": { "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket/my-folder/my-file.txt,PROD)",
"time": 0, "type": "TRANSFORMED"
"actor": "urn:li:corpuser:unknown" }
}, },
"dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket/my-folder/my-file.txt,PROD)", {
"type": "TRANSFORMED" "op": "add",
} "path": "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),foo)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD),bar)",
}, "value": {
{ "confidenceScore": 1.0
"op": "add", }
"path": "/fineGrainedLineages/TRANSFORM/upstreamType/DATASET/urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)", },
"value": 1.0 {
}, "op": "add",
{ "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),foo)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket~1my-folder~1my-file.txt,PROD),foo)",
"op": "add", "value": {
"path": "/fineGrainedLineages/TRANSFORM/upstreamType/DATASET/urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket~1my-folder~1my-file.txt,PROD)", "confidenceScore": 1.0
"value": 1.0 }
} }
] ]
} }
}, },
{ {
"entityType": "dataset", "entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
"changeType": "PATCH", "changeType": "PATCH",
"aspectName": "editableSchemaMetadata", "aspectName": "editableSchemaMetadata",
"aspect": { "aspect": {
"json": [ "json": [
{ {
"op": "add", "op": "add",
"path": "/editableSchemaFieldInfo/field1/globalTags/tags/urn:li:tag:tag1", "path": "/editableSchemaFieldInfo/field1/globalTags/tags/urn:li:tag:tag1",
"value": { "value": {
"tag": "urn:li:tag:tag1" "tag": "urn:li:tag:tag1"
} }
} }
] ]
} }
} }
] ]

View File

@ -1,4 +1,3 @@
import json
import pathlib import pathlib
import pytest import pytest
@ -7,6 +6,7 @@ from datahub.emitter.mce_builder import (
make_chart_urn, make_chart_urn,
make_dashboard_urn, make_dashboard_urn,
make_dataset_urn, make_dataset_urn,
make_schema_field_urn,
make_tag_urn, make_tag_urn,
) )
from datahub.ingestion.sink.file import write_metadata_file from datahub.ingestion.sink.file import write_metadata_file
@ -23,6 +23,7 @@ from datahub.metadata.schema_classes import (
from datahub.specific.chart import ChartPatchBuilder from datahub.specific.chart import ChartPatchBuilder
from datahub.specific.dashboard import DashboardPatchBuilder from datahub.specific.dashboard import DashboardPatchBuilder
from datahub.specific.dataset import DatasetPatchBuilder from datahub.specific.dataset import DatasetPatchBuilder
from tests.test_helpers import mce_helpers
def test_basic_dataset_patch_builder(): def test_basic_dataset_patch_builder():
@ -73,13 +74,28 @@ def test_complex_dataset_patch(
) )
.add_fine_grained_upstream_lineage( .add_fine_grained_upstream_lineage(
fine_grained_lineage=FineGrainedLineageClass( fine_grained_lineage=FineGrainedLineageClass(
upstreamType=FineGrainedLineageUpstreamTypeClass.DATASET, upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
upstreams=[ downstreams=[
make_dataset_urn( make_schema_field_urn(
platform="hive", name="fct_users_created_upstream", env="PROD" make_dataset_urn(
platform="hive",
name="fct_users_created",
env="PROD",
),
field_path="foo",
) )
], ],
downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET, upstreams=[
make_schema_field_urn(
make_dataset_urn(
platform="hive",
name="fct_users_created_upstream",
env="PROD",
),
field_path="bar",
)
],
downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
transformOperation="TRANSFORM", transformOperation="TRANSFORM",
confidenceScore=1.0, confidenceScore=1.0,
) )
@ -88,15 +104,26 @@ def test_complex_dataset_patch(
fine_grained_lineage=FineGrainedLineageClass( fine_grained_lineage=FineGrainedLineageClass(
upstreamType=FineGrainedLineageUpstreamTypeClass.DATASET, upstreamType=FineGrainedLineageUpstreamTypeClass.DATASET,
upstreams=[ upstreams=[
make_dataset_urn( make_schema_field_urn(
platform="s3", make_dataset_urn(
name="my-bucket/my-folder/my-file.txt", platform="s3",
env="PROD", name="my-bucket/my-folder/my-file.txt",
env="PROD",
),
field_path="foo",
) )
], ],
downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET, downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET,
transformOperation="TRANSFORM", downstreams=[
confidenceScore=1.0, make_schema_field_urn(
make_dataset_urn(
platform="hive",
name="fct_users_created",
env="PROD",
),
field_path="foo",
)
],
) )
) )
) )
@ -105,10 +132,10 @@ def test_complex_dataset_patch(
out_path = tmp_path / "patch.json" out_path = tmp_path / "patch.json"
write_metadata_file(out_path, patcher.build()) write_metadata_file(out_path, patcher.build())
assert json.loads(out_path.read_text()) == json.loads( mce_helpers.check_golden_file(
( pytestconfig,
pytestconfig.rootpath / "tests/unit/patch/complex_dataset_patch.json" out_path,
).read_text() pytestconfig.rootpath / "tests/unit/patch/complex_dataset_patch.json",
) )

View File

@ -56,18 +56,20 @@ public class PatchTest {
DatasetUrn upstreamUrn = DatasetUrn upstreamUrn =
DatasetUrn.createFromString( DatasetUrn.createFromString(
"urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"); "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)");
Urn schemaFieldUrn = Urn upstreamSchemaFieldUrn =
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD), foo)"); "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD), foo)");
Urn downstreamSchemaFieldUrn =
UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD), bar)");
MetadataChangeProposal upstreamPatch = MetadataChangeProposal upstreamPatch =
new UpstreamLineagePatchBuilder() new UpstreamLineagePatchBuilder()
.urn( .urn(
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)")) "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"))
.addUpstream(upstreamUrn, DatasetLineageType.TRANSFORMED) .addUpstream(upstreamUrn, DatasetLineageType.TRANSFORMED)
.addFineGrainedUpstreamDataset(upstreamUrn, null, "TRANSFORM") .addFineGrainedUpstreamField(
.addFineGrainedUpstreamField(schemaFieldUrn, null, "TRANSFORM", null) upstreamSchemaFieldUrn, null, "TRANSFORM", downstreamSchemaFieldUrn, null)
.addFineGrainedDownstreamField(schemaFieldUrn, null, "TRANSFORM", null)
.build(); .build();
Future<MetadataWriteResponse> response = restEmitter.emit(upstreamPatch); Future<MetadataWriteResponse> response = restEmitter.emit(upstreamPatch);
@ -83,12 +85,12 @@ public class PatchTest {
public void testLocalUpstreamRemove() { public void testLocalUpstreamRemove() {
RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().build()); RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().build());
try { try {
DatasetUrn upstreamUrn = Urn upstreamSchemaFieldUrn =
DatasetUrn.createFromString(
"urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)");
Urn schemaFieldUrn =
UrnUtils.getUrn( UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD), foo)"); "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD), foo)");
Urn downstreamSchemaFieldUrn =
UrnUtils.getUrn(
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD), bar)");
MetadataChangeProposal upstreamPatch = MetadataChangeProposal upstreamPatch =
new UpstreamLineagePatchBuilder() new UpstreamLineagePatchBuilder()
.urn( .urn(
@ -97,9 +99,8 @@ public class PatchTest {
.removeUpstream( .removeUpstream(
DatasetUrn.createFromString( DatasetUrn.createFromString(
"urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)")) "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"))
.removeFineGrainedUpstreamDataset(upstreamUrn, "TRANSFORM") .removeFineGrainedUpstreamField(
.removeFineGrainedUpstreamField(schemaFieldUrn, "TRANSFORM", null) upstreamSchemaFieldUrn, "TRANSFORM", downstreamSchemaFieldUrn, null)
.removeFineGrainedDownstreamField(schemaFieldUrn, "TRANSFORM", null)
.build(); .build();
Future<MetadataWriteResponse> response = restEmitter.emit(upstreamPatch); Future<MetadataWriteResponse> response = restEmitter.emit(upstreamPatch);