mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-09 18:24:48 +00:00

Co-authored-by: Ravindra Lanka <rlanka@acryl.io> Co-authored-by: Mayuri N <mayuri.nehate@gslab.com>
135 lines
4.1 KiB
Python
135 lines
4.1 KiB
Python
import time
|
|
|
|
import datahub.emitter.mce_builder as builder
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
|
|
AssertionInfo,
|
|
AssertionResult,
|
|
AssertionScope,
|
|
AssertionStdOperator,
|
|
AssertionType,
|
|
BatchAssertionResult,
|
|
DatasetColumnAssertion,
|
|
DatasetColumnStdAggFunc,
|
|
)
|
|
from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType
|
|
from datahub.metadata.schema_classes import PartitionSpecClass
|
|
|
|
|
|
def datasetUrn(tbl: str) -> str:
    """Return the dataset URN for table ``tbl`` on the "postgres" platform."""
    platform = "postgres"
    return builder.make_dataset_urn(platform, tbl)
|
|
|
|
|
|
def fldUrn(tbl: str, fld: str) -> str:
    """Return the schemaField URN for column ``fld`` of table ``tbl``.

    The URN has the tuple form ``urn:li:schemaField:(<dataset urn>,<field>)``,
    where the dataset URN comes from ``datasetUrn(tbl)``. The two parts are
    joined by a bare comma (no space), matching DataHub's canonical
    schemaField URN format.
    """
    return f"urn:li:schemaField:({datasetUrn(tbl)},{fld})"
|
|
|
|
|
|
def assertionUrn(info: AssertionInfo) -> str:
    """Return the assertion URN derived from ``info``.

    The assertion id is the guid that ``builder.datahub_guid`` computes from
    ``info.to_obj()``; that id is then wrapped by
    ``builder.make_assertion_urn``.
    """
    return builder.make_assertion_urn(builder.datahub_guid(info.to_obj()))
|
|
|
|
|
|
def emitAssertionResult(
    assertionResult: AssertionResult, datasetUrn: str
) -> None:
    """Emit ``assertionResult`` as the "assertionResult" aspect of a dataset.

    Args:
        assertionResult: The result aspect to attach.
        datasetUrn: URN of the dataset entity that receives the aspect.

    The proposal is sent through the module-level ``emitter``, so that name
    must already be bound when this function is called.
    """
    # Wrap the result in an UPSERT proposal against the dataset and send it.
    emitter.emit_mcp(
        MetadataChangeProposalWrapper(
            entityType="dataset",
            changeType=ChangeType.UPSERT,
            entityUrn=datasetUrn,
            aspectName="assertionResult",
            aspect=assertionResult,
        )
    )
|
|
|
|
|
|
# Column-level part of the assertion: a LESS_THAN check using the IDENTITY
# aggregation.
_column_assertion = DatasetColumnAssertion(
    stdOperator=AssertionStdOperator.LESS_THAN,
    nativeOperator="column_value_is_less_than",
    stdAggFunc=DatasetColumnStdAggFunc.IDENTITY,
)
_assertion_type = AssertionType(
    scope=AssertionScope.DATASET_COLUMN,
    datasetColumnAssertion=_column_assertion,
)

# The assertion itself, targeting column "col1" of "fooTable" with a
# max_value parameter of "99".
assertion_maxVal = AssertionInfo(
    datasetFields=[fldUrn("fooTable", "col1")],
    datasets=[datasetUrn("fooTable")],
    assertionType=_assertion_type,
    assertionParameters={"max_value": "99"},
    customProperties={"suite_name": "demo_suite"},
)

# Proposal that upserts the assertion's "assertionInfo" aspect.
assertion_maxVal_mcp = MetadataChangeProposalWrapper(
    entityType="assertion",
    changeType=ChangeType.UPSERT,
    entityUrn=assertionUrn(assertion_maxVal),
    aspectName="assertionInfo",
    aspect=assertion_maxVal,
)

# REST emitter pointed at the GMS endpoint.
emitter = DatahubRestEmitter("http://localhost:8080")

# Send the assertion definition.
emitter.emit_mcp(assertion_maxVal_mcp)
|
|
|
|
# Batch result for the first partition (country "IN"): success, with an
# actual aggregate value of 90.
assertionResult_maxVal_batch_partition1 = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    partitionSpec=PartitionSpecClass(partition=str([{"country": "IN"}])),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=True,
        externalUrl="http://example.com/uuid1",
        actualAggValue=90,
    ),
)

# Attach the result to the "fooTable" dataset.
emitAssertionResult(assertionResult_maxVal_batch_partition1, datasetUrn("fooTable"))
|
|
|
|
# Batch result for the second partition (country "US"): failure, with an
# actual aggregate value of 101.
assertionResult_maxVal_batch_partition2 = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    partitionSpec=PartitionSpecClass(partition=str([{"country": "US"}])),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=False,
        externalUrl="http://example.com/uuid1",
        actualAggValue=101,
    ),
)

# Attach the result to the "fooTable" dataset.
emitAssertionResult(assertionResult_maxVal_batch_partition2, datasetUrn("fooTable"))
|
|
|
|
# Batch result for the whole table (no partition spec): success, with an
# actual aggregate value of 93.
assertionResult_maxVal_batch_fulltable = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=True,
        externalUrl="http://example.com/uuid1",
        actualAggValue=93,
    ),
)

# Attach the result to the "fooTable" dataset.
emitAssertionResult(assertionResult_maxVal_batch_fulltable, datasetUrn("fooTable"))
|