2022-02-03 12:26:08 +05:30
|
|
|
import time
|
|
|
|
|
|
|
|
import datahub.emitter.mce_builder as builder
|
|
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
|
|
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
|
|
|
|
AssertionInfo,
|
|
|
|
AssertionResult,
|
2022-03-04 11:51:31 -08:00
|
|
|
AssertionResultType,
|
|
|
|
AssertionRunEvent,
|
|
|
|
AssertionRunStatus,
|
|
|
|
AssertionStdAggregation,
|
2022-02-03 12:26:08 +05:30
|
|
|
AssertionStdOperator,
|
|
|
|
AssertionType,
|
2022-03-04 11:51:31 -08:00
|
|
|
DatasetAssertionInfo,
|
|
|
|
DatasetAssertionScope,
|
2022-02-03 12:26:08 +05:30
|
|
|
)
|
|
|
|
from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType
|
2022-03-04 11:51:31 -08:00
|
|
|
from datahub.metadata.schema_classes import AssertionRunEventClass, PartitionSpecClass
|
2022-02-03 12:26:08 +05:30
|
|
|
|
|
|
|
|
|
|
|
def datasetUrn(tbl: str) -> str:
    """Build the dataset URN for table ``tbl`` on the "postgres" platform.

    Thin wrapper over ``builder.make_dataset_urn`` that pins the platform
    name so callers only have to supply the table name.
    """
    platform = "postgres"
    return builder.make_dataset_urn(platform, tbl)
|
|
|
|
|
|
|
|
|
|
|
|
def fldUrn(tbl: str, fld: str) -> str:
    """Return the schemaField URN for column ``fld`` of table ``tbl``.

    The result has the form ``urn:li:schemaField:(<dataset urn>,<fld>)``,
    where the dataset URN comes from ``datasetUrn(tbl)``.

    Args:
        tbl: Table name, forwarded to ``datasetUrn``.
        fld: Field (column) name placed after the comma.
    """
    # No space after the comma: DataHub writes schemaField URNs as
    # "(<dataset urn>,<field path>)", and a padded variant would not match
    # the URN other producers generate for the same column.
    return f"urn:li:schemaField:({datasetUrn(tbl)},{fld})"
|
|
|
|
|
|
|
|
|
|
|
|
def assertionUrn(info: AssertionInfo) -> str:
    """Return the URN under which this example publishes its assertion.

    The value is a fixed literal; ``info`` is accepted but not inspected.
    """
    fixed_urn = "urn:li:assertion:432475190cc846f2894b5b3aa4d55af2"
    return fixed_urn
|
2022-02-03 12:26:08 +05:30
|
|
|
|
|
|
|
|
2022-03-04 11:51:31 -08:00
|
|
|
def emitAssertionResult(assertionResult: AssertionRunEvent) -> None:
    """Emit one assertion run event as an ``assertionRunEvent`` aspect.

    Args:
        assertionResult: The run event to publish. Its ``assertionUrn``
            attribute becomes the entity URN of the proposal, so this must
            be the run event itself rather than the ``AssertionResult``
            nested inside it.

    Uses the module-level ``emitter``, which therefore has to be assigned
    before the first call.
    """
    dataset_assertionRunEvent_mcp = MetadataChangeProposalWrapper(
        entityType="assertion",
        changeType=ChangeType.UPSERT,
        entityUrn=assertionResult.assertionUrn,
        aspectName="assertionRunEvent",
        aspect=assertionResult,
    )

    # Emit BatchAssertion Result! (timeseries aspect)
    emitter.emit_mcp(dataset_assertionRunEvent_mcp)
|
2022-02-03 12:26:08 +05:30
|
|
|
|
|
|
|
|
|
|
|
# Describe the assertion: a column-scoped LESS_THAN check on bazTable.col1
# that carries the native parameter max_value="99".
assertion_maxVal = AssertionInfo(
    type=AssertionType.DATASET,
    customProperties={"suite_name": "demo_suite"},
    datasetAssertion=DatasetAssertionInfo(
        dataset=datasetUrn("bazTable"),
        scope=DatasetAssertionScope.DATASET_COLUMN,
        fields=[fldUrn("bazTable", "col1")],
        aggregation=AssertionStdAggregation.IDENTITY,
        operator=AssertionStdOperator.LESS_THAN,
        nativeType="column_value_is_less_than",
        nativeParameters={"max_value": "99"},
    ),
)

# Wrap the assertion info in a change proposal that upserts the
# "assertionInfo" aspect of the assertion entity.
assertion_maxVal_mcp = MetadataChangeProposalWrapper(
    entityType="assertion",
    entityUrn=assertionUrn(assertion_maxVal),
    changeType=ChangeType.UPSERT,
    aspectName="assertionInfo",
    aspect=assertion_maxVal,
)

# REST emitter pointed at the GMS endpoint on localhost.
emitter = DatahubRestEmitter("http://localhost:8080")

# Publish the assertion definition before any run results are sent.
emitter.emit_mcp(assertion_maxVal_mcp)
|
|
|
|
|
|
|
|
# Run event for the partition-1 batch (partition spec country=IN):
# a COMPLETE run whose result is SUCCESS with an actual aggregate value of 90.
assertionResult_maxVal_batch_partition1 = AssertionRunEvent(
    runId="uuid1",
    timestampMillis=int(time.time() * 1000),
    status=AssertionRunStatus.COMPLETE,
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("bazTable"),
    partitionSpec=PartitionSpecClass(partition=str([{"country": "IN"}])),
    result=AssertionResult(
        type=AssertionResultType.SUCCESS,
        actualAggValue=90,
        externalUrl="http://example.com/uuid1",
    ),
)

# Publish the partition-1 run event.
emitAssertionResult(assertionResult_maxVal_batch_partition1)
|
|
|
|
|
|
|
|
# Run event for the partition-2 batch (partition spec country=US):
# a COMPLETE run whose result is FAILURE with an actual aggregate value of 101.
# NOTE(review): built with AssertionRunEventClass (from
# datahub.metadata.schema_classes) while the partition-1 event uses
# AssertionRunEvent — presumably the same type; confirm.
assertionResult_maxVal_batch_partition2 = AssertionRunEventClass(
    runId="uuid1",
    timestampMillis=int(time.time() * 1000),
    status=AssertionRunStatus.COMPLETE,
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("bazTable"),
    partitionSpec=PartitionSpecClass(partition=str([{"country": "US"}])),
    result=AssertionResult(
        type=AssertionResultType.FAILURE,
        actualAggValue=101,
        externalUrl="http://example.com/uuid1",
    ),
)

# Publish the partition-2 run event.
emitAssertionResult(assertionResult_maxVal_batch_partition2)
|
|
|
|
|
|
|
|
# Run event for the full-table batch (no partition spec is passed):
# a COMPLETE run whose result is SUCCESS with an actual aggregate value of 93.
# NOTE(review): built with AssertionRunEventClass (from
# datahub.metadata.schema_classes) while the partition-1 event uses
# AssertionRunEvent — presumably the same type; confirm.
assertionResult_maxVal_batch_fulltable = AssertionRunEventClass(
    runId="uuid1",
    timestampMillis=int(time.time() * 1000),
    status=AssertionRunStatus.COMPLETE,
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("bazTable"),
    result=AssertionResult(
        type=AssertionResultType.SUCCESS,
        actualAggValue=93,
        externalUrl="http://example.com/uuid1",
    ),
)

# Publish the full-table run event.
emitAssertionResult(assertionResult_maxVal_batch_fulltable)
|