feat(model): data quality model (#3787)

Co-authored-by: Ravindra Lanka <rlanka@acryl.io>
Co-authored-by: Mayuri N <mayuri.nehate@gslab.com>
This commit is contained in:
ksrinath 2022-02-03 12:26:08 +05:30 committed by GitHub
parent ded16809da
commit 4c24f386a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 1034 additions and 51 deletions

View File

@ -0,0 +1,134 @@
import json
import time

import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
    AssertionInfo,
    AssertionResult,
    AssertionScope,
    AssertionStdOperator,
    AssertionType,
    BatchAssertionResult,
    DatasetColumnAssertion,
    DatasetColumnStdAggFunc,
)
from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType
from datahub.metadata.schema_classes import PartitionSpecClass
def datasetUrn(tbl: str) -> str:
    """Return the dataset URN for table ``tbl`` on the "postgres" platform."""
    platform = "postgres"
    return builder.make_dataset_urn(platform, tbl)
def fldUrn(tbl: str, fld: str) -> str:
    """Return the schemaField URN for column ``fld`` of table ``tbl``.

    The URN has the form ``urn:li:schemaField:(<dataset urn>,<field>)``.
    No whitespace is placed after the comma, so that the field segment is
    exactly ``fld`` rather than ``fld`` with a leading space.
    """
    parent_urn = datasetUrn(tbl)
    return f"urn:li:schemaField:({parent_urn},{fld})"
def assertionUrn(info: AssertionInfo) -> str:
    """Derive the assertion URN for ``info``.

    The URN id is the GUID that ``builder.datahub_guid`` computes from the
    object form of ``info`` (``info.to_obj()``).
    """
    return builder.make_assertion_urn(builder.datahub_guid(info.to_obj()))
def emitAssertionResult(
    assertionResult: AssertionResult, datasetUrn: str
) -> None:
    """Emit ``assertionResult`` as the ``assertionResult`` aspect of a dataset.

    The result is wrapped in an UPSERT change proposal addressed to the
    dataset ``datasetUrn`` and sent through the module-level ``emitter``,
    which must already be bound when this function is called.

    Note: inside this function the ``datasetUrn`` parameter shadows the
    module-level helper function of the same name.
    """
    # Emit the batch assertion result (timeseries aspect) in one step.
    emitter.emit_mcp(
        MetadataChangeProposalWrapper(
            entityType="dataset",
            changeType=ChangeType.UPSERT,
            entityUrn=datasetUrn,
            aspectName="assertionResult",
            aspect=assertionResult,
        )
    )
# Example flow: register one column-level assertion ("col1 of fooTable is
# less than 99") and then report three evaluation results for it -- two for
# individual partitions and one without a partition spec.

# Construct an assertion object.
assertion_maxVal = AssertionInfo(
    datasetFields=[fldUrn("fooTable", "col1")],
    datasets=[datasetUrn("fooTable")],
    assertionType=AssertionType(
        scope=AssertionScope.DATASET_COLUMN,
        datasetColumnAssertion=DatasetColumnAssertion(
            stdOperator=AssertionStdOperator.LESS_THAN,
            nativeOperator="column_value_is_less_than",
            stdAggFunc=DatasetColumnStdAggFunc.IDENTITY,
        ),
    ),
    assertionParameters={"max_value": "99"},
    customProperties={"suite_name": "demo_suite"},
)

# Construct a MetadataChangeProposalWrapper object.
assertion_maxVal_mcp = MetadataChangeProposalWrapper(
    entityType="assertion",
    changeType=ChangeType.UPSERT,
    entityUrn=assertionUrn(assertion_maxVal),
    aspectName="assertionInfo",
    aspect=assertion_maxVal,
)

# Create an emitter to the GMS REST API.
# emitAssertionResult() reads this module-level name, so it must be bound
# before the first emitAssertionResult() call below.
emitter = DatahubRestEmitter("http://localhost:8080")

# Emit Assertion entity info object!
emitter.emit_mcp(assertion_maxVal_mcp)

# Construct batch assertion result object for partition 1 batch.
# The partition is serialized with json.dumps so the stored string is valid
# JSON; str() would store a Python repr with single quotes instead.
assertionResult_maxVal_batch_partition1 = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    partitionSpec=PartitionSpecClass(partition=json.dumps([{"country": "IN"}])),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=True,
        externalUrl="http://example.com/uuid1",
        actualAggValue=90,
    ),
)

emitAssertionResult(
    assertionResult_maxVal_batch_partition1,
    datasetUrn("fooTable"),
)

# Construct batch assertion result object for partition 2 batch.
assertionResult_maxVal_batch_partition2 = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    partitionSpec=PartitionSpecClass(partition=json.dumps([{"country": "US"}])),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=False,
        externalUrl="http://example.com/uuid1",
        actualAggValue=101,
    ),
)

emitAssertionResult(
    assertionResult_maxVal_batch_partition2,
    datasetUrn("fooTable"),
)

# Construct batch assertion result object for full table batch.
# No partitionSpec is passed for this result.
assertionResult_maxVal_batch_fulltable = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=True,
        externalUrl="http://example.com/uuid1",
        actualAggValue=93,
    ),
)

emitAssertionResult(
    assertionResult_maxVal_batch_fulltable,
    datasetUrn("fooTable"),
)

View File

@ -1,14 +1,17 @@
"""Convenience functions for creating MCEs"""
import json
import logging
import re
import time
from enum import Enum
from hashlib import md5
from typing import Any, List, Optional, Type, TypeVar, Union, cast, get_type_hints
import typing_inspect
from avrogen.dict_wrapper import DictWrapper
from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION
from datahub.emitter.serialization_helper import pre_json_transform
from datahub.metadata.com.linkedin.pegasus2avro.common import GlossaryTerms
from datahub.metadata.schema_classes import (
AuditStampClass,
@ -82,6 +85,18 @@ def dataset_urn_to_key(dataset_urn: str) -> Optional[DatasetKeyClass]:
return None
def datahub_guid(obj: dict) -> str:
    """Return the MD5 hex digest of a compact JSON rendering of ``obj``.

    ``obj`` is first passed through ``pre_json_transform`` and then
    serialized with sorted keys and no whitespace between separators, so
    the digest does not depend on the key insertion order of ``obj``.
    """
    serialized = json.dumps(
        pre_json_transform(obj), separators=(",", ":"), sort_keys=True
    )
    return md5(serialized.encode("utf-8")).hexdigest()
def make_assertion_urn(assertion_id: str) -> str:
    """Return the assertion URN ``urn:li:assertion:<assertion_id>``."""
    urn = f"urn:li:assertion:{assertion_id}"
    return urn
def make_user_urn(username: str) -> str:
    """Return the corp-user URN ``urn:li:corpuser:<username>``."""
    urn = f"urn:li:corpuser:{username}"
    return urn

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1117,7 +1117,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", \"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", \"1989-08-24\"]}]}",
"value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", \"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", 
\"1989-08-24\"]}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -1136,7 +1136,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 112, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 0.08928571428571429, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"10001\", \"max\": \"10010\", \"mean\": \"10005.3125\", \"median\": \"10005.0\", \"stdev\": \"2.834889609688869\", \"distinctValueFrequencies\": [{\"value\": \"10001\", \"frequency\": 17}, {\"value\": \"10002\", \"frequency\": 6}, {\"value\": \"10003\", \"frequency\": 7}, {\"value\": \"10004\", \"frequency\": 16}, {\"value\": \"10005\", \"frequency\": 13}, {\"value\": \"10006\", \"frequency\": 12}, {\"value\": \"10007\", \"frequency\": 14}, {\"value\": \"10008\", \"frequency\": 3}, {\"value\": \"10009\", \"frequency\": 18}, {\"value\": \"10010\", \"frequency\": 6}], \"sampleValues\": [\"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10002\", \"10002\", \"10002\"]}, {\"fieldPath\": \"salary\", \"uniqueCount\": 111, \"uniqueProportion\": 0.9910714285714286, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"40000\", \"max\": \"94692\", \"mean\": \"68303.11607142857\", \"median\": \"69544.0\", \"stdev\": \"15505.291475014095\", \"sampleValues\": [\"60117\", \"62102\", \"66074\", \"66596\", \"66961\", \"71046\", \"74333\", \"75286\", \"75994\", \"76884\", \"80013\", \"81025\", \"81097\", \"84917\", \"85112\", \"85097\", \"88958\", \"65909\", \"65909\", \"67534\"]}, {\"fieldPath\": \"from_date\", \"uniqueCount\": 106, \"uniqueProportion\": 0.9464285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"2002-06-22\", \"sampleValues\": [\"1986-06-26\", \"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", 
\"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"1996-08-03\", \"1997-08-03\", \"1998-08-03\"]}, {\"fieldPath\": \"to_date\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8839285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1986-02-18\", \"max\": \"9999-01-01\", \"sampleValues\": [\"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"9999-01-01\", \"1997-08-03\", \"1998-08-03\", \"1999-08-03\"]}]}",
"value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 112, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 0.08928571428571429, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"10001\", \"max\": \"10010\", \"mean\": \"10005.3125\", \"median\": \"10005.0\", \"stdev\": \"2.834889609688869\", \"distinctValueFrequencies\": [{\"value\": \"10001\", \"frequency\": 17}, {\"value\": \"10002\", \"frequency\": 6}, {\"value\": \"10003\", \"frequency\": 7}, {\"value\": \"10004\", \"frequency\": 16}, {\"value\": \"10005\", \"frequency\": 13}, {\"value\": \"10006\", \"frequency\": 12}, {\"value\": \"10007\", \"frequency\": 14}, {\"value\": \"10008\", \"frequency\": 3}, {\"value\": \"10009\", \"frequency\": 18}, {\"value\": \"10010\", \"frequency\": 6}], \"sampleValues\": [\"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10002\", \"10002\", \"10002\"]}, {\"fieldPath\": \"salary\", \"uniqueCount\": 111, \"uniqueProportion\": 0.9910714285714286, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"40000\", \"max\": \"94692\", \"mean\": \"68303.11607142857\", \"median\": \"69544.0\", \"stdev\": \"15505.291475014095\", \"sampleValues\": [\"60117\", \"62102\", \"66074\", \"66596\", \"66961\", \"71046\", \"74333\", \"75286\", \"75994\", \"76884\", \"80013\", \"81025\", \"81097\", \"84917\", \"85112\", \"85097\", \"88958\", \"65909\", \"65909\", \"67534\"]}, {\"fieldPath\": \"from_date\", \"uniqueCount\": 106, \"uniqueProportion\": 0.9464285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"2002-06-22\", \"sampleValues\": [\"1986-06-26\", \"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", 
\"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"1996-08-03\", \"1997-08-03\", \"1998-08-03\"]}, {\"fieldPath\": \"to_date\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8839285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1986-02-18\", \"max\": \"9999-01-01\", \"sampleValues\": [\"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"9999-01-01\", \"1997-08-03\", \"1998-08-03\", \"1999-08-03\"]}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -1155,7 +1155,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}",
"value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -1174,7 +1174,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
"value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -1193,7 +1193,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
"value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
"contentType": "application/json"
},
"systemMetadata": {

View File

@ -7,7 +7,7 @@
"changeType": "UPSERT",
"aspectName": "operation",
"aspect": {
"value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"INSERT\"}",
"value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"INSERT\"}",
"contentType": "application/json"
},
"systemMetadata": {
@ -26,7 +26,7 @@
"changeType": "UPSERT",
"aspectName": "operation",
"aspect": {
"value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"INSERT\"}",
"value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"INSERT\"}",
"contentType": "application/json"
},
"systemMetadata": {
@ -45,7 +45,7 @@
"changeType": "UPSERT",
"aspectName": "operation",
"aspect": {
"value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"DELETE\"}",
"value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"DELETE\"}",
"contentType": "application/json"
},
"systemMetadata": {
@ -64,7 +64,7 @@
"changeType": "UPSERT",
"aspectName": "operation",
"aspect": {
"value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"DELETE\"}",
"value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"DELETE\"}",
"contentType": "application/json"
},
"systemMetadata": {
@ -83,7 +83,7 @@
"changeType": "UPSERT",
"aspectName": "datasetUsageStatistics",
"aspect": {
"value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select userid from users\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 1, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": []}",
"value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select userid from users\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 1, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": []}",
"contentType": "application/json"
},
"systemMetadata": {
@ -102,7 +102,7 @@
"changeType": "UPSERT",
"aspectName": "datasetUsageStatistics",
"aspect": {
"value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select catid from category\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:real_shirshanka\", \"count\": 1, \"userEmail\": \"real_shirshanka@acryl.io\"}], \"fieldCounts\": []}",
"value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select catid from category\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:real_shirshanka\", \"count\": 1, \"userEmail\": \"real_shirshanka@acryl.io\"}], \"fieldCounts\": []}",
"contentType": "application/json"
},
"systemMetadata": {

View File

@ -7,7 +7,7 @@
"changeType": "UPSERT",
"aspectName": "datasetUsageStatistics",
"aspect": {
"value": "{\"timestampMillis\": 1634169600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 2, \"topSqlQueries\": [\"select * from testcatalog.testschema.testtable limit 100\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 2, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": [{\"fieldPath\": \"column1\", \"count\": 2}, {\"fieldPath\": \"column2\", \"count\": 2}]}",
"value": "{\"timestampMillis\": 1634169600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 2, \"topSqlQueries\": [\"select * from testcatalog.testschema.testtable limit 100\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 2, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": [{\"fieldPath\": \"column1\", \"count\": 2}, {\"fieldPath\": \"column2\", \"count\": 2}]}",
"contentType": "application/json"
},
"systemMetadata": {

View File

@ -531,7 +531,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}]}",
"value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -550,7 +550,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"distinctValueFrequencies\": [{\"value\": \"2021-09-27\", \"frequency\": 2}], \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
"value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"distinctValueFrequencies\": [{\"value\": \"2021-09-27\", \"frequency\": 2}], \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -569,7 +569,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}",
"value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}",
"contentType": "application/json"
},
"systemMetadata": {
@ -588,7 +588,7 @@
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
"value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
"contentType": "application/json"
},
"systemMetadata": {

View File

@ -19,6 +19,9 @@ public class MappingsBuilder {
public static final String EVENT_FIELD = "event";
public static final String SYSTEM_METADATA_FIELD = "systemMetadata";
public static final String IS_EXPLODED_FIELD = "isExploded";
public static final String PARTITION_SPEC = "partitionSpec";
public static final String PARTITION_SPEC_PARTITION = "partition";
public static final String PARTITION_SPEC_TIME_PARTITION = "timePartition";
private MappingsBuilder() {
}
@ -36,6 +39,9 @@ public class MappingsBuilder {
mappings.put(TIMESTAMP_FIELD, ImmutableMap.of("type", "date"));
mappings.put(TIMESTAMP_MILLIS_FIELD, ImmutableMap.of("type", "date"));
mappings.put(EVENT_GRANULARITY, ImmutableMap.of("type", "keyword"));
mappings.put(PARTITION_SPEC, ImmutableMap.of("properties",
ImmutableMap.of(PARTITION_SPEC_PARTITION, ImmutableMap.of("type", "keyword"), PARTITION_SPEC_TIME_PARTITION,
ImmutableMap.of("type", "keyword"))));
mappings.put(EVENT_FIELD, ImmutableMap.of("type", "object", "enabled", false));
mappings.put(SYSTEM_METADATA_FIELD, ImmutableMap.of("type", "object", "enabled", false));
mappings.put(IS_EXPLODED_FIELD, ImmutableMap.of("type", "boolean"));

View File

@ -12,6 +12,7 @@ import com.linkedin.metadata.models.TimeseriesFieldSpec;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.timeseries.elastic.indexbuilder.MappingsBuilder;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import com.linkedin.timeseries.AggregationSpec;
import com.linkedin.timeseries.GenericTable;
@ -190,6 +191,16 @@ public class ESAggregatedStatsDAO {
return timeseriesFieldCollectionSpec.getPegasusSchema().getType();
}
} else if (memberParts.length == 2) {
// Check if partitionSpec: only its `partition` and `timePartition` members are recognised,
// and both are reported as strings.
if (memberParts[0].equals(MappingsBuilder.PARTITION_SPEC)) {
  if (memberParts[1].equals(MappingsBuilder.PARTITION_SPEC_PARTITION) || memberParts[1].equals(
      MappingsBuilder.PARTITION_SPEC_TIME_PARTITION)) {
    return DataSchema.Type.STRING;
  } else {
    // Separator added so the offending member name is not glued to the word "member".
    throw new IllegalArgumentException("Unknown partitionSpec member: " + memberParts[1]);
  }
}
// This is either a collection key/stat.
TimeseriesFieldCollectionSpec timeseriesFieldCollectionSpec =
aspectSpec.getTimeseriesFieldCollectionSpecMap().get(memberParts[0]);

View File

@ -1,5 +1,6 @@
package com.linkedin.metadata.timeseries.transformer;
import com.datahub.util.RecordUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -7,10 +8,10 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.DataMap;
import com.linkedin.data.schema.ArrayDataSchema;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.template.RecordTemplate;
import com.datahub.util.RecordUtils;
import com.linkedin.metadata.extractor.FieldExtractor;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec;
@ -87,6 +88,30 @@ public class TimeseriesAspectTransformer {
throw new IllegalArgumentException("Failed to convert eventGranulairty to Json string!", e);
}
}
// PartitionSpec handling: when the aspect carries a partitionSpec, copy exactly one of its
// `partition` / `timePartition` members into the document under the partitionSpec field.
DataMap partitionSpec = (DataMap) timeseriesAspect.data().get(MappingsBuilder.PARTITION_SPEC);
if (partitionSpec != null) {
  Object partition = partitionSpec.get(MappingsBuilder.PARTITION_SPEC_PARTITION);
  Object timePartition = partitionSpec.get(MappingsBuilder.PARTITION_SPEC_TIME_PARTITION);
  // The two members are mutually exclusive, and at least one of them is required.
  if (partition != null && timePartition != null) {
    throw new IllegalArgumentException("Both partition and timePartition cannot be specified in partitionSpec!");
  } else if (partition != null) {
    // partition is written using its toString() form.
    ObjectNode partitionDoc = JsonNodeFactory.instance.objectNode();
    partitionDoc.put(MappingsBuilder.PARTITION_SPEC_PARTITION, partition.toString());
    document.set(MappingsBuilder.PARTITION_SPEC, partitionDoc);
  } else if (timePartition != null) {
    // timePartition is serialised to a JSON string before being written.
    ObjectNode timePartitionDoc = JsonNodeFactory.instance.objectNode();
    try {
      timePartitionDoc.put(MappingsBuilder.PARTITION_SPEC_TIME_PARTITION,
          OBJECT_MAPPER.writeValueAsString(timePartition));
    } catch (JsonProcessingException e) {
      throw new IllegalArgumentException("Failed to convert timePartition to Json string!", e);
    }
    document.set(MappingsBuilder.PARTITION_SPEC, timePartitionDoc);
  } else {
    // A partitionSpec that sets neither member is rejected.
    throw new IllegalArgumentException("Both partition and timePartition cannot be null in partitionSpec.");
  }
}
String messageId = (String) timeseriesAspect.data().get(MappingsBuilder.MESSAGE_ID_FIELD);
if (messageId != null) {
document.put(MappingsBuilder.MESSAGE_ID_FIELD, messageId);
@ -198,6 +223,10 @@ public class TimeseriesAspectTransformer {
if (messageId != null) {
docId += messageId.toString();
}
JsonNode partitionSpec = document.get(MappingsBuilder.PARTITION_SPEC);
if (partitionSpec != null) {
docId += partitionSpec.toString();
}
return DigestUtils.md5Hex(docId);
}

View File

@ -1,5 +1,7 @@
package com.linkedin.metadata.timeseries.elastic;
import com.datahub.test.BatchType;
import com.datahub.test.ComplexNestedRecord;
import com.datahub.test.TestEntityComponentProfile;
import com.datahub.test.TestEntityComponentProfileArray;
import com.datahub.test.TestEntityProfile;
@ -11,6 +13,8 @@ import com.linkedin.common.urn.TestEntityUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.StringArray;
import com.linkedin.data.template.StringArrayArray;
import com.linkedin.data.template.StringMap;
import com.linkedin.data.template.StringMapArray;
import com.linkedin.metadata.aspect.EnvelopedAspect;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.DataSchemaFactory;
@ -155,6 +159,15 @@ public class ElasticSearchTimeseriesAspectServiceTest {
componentProfile2.setKey("col2");
componentProfile2.setStat(stat + 2);
testEntityProfile.setComponentProfiles(new TestEntityComponentProfileArray(componentProfile1, componentProfile2));
StringMap stringMap1 = new StringMap();
stringMap1.put("p_key1", "p_val1");
StringMap stringMap2 = new StringMap();
stringMap2.put("p_key2", "p_val2");
ComplexNestedRecord nestedRecord = new ComplexNestedRecord().setType(BatchType.PARTITION_BATCH)
.setPartitions(new StringMapArray(stringMap1, stringMap2));
testEntityProfile.setAComplexNestedRecord(nestedRecord);
return testEntityProfile;
}
@ -346,6 +359,50 @@ public class ElasticSearchTimeseriesAspectServiceTest {
_testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString())));
}
/**
 * Requests the LATEST aggregation of the aComplexNestedRecord field, bucketed by day, and checks that
 * the single returned row deserialises to the record stored at _startTime + 23 * TIME_INCREMENT.
 */
@Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"})
public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() {
  // Filter on the urn and on the time window [_startTime, _startTime + 23 * TIME_INCREMENT]
  Criterion hasUrnCriterion =
      new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString());
  Criterion startTimeCriterion = new Criterion().setField(ES_FILED_TIMESTAMP)
      .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO)
      .setValue(_startTime.toString());
  Criterion endTimeCriterion = new Criterion().setField(ES_FILED_TIMESTAMP)
      .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
      .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT));
  Filter filter =
      QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
  // Aggregate on the latest value of the aComplexNestedRecord field
  AggregationSpec latestStatAggregationSpec =
      new AggregationSpec().setAggregationType(AggregationType.LATEST).setFieldPath("aComplexNestedRecord");
  // The only grouping bucket is the timestamp field, with a one-day window.
  GroupingBucket timestampBucket = new GroupingBucket().setKey(ES_FILED_TIMESTAMP)
      .setType(GroupingBucketType.DATE_GROUPING_BUCKET)
      .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY));
  GenericTable resultTable = _elasticSearchTimeseriesAspectService.getAggregatedStats(ENTITY_NAME, ASPECT_NAME,
      new AggregationSpec[]{latestStatAggregationSpec}, filter, new GroupingBucket[]{timestampBucket});
  // Validate column names
  assertEquals(resultTable.getColumnNames(), new StringArray(ES_FILED_TIMESTAMP, "latest_aComplexNestedRecord"));
  // Validate column types
  assertEquals(resultTable.getColumnTypes(), new StringArray("long", "record"));
  // Validate rows
  assertNotNull(resultTable.getRows());
  assertEquals(resultTable.getRows().size(), 1);
  assertEquals(resultTable.getRows().get(0).get(0), _startTime.toString());
  try {
    // The aggregated record comes back as a JSON string in the second column; parse it before comparing.
    ComplexNestedRecord latestAComplexNestedRecord =
        OBJECT_MAPPER.readValue(resultTable.getRows().get(0).get(1), ComplexNestedRecord.class);
    assertEquals(latestAComplexNestedRecord,
        _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getAComplexNestedRecord());
  } catch (JsonProcessingException e) {
    fail("Unexpected exception thrown" + e);
  }
}
@Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"})
public void testGetAggregatedStatsLatestStrArrayDay1() {
// Filter is only on the urn

View File

@ -0,0 +1,51 @@
namespace com.linkedin.assertion
import com.linkedin.common.CustomProperties
import com.linkedin.common.ExternalReference
import com.linkedin.common.Urn
/**
 * Information about an assertion
 */
@Aspect = {
  "name": "assertionInfo"
}
record AssertionInfo includes CustomProperties, ExternalReference {
  /**
   * One or more dataset schema fields that are targeted by this assertion
   */
  @Relationship = {
    "/*": {
      "name": "Asserts",
      "entityTypes": [ "schemaField" ]
    }
  }
  datasetFields: optional array[Urn]

  /**
   * One or more datasets that are targeted by this assertion
   */
  @Relationship = {
    "/*": {
      "name": "Asserts",
      "entityTypes": [ "dataset" ]
    }
  }
  datasets: optional array[Urn]

  /**
   * Type of assertion
   */
  assertionType: AssertionType

  /**
   * Logic for assertion such as implementation of custom nativeOperator
   */
  assertionLogic: optional string

  /**
   * Parameters required for the assertion. e.g. min_value, max_value, value, columns
   */
  assertionParameters: map[string, string] = { }
}

View File

@ -0,0 +1,51 @@
namespace com.linkedin.assertion
import com.linkedin.timeseries.TimeseriesAspectBase
import com.linkedin.common.ExternalReference
import com.linkedin.common.Urn
/**
 * The results of evaluating the assertion on the batch
 */
@Aspect = {
  "name": "assertionResult",
  "type": "timeseries",
}
record AssertionResult includes TimeseriesAspectBase {
  /**
   * Urn of assertion which is evaluated
   */
  @TimeseriesField = {}
  assertionUrn: Urn

  /**
   * Urn of entity being asserted
   */
  // Example: the dataset urn, if a dataset is being asserted
  @TimeseriesField = {}
  asserteeUrn: Urn

  /**
   * Specification of the batch whose data quality is evaluated
   */
  batchSpec: optional BatchSpec

  /**
   * Results of assertion
   */
  @TimeseriesField = {}
  batchAssertionResult: BatchAssertionResult

  /**
   * Native Run identifier of platform evaluating the assertions
   */
  // Multiple assertions could occur in the same evaluator run
  nativeEvaluatorRunId: optional string

  /**
   * Runtime parameters of evaluation
   */
  runtimeContext: map[string, string] = { }
}

View File

@ -0,0 +1,63 @@
namespace com.linkedin.assertion
/**
 * Standardized operators that an assertion can apply to the value being asserted
 */
enum AssertionStdOperator {
  /**
   * Value being asserted is between min_value and max_value
   */
  BETWEEN

  /**
   * Value being asserted is less than max_value
   */
  LESS_THAN

  /**
   * Value being asserted is less than or equal to max_value
   */
  LESS_THAN_OR_EQUAL_TO

  /**
   * Value being asserted is greater than min_value
   */
  GREATER_THAN

  /**
   * Value being asserted is greater than or equal to min_value
   */
  GREATER_THAN_OR_EQUAL_TO

  /**
   * Value being asserted is equal to value
   */
  EQUAL_TO

  /**
   * Value being asserted is not null
   */
  NOT_NULL

  /**
   * Value being asserted contains value
   */
  CONTAIN

  /**
   * Value being asserted ends with value
   */
  END_WITH

  /**
   * Value being asserted starts with value
   */
  START_WITH

  /**
   * Value being asserted is one of the array values
   */
  IN

  /**
   * Other
   */
  _NATIVE_
}

View File

@ -0,0 +1,32 @@
namespace com.linkedin.assertion
/**
 * Type of Assertion
 */
record AssertionType {
  /**
   * Scope of Assertion
   */
  // NOTE: CROSS_DATASET has no matching details field in this record, unlike the other three scopes.
  scope: enum AssertionScope {
    DATASET_COLUMN
    DATASET_ROWS
    DATASET_SCHEMA
    CROSS_DATASET
  }

  /**
   * Assertion details for scope DATASET_COLUMN
   */
  datasetColumnAssertion: optional DatasetColumnAssertion

  /**
   * Assertion details for scope DATASET_ROWS
   */
  datasetRowsAssertion: optional DatasetRowsAssertion

  /**
   * Assertion details for scope DATASET_SCHEMA
   */
  datasetSchemaAssertion: optional DatasetSchemaAssertion
}

View File

@ -0,0 +1,40 @@
namespace com.linkedin.assertion
/**
 * Outcome of evaluating an assertion on a batch
 */
record BatchAssertionResult {
  /**
   * Indicator of whether the constraint is fully satisfied for the batch
   */
  success: boolean

  /**
   * Number of rows for evaluated batch
   */
  rowCount: optional long

  /**
   * Number of rows with missing value for evaluated batch
   */
  missingCount: optional long

  /**
   * Number of rows with unexpected value for evaluated batch
   */
  unexpectedCount: optional long

  /**
   * Observed aggregate value for evaluated batch
   */
  actualAggValue: optional float

  /**
   * Other results of evaluation
   */
  nativeResults: map[string, string] = { }

  /**
   * URL where the reference exists
   */
  // TODO: change the type to optional Url; that did not work when first attempted
  externalUrl: optional string
}

View File

@ -0,0 +1,24 @@
namespace com.linkedin.assertion
import com.linkedin.common.CustomProperties
/**
 * A batch on which certain operations, e.g. data quality evaluation, are done.
 */
record BatchSpec includes CustomProperties {
  /**
   * The native identifier as specified by the system operating on the batch.
   */
  nativeBatchId: optional string

  /**
   * A query that identifies a batch of data
   */
  query: optional string

  /**
   * Any limit to the number of rows in the batch, if applied
   */
  limit: optional int
}

View File

@ -0,0 +1,83 @@
namespace com.linkedin.assertion
import com.linkedin.common.Urn
/**
 * Assertion on column of a dataset
 */
record DatasetColumnAssertion {
  /**
   * Standardized assertion operator
   */
  stdOperator: AssertionStdOperator

  /**
   * Native assertion operator
   */
  nativeOperator: optional string // filled with the platform specific native operator string

  /**
   * Standardized aggregation function applied on column values
   */
  stdAggFunc: enum DatasetColumnStdAggFunc {
    /**
     * Assertion is applied on individual column value
     */
    IDENTITY

    /**
     * Assertion is applied on column mean
     */
    MEAN

    /**
     * Assertion is applied on column median
     */
    MEDIAN

    /**
     * Assertion is applied on number of distinct values in column
     */
    UNIQUE_COUNT

    /**
     * Assertion is applied on proportion of distinct values in column
     */
    // NOTE(review): the symbol is misspelled (PROPOTION vs PROPORTION); renaming it would change the schema.
    UNIQUE_PROPOTION

    /**
     * Assertion is applied on number of null values in column
     */
    NULL_COUNT

    /**
     * Assertion is applied on proportion of null values in column
     */
    NULL_PROPORTION

    /**
     * Assertion is applied on column std deviation
     */
    STDDEV

    /**
     * Assertion is applied on column min
     */
    MIN

    /**
     * Assertion is applied on column max
     */
    MAX

    /**
     * Other
     */
    _NATIVE_
  }

  /**
   * Native aggregation function applied on column values
   */
  nativeAggFunc: optional string
}

View File

@ -0,0 +1,40 @@
namespace com.linkedin.assertion
import com.linkedin.common.Urn
/**
 * Assertion on rows of a dataset
 */
record DatasetRowsAssertion {
  /**
   * Standardized assertion operator
   */
  stdOperator: AssertionStdOperator

  /**
   * Native assertion operator
   */
  nativeOperator: optional string

  /**
   * Standardized aggregation function applied on rows
   */
  stdAggFunc: enum DatasetRowsStdAggFunc {
    /**
     * Assertion is applied on number of rows
     */
    ROW_COUNT

    /**
     * Other
     */
    _NATIVE_
  }

  /**
   * Native aggregation function applied on rows
   */
  nativeAggFunc: optional string
}

View File

@ -0,0 +1,45 @@
namespace com.linkedin.assertion
import com.linkedin.common.Urn
/**
 * Assertion on schema of a dataset
 */
record DatasetSchemaAssertion {
  /**
   * Standardized assertion operator
   */
  stdOperator: AssertionStdOperator

  /**
   * Native assertion operator
   */
  nativeOperator: optional string

  /**
   * Standardized aggregation function applied on the schema
   */
  stdAggFunc: enum DatasetSchemaStdAggFunc {
    /**
     * Assertion is applied on all columns
     */
    COLUMNS

    /**
     * Assertion is applied on number of columns
     */
    COLUMN_COUNT

    /**
     * Other
     */
    _NATIVE_
  }

  /**
   * Native aggregation function applied on the schema
   */
  nativeAggFunc: optional string
}

View File

@ -0,0 +1,14 @@
namespace com.linkedin.metadata.aspect
import com.linkedin.metadata.key.AssertionKey
import com.linkedin.common.DataPlatformInstance
import com.linkedin.assertion.AssertionInfo
/**
 * A union of all supported metadata aspects for an Assertion
 */
typeref AssertionAspect = union[
  AssertionKey,
  DataPlatformInstance,
  AssertionInfo
]

View File

@ -0,0 +1,20 @@
namespace com.linkedin.metadata.key
import com.linkedin.common.Urn
/**
 * Key for an Assertion
 */
@Aspect = {
  "name": "assertionKey",
}
record AssertionKey {
  //The name of the assertion platform such as greatExpectations etc.
  //assertionPlatform: Urn

  /**
   * Unique id for the assertion.
   */
  assertionId: string
}

View File

@ -0,0 +1,24 @@
namespace com.linkedin.metadata.snapshot
import com.linkedin.common.Urn
import com.linkedin.metadata.aspect.AssertionAspect
/**
* A metadata snapshot for a specific Assertion entity.
*/
@Entity = {
"name": "assertion",
"keyAspect": "assertionKey"
}
record AssertionSnapshot {
/**
* URN for the entity the metadata snapshot is associated with.
*/
urn: Urn
/**
* The list of metadata aspects associated with the assertion.
*/
aspects: array[AssertionAspect]
}

View File

@ -4,6 +4,13 @@ namespace com.linkedin.timeseries
* Defines how the data is partitioned
*/
record PartitionSpec {
type: enum PartitionType {
FULL_TABLE,
QUERY,
PARTITION
} = "PARTITION"
/**
* String representation of the partition
*/

View File

@ -14,7 +14,10 @@ record TimeseriesAspectBase {
/**
* The optional partition specification.
*/
partitionSpec: optional PartitionSpec
partitionSpec: optional PartitionSpec = {
"type":"FULL_TABLE",
"partition":"FULL_TABLE_SNAPSHOT"
}
/**
* The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

View File

@ -12,6 +12,7 @@ entities:
- schemaMetadata
- status
- container
- assertionResult
- name: dataHubPolicy
doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
keyAspect: dataHubPolicyKey
@ -84,3 +85,9 @@ entities:
- dataHubExecutionRequestInput
- dataHubExecutionRequestSignal
- dataHubExecutionRequestResult
- name: assertion
doc: Assertion represents a data quality rule applied on one or more dataset.
keyAspect: assertionKey
aspects:
- assertionInfo
- dataPlatformInstance

View File

@ -0,0 +1,144 @@
import json
import urllib
import time
import pytest
import requests
from datahub.cli.docker import check_local_docker_containers
from tests.utils import ingest_file_via_rest
bootstrap_sample_data = "test_resources/bootstrap_data_quality.json"
GMS_ENDPOINT = "http://localhost:8080"
restli_default_headers = {
"X-RestLi-Protocol-Version": "2.0.0",
}
@pytest.fixture(scope="session")
def wait_for_healthchecks():
    """Session fixture that fails unless check_local_docker_containers() returns a falsy value."""
    # Simply assert that everything is healthy, but don't wait.
    assert not check_local_docker_containers()
    yield
@pytest.mark.dependency()
def test_healthchecks(wait_for_healthchecks):
    """Anchor test for the dependency chain; the health check itself lives in the fixture."""
    # Call to wait_for_healthchecks fixture will do the actual functionality.
    pass
@pytest.mark.dependency(depends=["test_healthchecks"])
def test_run_ingestion(wait_for_healthchecks):
    """Ingest the bootstrap data-quality file through ingest_file_via_rest."""
    ingest_file_via_rest(bootstrap_sample_data)
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_latest_assertions_results_by_partition():
    """Query the latest assertion results for a dataset, grouped by assertee,
    partition, day and assertion urn, and check the shape of the returned table.
    """
    urn = "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)"

    # Give elasticsearch time to index the ingested documents.
    time.sleep(5)

    # Restrict the query to the dataset under test.
    urn_filter = {
        "or": [
            {
                "and": [
                    {
                        "field": "urn",
                        "value": urn,
                        "condition": "EQUAL",
                    }
                ]
            }
        ]
    }
    # Latest assertion result per bucket ...
    metrics = [{"fieldPath": "batchAssertionResult", "aggregationType": "LATEST"}]
    # ... bucketed by assertee, partition, day and assertion.
    buckets = [
        {"key": "asserteeUrn", "type": "STRING_GROUPING_BUCKET"},
        {"key": "partitionSpec.partition", "type": "STRING_GROUPING_BUCKET"},
        {
            "key": "timestampMillis",
            "type": "DATE_GROUPING_BUCKET",
            "timeWindowSize": {"multiple": 1, "unit": "DAY"},
        },
        {"key": "assertionUrn", "type": "STRING_GROUPING_BUCKET"},
    ]
    payload = json.dumps(
        {
            "entityName": "dataset",
            "aspectName": "assertionResult",
            "filter": urn_filter,
            "metrics": metrics,
            "buckets": buckets,
        }
    )

    response = requests.post(
        f"{GMS_ENDPOINT}/analytics?action=getTimeseriesStats",
        data=payload,
        headers=restli_default_headers,
    )
    response.raise_for_status()
    body = response.json()

    assert body["value"]
    table = body["value"]["table"]
    assert table

    column_names = table["columnNames"]
    assert sorted(column_names) == [
        "asserteeUrn",
        "assertionUrn",
        "latest_batchAssertionResult",
        "partitionSpec.partition",
        "timestampMillis",
    ]

    rows = table["rows"]
    assert len(rows) == 6
    assert rows[0][column_names.index("asserteeUrn")] == urn
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_assertions_on_dataset():
    """lists all assertion urns including those which may not have executed"""
    urn = "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)"

    # Incoming "Asserts" relationships of the dataset.
    encoded_urn = urllib.parse.quote(urn)
    url = f"{GMS_ENDPOINT}/relationships?direction=INCOMING&urn={encoded_urn}&types=Asserts"

    response = requests.get(url)
    response.raise_for_status()

    relationships = response.json()["relationships"]
    assert len(relationships) == 1
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_assertions_on_dataset_field():
    """lists all assertion urns including those which may not have executed"""
    urn = "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD), col1)"

    # Incoming "Asserts" relationships of the schema field.
    encoded_urn = urllib.parse.quote(urn)
    url = f"{GMS_ENDPOINT}/relationships?direction=INCOMING&urn={encoded_urn}&types=Asserts"

    response = requests.get(url)
    response.raise_for_status()

    relationships = response.json()["relationships"]
    assert len(relationships) == 1
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_assertion_info():
    """Fetch version 0 of the assertionInfo aspect and check it carries an assertionType."""
    assertion_urn = "urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b"

    # Adjacent string literals are used instead of a backslash continuation inside
    # the literal: with a backslash, any indentation on the continuation line
    # becomes part of the URL.
    url = (
        f"{GMS_ENDPOINT}/aspects/{urllib.parse.quote(assertion_urn)}"
        "?aspect=assertionInfo&version=0"
    )
    response = requests.get(url, headers=restli_default_headers)
    response.raise_for_status()
    data = response.json()

    assert data["aspect"]
    assert data["aspect"]["com.linkedin.assertion.AssertionInfo"]
    assert data["aspect"]["com.linkedin.assertion.AssertionInfo"]["assertionType"]

View File

@ -0,0 +1,72 @@
[
{
"entityType": "assertion",
"entityUrn": "urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b",
"changeType": "UPSERT",
"aspectName": "assertionInfo",
"aspect": {
"value": "{\"customProperties\": {\"suite_name\": \"demo_suite\"}, \"datasetFields\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD), col1)\"], \"datasets\": [\"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\"], \"assertionType\": {\"scope\": \"DATASET_COLUMN\", \"datasetColumnAssertion\": {\"stdOperator\": \"LESS_THAN\", \"nativeOperator\": \"column_value_is_less_than\", \"stdAggFunc\": \"IDENTITY\"}}, \"assertionParameters\": {\"max_value\": \"99\"}}",
"contentType": "application/json"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
"changeType": "UPSERT",
"aspectName": "assertionResult",
"aspect": {
"value": "{\"timestampMillis\": 1643794280350, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'IN'}]\"}, \"messageId\": \"1643794280350\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 90, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
"contentType": "application/json"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
"changeType": "UPSERT",
"aspectName": "assertionResult",
"aspect": {
"value": "{\"timestampMillis\": 1643794280352, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'US'}]\"}, \"messageId\": \"1643794280352\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": false, \"actualAggValue\": 101, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
"contentType": "application/json"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
"changeType": "UPSERT",
"aspectName": "assertionResult",
"aspect": {
"value": "{\"timestampMillis\": 1643794280354, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"messageId\": \"1643794280354\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 93, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
"contentType": "application/json"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
"changeType": "UPSERT",
"aspectName": "assertionResult",
"aspect": {
"value": "{\"timestampMillis\": 1643880726872, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'IN'}]\"}, \"messageId\": \"1643880726872\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 90, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
"contentType": "application/json"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
"changeType": "UPSERT",
"aspectName": "assertionResult",
"aspect": {
"value": "{\"timestampMillis\": 1643880726874, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'US'}]\"}, \"messageId\": \"1643880726874\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": false, \"actualAggValue\": 101, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
"contentType": "application/json"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
"changeType": "UPSERT",
"aspectName": "assertionResult",
"aspect": {
"value": "{\"timestampMillis\": 1643880726875, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"messageId\": \"1643880726875\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 93, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
"contentType": "application/json"
}
}
]

View File

@ -23,4 +23,15 @@ record TestEntityProfile includes TimeseriesAspectBase {
"key":"key"
}
componentProfiles: array[TestEntityComponentProfile]
@TimeseriesField = {}
aComplexNestedRecord: record ComplexNestedRecord {
type: enum BatchType {
QUERY_BATCH,
PARTITION_BATCH,
FULL_TABLE,
_NATIVE_
}
partitions: array[map[string, string]]
}
}