mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-03 20:27:50 +00:00 
			
		
		
		
	feat(model): data quality model (#3787)
Co-authored-by: Ravindra Lanka <rlanka@acryl.io> Co-authored-by: Mayuri N <mayuri.nehate@gslab.com>
This commit is contained in:
		
							parent
							
								
									ded16809da
								
							
						
					
					
						commit
						4c24f386a6
					
				
							
								
								
									
										134
									
								
								metadata-ingestion/examples/library/data_quality_mcpw_rest.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								metadata-ingestion/examples/library/data_quality_mcpw_rest.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,134 @@
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
import datahub.emitter.mce_builder as builder
 | 
			
		||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
 | 
			
		||||
from datahub.emitter.rest_emitter import DatahubRestEmitter
 | 
			
		||||
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
 | 
			
		||||
    AssertionInfo,
 | 
			
		||||
    AssertionResult,
 | 
			
		||||
    AssertionScope,
 | 
			
		||||
    AssertionStdOperator,
 | 
			
		||||
    AssertionType,
 | 
			
		||||
    BatchAssertionResult,
 | 
			
		||||
    DatasetColumnAssertion,
 | 
			
		||||
    DatasetColumnStdAggFunc,
 | 
			
		||||
)
 | 
			
		||||
from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType
 | 
			
		||||
from datahub.metadata.schema_classes import PartitionSpecClass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def datasetUrn(tbl: str) -> str:
    """Return the dataset URN for table ``tbl`` on the "postgres" platform.

    Thin wrapper over ``builder.make_dataset_urn`` with the platform fixed.
    """
    return builder.make_dataset_urn("postgres", tbl)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def fldUrn(tbl: str, fld: str) -> str:
    """Return the schemaField URN string for column ``fld`` of table ``tbl``."""
    # NOTE(review): the separator is ", " (comma followed by a space) --
    # confirm the backend accepts/normalizes the space before changing it.
    return f"urn:li:schemaField:({datasetUrn(tbl)}, {fld})"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def assertionUrn(info: AssertionInfo) -> str:
    """Return the assertion URN derived from the content of ``info``.

    The assertion id is whatever ``builder.datahub_guid`` computes over
    ``info.to_obj()``; that id is then wrapped by
    ``builder.make_assertion_urn``.
    """
    assertion_id = builder.datahub_guid(info.to_obj())
    return builder.make_assertion_urn(assertion_id)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def emitAssertionResult(
    assertionResult: AssertionResult, datasetUrn: str
) -> None:
    """Emit ``assertionResult`` as the "assertionResult" aspect of a dataset.

    Args:
        assertionResult: The result aspect to send.
        datasetUrn: URN of the dataset entity the aspect is attached to.
            Inside this function the parameter shadows the module-level
            ``datasetUrn`` helper.

    Uses the module-level ``emitter``, which must be defined before this
    function is called.
    """
    dataset_assertionResult_mcp = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=ChangeType.UPSERT,
        entityUrn=datasetUrn,
        aspectName="assertionResult",
        aspect=assertionResult,
    )

    # Emit BatchAssertion Result! (timeseries aspect)
    emitter.emit_mcp(dataset_assertionResult_mcp)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Construct an assertion object: a column-scoped "less than" check on
# fooTable.col1, with the bound passed as the "max_value" parameter.
assertion_maxVal = AssertionInfo(
    datasetFields=[fldUrn("fooTable", "col1")],
    datasets=[datasetUrn("fooTable")],
    assertionType=AssertionType(
        scope=AssertionScope.DATASET_COLUMN,
        datasetColumnAssertion=DatasetColumnAssertion(
            stdOperator=AssertionStdOperator.LESS_THAN,
            nativeOperator="column_value_is_less_than",
            stdAggFunc=DatasetColumnStdAggFunc.IDENTITY,
        ),
    ),
    assertionParameters={"max_value": "99"},
    customProperties={"suite_name": "demo_suite"},
)
 | 
			
		||||
 | 
			
		||||
# Construct a MetadataChangeProposalWrapper object that upserts the
# "assertionInfo" aspect on the assertion entity.
assertion_maxVal_mcp = MetadataChangeProposalWrapper(
    entityType="assertion",
    changeType=ChangeType.UPSERT,
    entityUrn=assertionUrn(assertion_maxVal),
    aspectName="assertionInfo",
    aspect=assertion_maxVal,
)
 | 
			
		||||
 | 
			
		||||
# Create an emitter to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")

# Emit Assertion entity info object!
emitter.emit_mcp(assertion_maxVal_mcp)
 | 
			
		||||
 | 
			
		||||
# Construct batch assertion result object for partition 1 batch
# (country "IN"): a passing result.
assertionResult_maxVal_batch_partition1 = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    partitionSpec=PartitionSpecClass(partition=str([{"country": "IN"}])),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=True,
        externalUrl="http://example.com/uuid1",
        actualAggValue=90,
    ),
)

emitAssertionResult(
    assertionResult_maxVal_batch_partition1,
    datasetUrn("fooTable"),
)
 | 
			
		||||
 | 
			
		||||
# Construct batch assertion result object for partition 2 batch
# (country "US"): a failing result.
assertionResult_maxVal_batch_partition2 = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    partitionSpec=PartitionSpecClass(partition=str([{"country": "US"}])),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=False,
        externalUrl="http://example.com/uuid1",
        actualAggValue=101,
    ),
)

emitAssertionResult(
    assertionResult_maxVal_batch_partition2,
    datasetUrn("fooTable"),
)
 | 
			
		||||
 | 
			
		||||
# Construct batch assertion result object for full table batch.
# No partitionSpec is passed here, unlike the per-partition results.
assertionResult_maxVal_batch_fulltable = AssertionResult(
    timestampMillis=int(time.time() * 1000),
    assertionUrn=assertionUrn(assertion_maxVal),
    asserteeUrn=datasetUrn("fooTable"),
    nativeEvaluatorRunId="uuid1",
    batchAssertionResult=BatchAssertionResult(
        success=True,
        externalUrl="http://example.com/uuid1",
        actualAggValue=93,
    ),
)

emitAssertionResult(
    assertionResult_maxVal_batch_fulltable,
    datasetUrn("fooTable"),
)
 | 
			
		||||
@ -1,14 +1,17 @@
 | 
			
		||||
"""Convenience functions for creating MCEs"""
 | 
			
		||||
import json
 | 
			
		||||
import logging
 | 
			
		||||
import re
 | 
			
		||||
import time
 | 
			
		||||
from enum import Enum
 | 
			
		||||
from hashlib import md5
 | 
			
		||||
from typing import Any, List, Optional, Type, TypeVar, Union, cast, get_type_hints
 | 
			
		||||
 | 
			
		||||
import typing_inspect
 | 
			
		||||
from avrogen.dict_wrapper import DictWrapper
 | 
			
		||||
 | 
			
		||||
from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION
 | 
			
		||||
from datahub.emitter.serialization_helper import pre_json_transform
 | 
			
		||||
from datahub.metadata.com.linkedin.pegasus2avro.common import GlossaryTerms
 | 
			
		||||
from datahub.metadata.schema_classes import (
 | 
			
		||||
    AuditStampClass,
 | 
			
		||||
@ -82,6 +85,18 @@ def dataset_urn_to_key(dataset_urn: str) -> Optional[DatasetKeyClass]:
 | 
			
		||||
    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def datahub_guid(obj: dict) -> str:
    """Return the hex MD5 digest of ``obj`` serialized as compact JSON.

    ``obj`` is first passed through ``pre_json_transform``, then dumped with
    sorted keys and no whitespace in separators, UTF-8 encoded, and hashed.
    Sorting the keys keeps the digest independent of dict insertion order.
    """
    serialized = json.dumps(
        pre_json_transform(obj), separators=(",", ":"), sort_keys=True
    ).encode("utf-8")
    return md5(serialized).hexdigest()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def make_assertion_urn(assertion_id: str) -> str:
    """Return the assertion URN ``urn:li:assertion:<assertion_id>``."""
    return f"urn:li:assertion:{assertion_id}"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def make_user_urn(username: str) -> str:
    """Return the corp-user URN ``urn:li:corpuser:<username>``."""
    return f"urn:li:corpuser:{username}"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -1117,7 +1117,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", \"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", \"1989-08-24\"]}]}",
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", \"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", 
\"1989-08-24\"]}]}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -1136,7 +1136,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 112, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 0.08928571428571429, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"10001\", \"max\": \"10010\", \"mean\": \"10005.3125\", \"median\": \"10005.0\", \"stdev\": \"2.834889609688869\", \"distinctValueFrequencies\": [{\"value\": \"10001\", \"frequency\": 17}, {\"value\": \"10002\", \"frequency\": 6}, {\"value\": \"10003\", \"frequency\": 7}, {\"value\": \"10004\", \"frequency\": 16}, {\"value\": \"10005\", \"frequency\": 13}, {\"value\": \"10006\", \"frequency\": 12}, {\"value\": \"10007\", \"frequency\": 14}, {\"value\": \"10008\", \"frequency\": 3}, {\"value\": \"10009\", \"frequency\": 18}, {\"value\": \"10010\", \"frequency\": 6}], \"sampleValues\": [\"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10002\", \"10002\", \"10002\"]}, {\"fieldPath\": \"salary\", \"uniqueCount\": 111, \"uniqueProportion\": 0.9910714285714286, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"40000\", \"max\": \"94692\", \"mean\": \"68303.11607142857\", \"median\": \"69544.0\", \"stdev\": \"15505.291475014095\", \"sampleValues\": [\"60117\", \"62102\", \"66074\", \"66596\", \"66961\", \"71046\", \"74333\", \"75286\", \"75994\", \"76884\", \"80013\", \"81025\", \"81097\", \"84917\", \"85112\", \"85097\", \"88958\", \"65909\", \"65909\", \"67534\"]}, {\"fieldPath\": \"from_date\", \"uniqueCount\": 106, \"uniqueProportion\": 0.9464285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"2002-06-22\", \"sampleValues\": [\"1986-06-26\", \"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", 
\"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"1996-08-03\", \"1997-08-03\", \"1998-08-03\"]}, {\"fieldPath\": \"to_date\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8839285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1986-02-18\", \"max\": \"9999-01-01\", \"sampleValues\": [\"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"9999-01-01\", \"1997-08-03\", \"1998-08-03\", \"1999-08-03\"]}]}",
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 112, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 0.08928571428571429, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"10001\", \"max\": \"10010\", \"mean\": \"10005.3125\", \"median\": \"10005.0\", \"stdev\": \"2.834889609688869\", \"distinctValueFrequencies\": [{\"value\": \"10001\", \"frequency\": 17}, {\"value\": \"10002\", \"frequency\": 6}, {\"value\": \"10003\", \"frequency\": 7}, {\"value\": \"10004\", \"frequency\": 16}, {\"value\": \"10005\", \"frequency\": 13}, {\"value\": \"10006\", \"frequency\": 12}, {\"value\": \"10007\", \"frequency\": 14}, {\"value\": \"10008\", \"frequency\": 3}, {\"value\": \"10009\", \"frequency\": 18}, {\"value\": \"10010\", \"frequency\": 6}], \"sampleValues\": [\"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10002\", \"10002\", \"10002\"]}, {\"fieldPath\": \"salary\", \"uniqueCount\": 111, \"uniqueProportion\": 0.9910714285714286, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"40000\", \"max\": \"94692\", \"mean\": \"68303.11607142857\", \"median\": \"69544.0\", \"stdev\": \"15505.291475014095\", \"sampleValues\": [\"60117\", \"62102\", \"66074\", \"66596\", \"66961\", \"71046\", \"74333\", \"75286\", \"75994\", \"76884\", \"80013\", \"81025\", \"81097\", \"84917\", \"85112\", \"85097\", \"88958\", \"65909\", \"65909\", \"67534\"]}, {\"fieldPath\": \"from_date\", \"uniqueCount\": 106, \"uniqueProportion\": 0.9464285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"2002-06-22\", \"sampleValues\": [\"1986-06-26\", \"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", 
\"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"1996-08-03\", \"1997-08-03\", \"1998-08-03\"]}, {\"fieldPath\": \"to_date\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8839285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1986-02-18\", \"max\": \"9999-01-01\", \"sampleValues\": [\"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"9999-01-01\", \"1997-08-03\", \"1998-08-03\", \"1999-08-03\"]}]}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -1155,7 +1155,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}",
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -1174,7 +1174,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -1193,7 +1193,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
 | 
			
		||||
      "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
 | 
			
		||||
@ -7,7 +7,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "operation",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"INSERT\"}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"INSERT\"}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -26,7 +26,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "operation",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"INSERT\"}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"INSERT\"}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -45,7 +45,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "operation",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"DELETE\"}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:test-name\", \"operationType\": \"DELETE\"}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -64,7 +64,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "operation",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"DELETE\"}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631664000000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"lastUpdatedTimestamp\": 1631664000000, \"actor\": \"urn:li:corpuser:real_shirshanka\", \"operationType\": \"DELETE\"}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -83,7 +83,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetUsageStatistics",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select userid from users\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 1, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": []}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select userid from users\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 1, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": []}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -102,7 +102,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetUsageStatistics",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select catid from category\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:real_shirshanka\", \"count\": 1, \"userEmail\": \"real_shirshanka@acryl.io\"}], \"fieldCounts\": []}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1631577600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 1, \"topSqlQueries\": [\"select catid from category\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:real_shirshanka\", \"count\": 1, \"userEmail\": \"real_shirshanka@acryl.io\"}], \"fieldCounts\": []}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
 | 
			
		||||
@ -7,7 +7,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetUsageStatistics",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1634169600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 2, \"topSqlQueries\": [\"select * from testcatalog.testschema.testtable limit 100\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 2, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": [{\"fieldPath\": \"column1\", \"count\": 2}, {\"fieldPath\": \"column2\", \"count\": 2}]}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1634169600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 2, \"topSqlQueries\": [\"select * from testcatalog.testschema.testtable limit 100\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 2, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": [{\"fieldPath\": \"column1\", \"count\": 2}, {\"fieldPath\": \"column2\", \"count\": 2}]}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
 | 
			
		||||
@ -531,7 +531,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}]}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1.0, \"sampleValues\": []}]}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -550,7 +550,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"distinctValueFrequencies\": [{\"value\": \"2021-09-27\", \"frequency\": 2}], \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"distinctValueFrequencies\": [{\"value\": \"2021-09-27\", \"frequency\": 2}], \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -569,7 +569,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
@ -588,7 +588,7 @@
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "datasetProfile",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
 | 
			
		||||
        "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}",
 | 
			
		||||
        "contentType": "application/json"
 | 
			
		||||
    },
 | 
			
		||||
    "systemMetadata": {
 | 
			
		||||
 | 
			
		||||
@ -19,6 +19,9 @@ public class MappingsBuilder {
 | 
			
		||||
  public static final String EVENT_FIELD = "event";
 | 
			
		||||
  public static final String SYSTEM_METADATA_FIELD = "systemMetadata";
 | 
			
		||||
  public static final String IS_EXPLODED_FIELD = "isExploded";
 | 
			
		||||
  public static final String PARTITION_SPEC = "partitionSpec";
 | 
			
		||||
  public static final String PARTITION_SPEC_PARTITION = "partition";
 | 
			
		||||
  public static final String PARTITION_SPEC_TIME_PARTITION = "timePartition";
 | 
			
		||||
 | 
			
		||||
  private MappingsBuilder() {
 | 
			
		||||
  }
 | 
			
		||||
@ -36,6 +39,9 @@ public class MappingsBuilder {
 | 
			
		||||
    mappings.put(TIMESTAMP_FIELD, ImmutableMap.of("type", "date"));
 | 
			
		||||
    mappings.put(TIMESTAMP_MILLIS_FIELD, ImmutableMap.of("type", "date"));
 | 
			
		||||
    mappings.put(EVENT_GRANULARITY, ImmutableMap.of("type", "keyword"));
 | 
			
		||||
    mappings.put(PARTITION_SPEC, ImmutableMap.of("properties",
 | 
			
		||||
        ImmutableMap.of(PARTITION_SPEC_PARTITION, ImmutableMap.of("type", "keyword"), PARTITION_SPEC_TIME_PARTITION,
 | 
			
		||||
            ImmutableMap.of("type", "keyword"))));
 | 
			
		||||
    mappings.put(EVENT_FIELD, ImmutableMap.of("type", "object", "enabled", false));
 | 
			
		||||
    mappings.put(SYSTEM_METADATA_FIELD, ImmutableMap.of("type", "object", "enabled", false));
 | 
			
		||||
    mappings.put(IS_EXPLODED_FIELD, ImmutableMap.of("type", "boolean"));
 | 
			
		||||
 | 
			
		||||
@ -12,6 +12,7 @@ import com.linkedin.metadata.models.TimeseriesFieldSpec;
 | 
			
		||||
import com.linkedin.metadata.models.registry.EntityRegistry;
 | 
			
		||||
import com.linkedin.metadata.query.filter.Filter;
 | 
			
		||||
import com.linkedin.metadata.search.utils.ESUtils;
 | 
			
		||||
import com.linkedin.metadata.timeseries.elastic.indexbuilder.MappingsBuilder;
 | 
			
		||||
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 | 
			
		||||
import com.linkedin.timeseries.AggregationSpec;
 | 
			
		||||
import com.linkedin.timeseries.GenericTable;
 | 
			
		||||
@ -190,6 +191,16 @@ public class ESAggregatedStatsDAO {
 | 
			
		||||
        return timeseriesFieldCollectionSpec.getPegasusSchema().getType();
 | 
			
		||||
      }
 | 
			
		||||
    } else if (memberParts.length == 2) {
 | 
			
		||||
      // Check if partitionSpec
 | 
			
		||||
      if (memberParts[0].equals(MappingsBuilder.PARTITION_SPEC)) {
 | 
			
		||||
        if (memberParts[1].equals(MappingsBuilder.PARTITION_SPEC_PARTITION) || memberParts[1].equals(
 | 
			
		||||
            MappingsBuilder.PARTITION_SPEC_TIME_PARTITION)) {
 | 
			
		||||
          return DataSchema.Type.STRING;
 | 
			
		||||
        } else {
 | 
			
		||||
          throw new IllegalArgumentException("Unknown partitionSpec member" + memberParts[1]);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // This is either a collection key/stat.
 | 
			
		||||
      TimeseriesFieldCollectionSpec timeseriesFieldCollectionSpec =
 | 
			
		||||
          aspectSpec.getTimeseriesFieldCollectionSpecMap().get(memberParts[0]);
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
package com.linkedin.metadata.timeseries.transformer;
 | 
			
		||||
 | 
			
		||||
import com.datahub.util.RecordUtils;
 | 
			
		||||
import com.fasterxml.jackson.core.JsonProcessingException;
 | 
			
		||||
import com.fasterxml.jackson.databind.JsonNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.ObjectMapper;
 | 
			
		||||
@ -7,10 +8,10 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
import com.linkedin.common.urn.Urn;
 | 
			
		||||
import com.linkedin.data.DataMap;
 | 
			
		||||
import com.linkedin.data.schema.ArrayDataSchema;
 | 
			
		||||
import com.linkedin.data.schema.DataSchema;
 | 
			
		||||
import com.linkedin.data.template.RecordTemplate;
 | 
			
		||||
import com.datahub.util.RecordUtils;
 | 
			
		||||
import com.linkedin.metadata.extractor.FieldExtractor;
 | 
			
		||||
import com.linkedin.metadata.models.AspectSpec;
 | 
			
		||||
import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec;
 | 
			
		||||
@ -87,6 +88,30 @@ public class TimeseriesAspectTransformer {
 | 
			
		||||
        throw new IllegalArgumentException("Failed to convert eventGranulairty to Json string!", e);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // PartitionSpec handling
 | 
			
		||||
    DataMap partitionSpec = (DataMap) timeseriesAspect.data().get(MappingsBuilder.PARTITION_SPEC);
 | 
			
		||||
    if (partitionSpec != null) {
 | 
			
		||||
      Object partition = partitionSpec.get(MappingsBuilder.PARTITION_SPEC_PARTITION);
 | 
			
		||||
      Object timePartition = partitionSpec.get(MappingsBuilder.PARTITION_SPEC_TIME_PARTITION);
 | 
			
		||||
      if (partition != null && timePartition != null) {
 | 
			
		||||
        throw new IllegalArgumentException("Both partition and timePartition cannot be specified in partitionSpec!");
 | 
			
		||||
      } else if (partition != null) {
 | 
			
		||||
        ObjectNode partitionDoc = JsonNodeFactory.instance.objectNode();
 | 
			
		||||
        partitionDoc.put(MappingsBuilder.PARTITION_SPEC_PARTITION, partition.toString());
 | 
			
		||||
        document.set(MappingsBuilder.PARTITION_SPEC, partitionDoc);
 | 
			
		||||
      } else if (timePartition != null) {
 | 
			
		||||
        ObjectNode timePartitionDoc = JsonNodeFactory.instance.objectNode();
 | 
			
		||||
        try {
 | 
			
		||||
          timePartitionDoc.put(MappingsBuilder.PARTITION_SPEC_TIME_PARTITION,
 | 
			
		||||
              OBJECT_MAPPER.writeValueAsString(timePartition));
 | 
			
		||||
        } catch (JsonProcessingException e) {
 | 
			
		||||
          throw new IllegalArgumentException("Failed to convert timePartition to Json string!", e);
 | 
			
		||||
        }
 | 
			
		||||
        document.set(MappingsBuilder.PARTITION_SPEC, timePartitionDoc);
 | 
			
		||||
      } else {
 | 
			
		||||
        throw new IllegalArgumentException("Both partition and timePartition cannot be null in partitionSpec.");
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    String messageId = (String) timeseriesAspect.data().get(MappingsBuilder.MESSAGE_ID_FIELD);
 | 
			
		||||
    if (messageId != null) {
 | 
			
		||||
      document.put(MappingsBuilder.MESSAGE_ID_FIELD, messageId);
 | 
			
		||||
@ -198,6 +223,10 @@ public class TimeseriesAspectTransformer {
 | 
			
		||||
    if (messageId != null) {
 | 
			
		||||
      docId += messageId.toString();
 | 
			
		||||
    }
 | 
			
		||||
    JsonNode partitionSpec = document.get(MappingsBuilder.PARTITION_SPEC);
 | 
			
		||||
    if (partitionSpec != null) {
 | 
			
		||||
      docId += partitionSpec.toString();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return DigestUtils.md5Hex(docId);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,7 @@
 | 
			
		||||
package com.linkedin.metadata.timeseries.elastic;
 | 
			
		||||
 | 
			
		||||
import com.datahub.test.BatchType;
 | 
			
		||||
import com.datahub.test.ComplexNestedRecord;
 | 
			
		||||
import com.datahub.test.TestEntityComponentProfile;
 | 
			
		||||
import com.datahub.test.TestEntityComponentProfileArray;
 | 
			
		||||
import com.datahub.test.TestEntityProfile;
 | 
			
		||||
@ -11,6 +13,8 @@ import com.linkedin.common.urn.TestEntityUrn;
 | 
			
		||||
import com.linkedin.common.urn.Urn;
 | 
			
		||||
import com.linkedin.data.template.StringArray;
 | 
			
		||||
import com.linkedin.data.template.StringArrayArray;
 | 
			
		||||
import com.linkedin.data.template.StringMap;
 | 
			
		||||
import com.linkedin.data.template.StringMapArray;
 | 
			
		||||
import com.linkedin.metadata.aspect.EnvelopedAspect;
 | 
			
		||||
import com.linkedin.metadata.models.AspectSpec;
 | 
			
		||||
import com.linkedin.metadata.models.DataSchemaFactory;
 | 
			
		||||
@ -155,6 +159,15 @@ public class ElasticSearchTimeseriesAspectServiceTest {
 | 
			
		||||
    componentProfile2.setKey("col2");
 | 
			
		||||
    componentProfile2.setStat(stat + 2);
 | 
			
		||||
    testEntityProfile.setComponentProfiles(new TestEntityComponentProfileArray(componentProfile1, componentProfile2));
 | 
			
		||||
 | 
			
		||||
    StringMap stringMap1 = new StringMap();
 | 
			
		||||
    stringMap1.put("p_key1", "p_val1");
 | 
			
		||||
    StringMap stringMap2 = new StringMap();
 | 
			
		||||
    stringMap2.put("p_key2", "p_val2");
 | 
			
		||||
    ComplexNestedRecord nestedRecord = new ComplexNestedRecord().setType(BatchType.PARTITION_BATCH)
 | 
			
		||||
        .setPartitions(new StringMapArray(stringMap1, stringMap2));
 | 
			
		||||
    testEntityProfile.setAComplexNestedRecord(nestedRecord);
 | 
			
		||||
 | 
			
		||||
    return testEntityProfile;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -346,6 +359,50 @@ public class ElasticSearchTimeseriesAspectServiceTest {
 | 
			
		||||
        _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString())));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"})
 | 
			
		||||
  public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() {
 | 
			
		||||
    // Filter is on the urn and the start/end timestamp range
 | 
			
		||||
    Criterion hasUrnCriterion =
 | 
			
		||||
        new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString());
 | 
			
		||||
    Criterion startTimeCriterion = new Criterion().setField(ES_FILED_TIMESTAMP)
 | 
			
		||||
        .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO)
 | 
			
		||||
        .setValue(_startTime.toString());
 | 
			
		||||
    Criterion endTimeCriterion = new Criterion().setField(ES_FILED_TIMESTAMP)
 | 
			
		||||
        .setCondition(Condition.LESS_THAN_OR_EQUAL_TO)
 | 
			
		||||
        .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT));
 | 
			
		||||
 | 
			
		||||
    Filter filter =
 | 
			
		||||
        QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion));
 | 
			
		||||
 | 
			
		||||
    // Aggregate on latest aComplexNestedRecord value
 | 
			
		||||
    AggregationSpec latestStatAggregationSpec =
 | 
			
		||||
        new AggregationSpec().setAggregationType(AggregationType.LATEST).setFieldPath("aComplexNestedRecord");
 | 
			
		||||
 | 
			
		||||
    // Grouping bucket is only timestamp field.
 | 
			
		||||
    GroupingBucket timestampBucket = new GroupingBucket().setKey(ES_FILED_TIMESTAMP)
 | 
			
		||||
        .setType(GroupingBucketType.DATE_GROUPING_BUCKET)
 | 
			
		||||
        .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY));
 | 
			
		||||
 | 
			
		||||
    GenericTable resultTable = _elasticSearchTimeseriesAspectService.getAggregatedStats(ENTITY_NAME, ASPECT_NAME,
 | 
			
		||||
        new AggregationSpec[]{latestStatAggregationSpec}, filter, new GroupingBucket[]{timestampBucket});
 | 
			
		||||
    // Validate column names
 | 
			
		||||
    assertEquals(resultTable.getColumnNames(), new StringArray(ES_FILED_TIMESTAMP, "latest_aComplexNestedRecord"));
 | 
			
		||||
    // Validate column types
 | 
			
		||||
    assertEquals(resultTable.getColumnTypes(), new StringArray("long", "record"));
 | 
			
		||||
    // Validate rows
 | 
			
		||||
    assertNotNull(resultTable.getRows());
 | 
			
		||||
    assertEquals(resultTable.getRows().size(), 1);
 | 
			
		||||
    assertEquals(resultTable.getRows().get(0).get(0), _startTime.toString());
 | 
			
		||||
    try {
 | 
			
		||||
      ComplexNestedRecord latestAComplexNestedRecord =
 | 
			
		||||
          OBJECT_MAPPER.readValue(resultTable.getRows().get(0).get(1), ComplexNestedRecord.class);
 | 
			
		||||
      assertEquals(latestAComplexNestedRecord,
 | 
			
		||||
          _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getAComplexNestedRecord());
 | 
			
		||||
    } catch (JsonProcessingException e) {
 | 
			
		||||
      fail("Unexpected exception thrown" + e);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"})
 | 
			
		||||
  public void testGetAggregatedStatsLatestStrArrayDay1() {
 | 
			
		||||
    // Filter is only on the urn
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,51 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.CustomProperties
 | 
			
		||||
import com.linkedin.common.ExternalReference
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Information about an assertion
 | 
			
		||||
 */
 | 
			
		||||
@Aspect = {
 | 
			
		||||
  "name": "assertionInfo"
 | 
			
		||||
}
 | 
			
		||||
record AssertionInfo includes CustomProperties, ExternalReference { 
 | 
			
		||||
    /**
 | 
			
		||||
    * One or more dataset schema fields that are targeted by this assertion
 | 
			
		||||
    */
 | 
			
		||||
    @Relationship = {
 | 
			
		||||
      "/*": {
 | 
			
		||||
        "name": "Asserts",
 | 
			
		||||
        "entityTypes": [ "schemaField" ]
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    datasetFields: optional array[Urn]
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * One or more datasets that are targeted by this assertion
 | 
			
		||||
    */
 | 
			
		||||
    @Relationship = {
 | 
			
		||||
      "/*": {
 | 
			
		||||
        "name": "Asserts",
 | 
			
		||||
        "entityTypes": [ "dataset" ]
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    datasets: optional array[Urn]
 | 
			
		||||
    
 | 
			
		||||
    /**
 | 
			
		||||
    * Type of assertion
 | 
			
		||||
    */
 | 
			
		||||
    assertionType: AssertionType
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    * Logic for assertion such as implementation of custom nativeOperator
 | 
			
		||||
    */
 | 
			
		||||
    assertionLogic: optional string
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Parameters required for the assertion. e.g. min_value, max_value, value, columns
 | 
			
		||||
    */
 | 
			
		||||
    assertionParameters: map[string, string] = { }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,51 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
import com.linkedin.timeseries.TimeseriesAspectBase
 | 
			
		||||
import com.linkedin.common.ExternalReference
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * The results of evaluating the assertion on the batch
 | 
			
		||||
 */
 | 
			
		||||
@Aspect = {
 | 
			
		||||
  "name": "assertionResult",
 | 
			
		||||
  "type": "timeseries",
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
record AssertionResult includes TimeseriesAspectBase {
 | 
			
		||||
  
 | 
			
		||||
  /*
 | 
			
		||||
  * Urn of assertion which is evaluated
 | 
			
		||||
  */
 | 
			
		||||
  @TimeseriesField = {}
 | 
			
		||||
  assertionUrn: Urn
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
  * Urn of entity being asserted
 | 
			
		||||
  */
 | 
			
		||||
  //example - dataset urn, if dataset is being asserted
 | 
			
		||||
  @TimeseriesField = {}
 | 
			
		||||
  asserteeUrn: Urn
 | 
			
		||||
  
 | 
			
		||||
  /**
 | 
			
		||||
  * Specification of the batch whose data quality is evaluated
 | 
			
		||||
  */
 | 
			
		||||
  batchSpec: optional BatchSpec
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
  * Results of assertion
 | 
			
		||||
  */
 | 
			
		||||
  @TimeseriesField = {}
 | 
			
		||||
  batchAssertionResult: BatchAssertionResult
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
  *  Native Run identifier of platform evaluating the assertions 
 | 
			
		||||
  */
 | 
			
		||||
  //Multiple assertions could occur in same evaluator run
 | 
			
		||||
  nativeEvaluatorRunId: optional string
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Runtime parameters of evaluation
 | 
			
		||||
   */
 | 
			
		||||
  runtimeContext: map[string, string] = { }
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,63 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
enum AssertionStdOperator {
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is between min_value and max_value
 | 
			
		||||
        */
 | 
			
		||||
        BETWEEN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is less than max_value
 | 
			
		||||
        */
 | 
			
		||||
        LESS_THAN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is less than or equal to max_value
 | 
			
		||||
        */
 | 
			
		||||
        LESS_THAN_OR_EQUAL_TO
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is greater than min_value
 | 
			
		||||
        */
 | 
			
		||||
        GREATER_THAN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is greater than or equal to min_value
 | 
			
		||||
        */
 | 
			
		||||
        GREATER_THAN_OR_EQUAL_TO
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is equal to value
 | 
			
		||||
        */
 | 
			
		||||
        EQUAL_TO
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is not null
 | 
			
		||||
        */
 | 
			
		||||
        NOT_NULL 
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted contains value
 | 
			
		||||
        */
 | 
			
		||||
        CONTAIN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted ends with value
 | 
			
		||||
        */
 | 
			
		||||
        END_WITH
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted starts with value
 | 
			
		||||
        */
 | 
			
		||||
        START_WITH
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Value being asserted is one of the array values
 | 
			
		||||
        */
 | 
			
		||||
        IN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Other
 | 
			
		||||
        */  
 | 
			
		||||
        _NATIVE_
 | 
			
		||||
    }
 | 
			
		||||
@ -0,0 +1,32 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
* Type of Assertion
 | 
			
		||||
*/
 | 
			
		||||
record AssertionType {
 | 
			
		||||
    /**
 | 
			
		||||
    * Scope of Assertion
 | 
			
		||||
    */
 | 
			
		||||
    scope: enum AssertionScope {
 | 
			
		||||
        DATASET_COLUMN
 | 
			
		||||
        DATASET_ROWS
 | 
			
		||||
        DATASET_SCHEMA
 | 
			
		||||
        CROSS_DATASET
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Assertion details for scope DATASET_COLUMN
 | 
			
		||||
    */
 | 
			
		||||
    datasetColumnAssertion: optional DatasetColumnAssertion
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Assertion details for scope DATASET_ROWS
 | 
			
		||||
    */
 | 
			
		||||
    datasetRowsAssertion: optional DatasetRowsAssertion
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Assertion details for scope DATASET_SCHEMA
 | 
			
		||||
    */
 | 
			
		||||
    datasetSchemaAssertion: optional DatasetSchemaAssertion
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,40 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
record BatchAssertionResult {
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
  *  Indicator of whether the constraint is fully satisfied for the batch
 | 
			
		||||
  */
 | 
			
		||||
  success: boolean
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Number of rows for evaluated batch
 | 
			
		||||
   */
 | 
			
		||||
  rowCount: optional long
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Number of rows with missing value for evaluated batch
 | 
			
		||||
   */
 | 
			
		||||
  missingCount: optional long
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Number of rows with unexpected value for evaluated batch
 | 
			
		||||
   */
 | 
			
		||||
  unexpectedCount: optional long
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Observed aggregate value for evaluated batch
 | 
			
		||||
   */
 | 
			
		||||
  actualAggValue: optional float
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Other results of evaluation
 | 
			
		||||
   */
 | 
			
		||||
  nativeResults: map[string, string] = { }
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * URL where the reference exists
 | 
			
		||||
   */
 | 
			
		||||
  //TODO - Change type to optional Url, not working 
 | 
			
		||||
  externalUrl: optional string
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,24 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.CustomProperties
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A batch on which certain operations, e.g. data quality evaluation, are done.
 | 
			
		||||
 */
 | 
			
		||||
record BatchSpec includes CustomProperties {
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * The native identifier as specified by the system operating on the batch.
 | 
			
		||||
    */
 | 
			
		||||
    nativeBatchId: optional string
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * A query that identifies a batch of data
 | 
			
		||||
    */
 | 
			
		||||
    query: optional string
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Any limit to the number of rows in the batch, if applied
 | 
			
		||||
    */
 | 
			
		||||
    limit: optional int
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,83 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
  * Assertion on column of a dataset
 | 
			
		||||
  */
 | 
			
		||||
record DatasetColumnAssertion {
 | 
			
		||||
    /**
 | 
			
		||||
    * Standardized assertion operator
 | 
			
		||||
    */
 | 
			
		||||
    stdOperator: AssertionStdOperator
 | 
			
		||||
 | 
			
		||||
     /**
 | 
			
		||||
    * Native assertion operator
 | 
			
		||||
    */
 | 
			
		||||
    nativeOperator: optional string // filled with the platform specific native operator string
 | 
			
		||||
    
 | 
			
		||||
    /**
 | 
			
		||||
    * Standardized aggregation function applied on column values
 | 
			
		||||
    */
 | 
			
		||||
    stdAggFunc: enum DatasetColumnStdAggFunc {
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on individual column value
 | 
			
		||||
        */
 | 
			
		||||
        IDENTITY
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on column mean 
 | 
			
		||||
        */
 | 
			
		||||
        MEAN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on column median
 | 
			
		||||
        */
 | 
			
		||||
        MEDIAN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on number of distinct values in column
 | 
			
		||||
        */
 | 
			
		||||
        UNIQUE_COUNT
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on proportion of distinct values in column
 | 
			
		||||
        */
 | 
			
		||||
        UNIQUE_PROPOTION
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on number of null values in column
 | 
			
		||||
        */
 | 
			
		||||
        NULL_COUNT
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on proportion of null values in column
 | 
			
		||||
        */
 | 
			
		||||
        NULL_PROPORTION
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on column std deviation
 | 
			
		||||
        */
 | 
			
		||||
        STDDEV
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on column min
 | 
			
		||||
        */
 | 
			
		||||
        MIN
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on column max
 | 
			
		||||
        */
 | 
			
		||||
        MAX
 | 
			
		||||
        
 | 
			
		||||
        /**
 | 
			
		||||
        * Other
 | 
			
		||||
        */
 | 
			
		||||
        _NATIVE_
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Native aggregation function applied on column values
 | 
			
		||||
    */
 | 
			
		||||
    nativeAggFunc: optional string 
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,40 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
  * Assertion on rows of a dataset
 | 
			
		||||
  */
 | 
			
		||||
record DatasetRowsAssertion {
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Standardized assertion operator
 | 
			
		||||
    */
 | 
			
		||||
    stdOperator: AssertionStdOperator
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Native assertion operator
 | 
			
		||||
    */
 | 
			
		||||
    nativeOperator: optional string
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Standardized aggregation function applied on rows
 | 
			
		||||
    */
 | 
			
		||||
    stdAggFunc: enum DatasetRowsStdAggFunc {
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on number of rows
 | 
			
		||||
        */
 | 
			
		||||
        ROW_COUNT
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Other
 | 
			
		||||
        */
 | 
			
		||||
        _NATIVE_
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Native aggregation function applied on rows
 | 
			
		||||
    */
 | 
			
		||||
    nativeAggFunc: optional string
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,45 @@
 | 
			
		||||
namespace com.linkedin.assertion
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
  * Assertion on schema of a dataset
 | 
			
		||||
  */
 | 
			
		||||
record DatasetSchemaAssertion {
 | 
			
		||||
    
 | 
			
		||||
    /**
 | 
			
		||||
    * Standardized assertion operator
 | 
			
		||||
    */
 | 
			
		||||
    stdOperator: AssertionStdOperator
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Native assertion operator
 | 
			
		||||
    */
 | 
			
		||||
    nativeOperator: optional string
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Standardized aggregation function applied on the schema
 | 
			
		||||
    */
 | 
			
		||||
    stdAggFunc: enum DatasetSchemaStdAggFunc {
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on all columns
 | 
			
		||||
        */
 | 
			
		||||
        COLUMNS
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Assertion is applied on number of columns
 | 
			
		||||
        */
 | 
			
		||||
        COLUMN_COUNT
 | 
			
		||||
 | 
			
		||||
        /**
 | 
			
		||||
        * Other
 | 
			
		||||
        */
 | 
			
		||||
        _NATIVE_
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Native aggregation function applied on the schema
 | 
			
		||||
    */
 | 
			
		||||
    nativeAggFunc: optional string
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,14 @@
 | 
			
		||||
namespace com.linkedin.metadata.aspect
 | 
			
		||||
 | 
			
		||||
import com.linkedin.metadata.key.AssertionKey
 | 
			
		||||
import com.linkedin.common.DataPlatformInstance
 | 
			
		||||
import com.linkedin.assertion.AssertionInfo
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A union of all supported metadata aspects for an Assertion
 | 
			
		||||
 */
 | 
			
		||||
typeref AssertionAspect = union[
 | 
			
		||||
    AssertionKey,
 | 
			
		||||
    DataPlatformInstance,
 | 
			
		||||
    AssertionInfo
 | 
			
		||||
]
 | 
			
		||||
@ -0,0 +1,20 @@
 | 
			
		||||
namespace com.linkedin.metadata.key
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Key for an Assertion
 | 
			
		||||
 */
 | 
			
		||||
@Aspect = {
 | 
			
		||||
  "name": "assertionKey",
 | 
			
		||||
}
 | 
			
		||||
record AssertionKey {
 | 
			
		||||
  
 | 
			
		||||
  //The name of the assertion platform such as greatExpectations etc.
 | 
			
		||||
  //assertionPlatform: Urn
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
  * Unique id for the assertion.
 | 
			
		||||
  */
 | 
			
		||||
  assertionId: string
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,24 @@
 | 
			
		||||
namespace com.linkedin.metadata.snapshot
 | 
			
		||||
 | 
			
		||||
import com.linkedin.common.Urn
 | 
			
		||||
import com.linkedin.metadata.aspect.AssertionAspect
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A metadata snapshot for a specific Assertion entity.
 | 
			
		||||
 */
 | 
			
		||||
@Entity = {
 | 
			
		||||
  "name": "assertion",
 | 
			
		||||
  "keyAspect": "assertionKey"
 | 
			
		||||
}
 | 
			
		||||
record AssertionSnapshot {
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * URN for the entity the metadata snapshot is associated with.
 | 
			
		||||
   */
 | 
			
		||||
  urn: Urn
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * The list of metadata aspects associated with the assertion.
 | 
			
		||||
   */
 | 
			
		||||
  aspects: array[AssertionAspect]
 | 
			
		||||
}
 | 
			
		||||
@ -4,6 +4,13 @@ namespace com.linkedin.timeseries
 | 
			
		||||
 * Defines how the data is partitioned
 | 
			
		||||
 */
 | 
			
		||||
record PartitionSpec {
 | 
			
		||||
 | 
			
		||||
  type: enum PartitionType {
 | 
			
		||||
          FULL_TABLE,
 | 
			
		||||
          QUERY,
 | 
			
		||||
          PARTITION            
 | 
			
		||||
      } = "PARTITION"
 | 
			
		||||
      
 | 
			
		||||
  /**
 | 
			
		||||
   * String representation of the partition
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
@ -14,7 +14,10 @@ record TimeseriesAspectBase {
 | 
			
		||||
  /**
 | 
			
		||||
   * The optional partition specification.
 | 
			
		||||
   */
 | 
			
		||||
  partitionSpec: optional PartitionSpec
 | 
			
		||||
  partitionSpec: optional PartitionSpec = {
 | 
			
		||||
    "type":"FULL_TABLE",
 | 
			
		||||
    "partition":"FULL_TABLE_SNAPSHOT"
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.
 | 
			
		||||
 | 
			
		||||
@ -12,6 +12,7 @@ entities:
 | 
			
		||||
      - schemaMetadata
 | 
			
		||||
      - status
 | 
			
		||||
      - container
 | 
			
		||||
      - assertionResult
 | 
			
		||||
  - name: dataHubPolicy
 | 
			
		||||
    doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
 | 
			
		||||
    keyAspect: dataHubPolicyKey
 | 
			
		||||
@ -84,3 +85,9 @@ entities:
 | 
			
		||||
      - dataHubExecutionRequestInput
 | 
			
		||||
      - dataHubExecutionRequestSignal
 | 
			
		||||
      - dataHubExecutionRequestResult
 | 
			
		||||
  - name: assertion
 | 
			
		||||
    doc: Assertion represents a data quality rule applied on one or more datasets.
 | 
			
		||||
    keyAspect: assertionKey
 | 
			
		||||
    aspects:
 | 
			
		||||
      - assertionInfo
 | 
			
		||||
      - dataPlatformInstance
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										144
									
								
								smoke-test/test_data_quality.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								smoke-test/test_data_quality.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,144 @@
 | 
			
		||||
import json
 | 
			
		||||
import urllib
 | 
			
		||||
import time
 | 
			
		||||
import pytest
 | 
			
		||||
import requests
 | 
			
		||||
from datahub.cli.docker import check_local_docker_containers
 | 
			
		||||
from tests.utils import ingest_file_via_rest
 | 
			
		||||
 | 
			
		||||
# Sample data file ingested by test_run_ingestion.
bootstrap_sample_data = "test_resources/bootstrap_data_quality.json"
# Base URL of the GMS instance that the requests in this module target.
GMS_ENDPOINT = "http://localhost:8080"

# Headers passed along with the Rest.li requests made by these tests.
restli_default_headers = {"X-RestLi-Protocol-Version": "2.0.0"}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(scope="session")
def wait_for_healthchecks():
    """Session fixture asserting check_local_docker_containers() reports nothing.

    Despite the name, this does not wait or retry: the check runs once and the
    fixture fails immediately if it returns a truthy value.
    """
    container_issues = check_local_docker_containers()
    assert not container_issues
    yield
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.dependency()
def test_healthchecks(wait_for_healthchecks):
    """Anchor of the dependency chain; the requested fixture performs the check."""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.dependency(depends=["test_healthchecks"])
def test_run_ingestion(wait_for_healthchecks):
    """Ingest the bootstrap sample data file through the REST ingestion helper."""
    sample_file = bootstrap_sample_data
    ingest_file_via_rest(sample_file)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_latest_assertions_results_by_partition():
    """Query the latest assertion results of a dataset via getTimeseriesStats.

    The request filters the ``assertionResult`` aspect on the dataset urn and
    asks for the latest ``batchAssertionResult`` grouped by assertee urn,
    partition, one-day time window and assertion urn.
    """
    urn = "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)"

    # Give elasticsearch time to update its indices before querying.
    time.sleep(5)

    # Restrict the query to documents whose urn equals the dataset urn.
    urn_filter = {
        "or": [{"and": [{"field": "urn", "value": urn, "condition": "EQUAL"}]}]
    }
    latest_result_metric = {
        "fieldPath": "batchAssertionResult",
        "aggregationType": "LATEST",
    }
    grouping_buckets = [
        {"key": "asserteeUrn", "type": "STRING_GROUPING_BUCKET"},
        {"key": "partitionSpec.partition", "type": "STRING_GROUPING_BUCKET"},
        {
            "key": "timestampMillis",
            "type": "DATE_GROUPING_BUCKET",
            "timeWindowSize": {"multiple": 1, "unit": "DAY"},
        },
        {"key": "assertionUrn", "type": "STRING_GROUPING_BUCKET"},
    ]
    payload = json.dumps(
        {
            "entityName": "dataset",
            "aspectName": "assertionResult",
            "filter": urn_filter,
            "metrics": [latest_result_metric],
            "buckets": grouping_buckets,
        }
    )

    response = requests.post(
        f"{GMS_ENDPOINT}/analytics?action=getTimeseriesStats",
        data=payload,
        headers=restli_default_headers,
    )
    response.raise_for_status()

    result = response.json()
    assert result["value"]
    table = result["value"]["table"]
    assert table

    # Column order is not asserted, only the set of returned columns.
    column_names = table["columnNames"]
    assert sorted(column_names) == [
        "asserteeUrn",
        "assertionUrn",
        "latest_batchAssertionResult",
        "partitionSpec.partition",
        "timestampMillis",
    ]

    rows = table["rows"]
    assert len(rows) == 6
    # Look up the assertee column by name since its position is not fixed.
    assert rows[0][column_names.index("asserteeUrn")] == urn
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_assertions_on_dataset():
    """Expect exactly one incoming ``Asserts`` relationship on the dataset.

    The relationships endpoint lists assertion urns regardless of whether the
    assertion has produced any results.
    """
    urn = "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)"
    encoded_urn = urllib.parse.quote(urn)
    url = f"{GMS_ENDPOINT}/relationships?direction=INCOMING&urn={encoded_urn}&types=Asserts"

    response = requests.get(url)
    response.raise_for_status()

    relationships = response.json()["relationships"]
    assert len(relationships) == 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_assertions_on_dataset_field():
    """Expect exactly one incoming ``Asserts`` relationship on the schema field.

    The relationships endpoint lists assertion urns regardless of whether the
    assertion has produced any results.
    """
    urn = "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD), col1)"
    encoded_urn = urllib.parse.quote(urn)
    url = f"{GMS_ENDPOINT}/relationships?direction=INCOMING&urn={encoded_urn}&types=Asserts"

    response = requests.get(url)
    response.raise_for_status()

    relationships = response.json()["relationships"]
    assert len(relationships) == 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_assertion_info():
    """Fetch version 0 of the ``assertionInfo`` aspect and check its shape.

    Asserts that the response carries an ``AssertionInfo`` aspect with a
    non-empty ``assertionType``.
    """
    assertion_urn = "urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b"
    # Adjacent string literals are joined with nothing in between. A backslash
    # continuation inside a single literal would embed the next line's
    # indentation in the URL, right before the query string.
    response = requests.get(
        f"{GMS_ENDPOINT}/aspects/{urllib.parse.quote(assertion_urn)}"
        "?aspect=assertionInfo&version=0",
        headers=restli_default_headers,
    )

    response.raise_for_status()
    data = response.json()

    assert data["aspect"]
    assert data["aspect"]["com.linkedin.assertion.AssertionInfo"]
    assert data["aspect"]["com.linkedin.assertion.AssertionInfo"]["assertionType"]
 | 
			
		||||
							
								
								
									
										72
									
								
								smoke-test/test_resources/bootstrap_data_quality.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								smoke-test/test_resources/bootstrap_data_quality.json
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,72 @@
 | 
			
		||||
[
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "assertion",
 | 
			
		||||
    "entityUrn": "urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionInfo",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"customProperties\": {\"suite_name\": \"demo_suite\"}, \"datasetFields\": [\"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD), col1)\"], \"datasets\": [\"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\"], \"assertionType\": {\"scope\": \"DATASET_COLUMN\", \"datasetColumnAssertion\": {\"stdOperator\": \"LESS_THAN\", \"nativeOperator\": \"column_value_is_less_than\", \"stdAggFunc\": \"IDENTITY\"}}, \"assertionParameters\": {\"max_value\": \"99\"}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "dataset",
 | 
			
		||||
    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionResult",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1643794280350, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'IN'}]\"}, \"messageId\": \"1643794280350\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 90, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "dataset",
 | 
			
		||||
    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionResult",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1643794280352, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'US'}]\"}, \"messageId\": \"1643794280352\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": false, \"actualAggValue\": 101, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "dataset",
 | 
			
		||||
    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionResult",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1643794280354, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"messageId\": \"1643794280354\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 93, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "dataset",
 | 
			
		||||
    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionResult",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1643880726872, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'IN'}]\"}, \"messageId\": \"1643880726872\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 90, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "dataset",
 | 
			
		||||
    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionResult",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1643880726874, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"[{'country': 'US'}]\"}, \"messageId\": \"1643880726874\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": false, \"actualAggValue\": 101, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    "entityType": "dataset",
 | 
			
		||||
    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)",
 | 
			
		||||
    "changeType": "UPSERT",
 | 
			
		||||
    "aspectName": "assertionResult",
 | 
			
		||||
    "aspect": {
 | 
			
		||||
      "value": "{\"timestampMillis\": 1643880726875, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"messageId\": \"1643880726875\", \"assertionUrn\": \"urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,fooTable,PROD)\", \"batchAssertionResult\": {\"success\": true, \"actualAggValue\": 93, \"nativeResults\": {}, \"externalUrl\": \"http://example.com/uuid1\"}, \"nativeEvaluatorRunId\": \"uuid1\", \"runtimeContext\": {}}",
 | 
			
		||||
      "contentType": "application/json"
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
]
 | 
			
		||||
@ -23,4 +23,15 @@ record TestEntityProfile includes TimeseriesAspectBase {
 | 
			
		||||
    "key":"key"
 | 
			
		||||
  }
 | 
			
		||||
  componentProfiles: array[TestEntityComponentProfile]
 | 
			
		||||
 | 
			
		||||
  @TimeseriesField = {}
 | 
			
		||||
  aComplexNestedRecord: record ComplexNestedRecord {
 | 
			
		||||
      type: enum BatchType {
 | 
			
		||||
            QUERY_BATCH,
 | 
			
		||||
            PARTITION_BATCH,
 | 
			
		||||
            FULL_TABLE,
 | 
			
		||||
            _NATIVE_
 | 
			
		||||
      }
 | 
			
		||||
      partitions: array[map[string, string]]
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user