2021-02-11 12:53:44 -08:00
|
|
|
import json
|
2021-05-17 11:42:12 -07:00
|
|
|
import os
|
|
|
|
from typing import Union
|
2021-02-11 23:14:20 -08:00
|
|
|
|
2021-02-11 12:53:44 -08:00
|
|
|
import deepdiff
|
|
|
|
|
|
|
|
|
2021-05-17 11:42:12 -07:00
|
|
|
def load_json_file(filename: Union[str, os.PathLike]) -> object:
    """Load and return the parsed contents of a JSON file.

    Args:
        filename: Path to the JSON file (string or path-like object).

    Returns:
        The deserialized JSON value (typically a dict or list).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file contents are not valid JSON.
    """
    # open() accepts os.PathLike directly, so no str() conversion is needed.
    # Pin the encoding: JSON is UTF-8 by spec, and relying on the platform
    # default encoding would make parsing behavior machine-dependent.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
|
|
|
2021-05-17 11:42:12 -07:00
|
|
|
def assert_mces_equal(output: object, golden: object) -> None:
    """Assert deep equality of two lists of MCE JSON objects.

    Run-specific timestamps written by the ETL pipeline are excluded from
    the comparison, so two otherwise-identical runs compare equal.

    Raises:
        AssertionError: If the objects differ anywhere outside the ignored
            timestamp paths; the message is the DeepDiff report.
    """
    # This method assumes we're given a list of MCE json objects.
    #
    # Each ignored path is a deepdiff-rendered location of a timestamp
    # field. A couple of concrete examples of what gets matched:
    #   root[0]['proposedSnapshot']['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot']['aspects'][0]['com.linkedin.pegasus2avro.common.Ownership']['lastModified']['time']
    #   root[0]['proposedSnapshot']['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot']['aspects'][1]['com.linkedin.pegasus2avro.dataset.UpstreamLineage']['upstreams'][0]['auditStamp']['time']
    #
    # The four stamp fields share one path shape, so build the regexes
    # from a single template rather than spelling each one out.
    timestamp_fields = ("created", "lastModified", "createStamp", "auditStamp")
    path_template = (
        r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+"
        r"\['%s'\]\['time'\]"
    )
    ignore_paths = {path_template % field for field in timestamp_fields}

    difference = deepdiff.DeepDiff(golden, output, exclude_regex_paths=ignore_paths)
    assert not difference, str(difference)
|