From e79b4e8c2bc90e9f86f4f4c181da1363a7d96802 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 14 Nov 2022 06:59:42 +0530 Subject: [PATCH] feat(ingest): s3 - add status aspect for detected s3 datasets (#6402) --- .../src/datahub/ingestion/source/s3/source.py | 3 +- .../golden_mces_file_without_extension.json | 5 +++ .../golden_mces_folder_no_partition.json | 10 ++++++ ...lden_mces_folder_no_partition_exclude.json | 5 +++ ...den_mces_folder_no_partition_filename.json | 10 ++++++ .../golden_mces_folder_no_partition_glob.json | 5 +++ .../golden_mces_folder_partition_basic.json | 5 +++ .../golden_mces_folder_partition_keyval.json | 5 +++ .../local/golden_mces_multiple_files.json | 35 +++++++++++++++++++ .../golden_mces_multiple_spec_for_files.json | 10 ++++++ .../local/golden_mces_single_file.json | 5 +++ .../golden_mces_file_without_extension.json | 5 +++ .../s3/golden_mces_folder_no_partition.json | 10 ++++++ ...lden_mces_folder_no_partition_exclude.json | 5 +++ ...den_mces_folder_no_partition_filename.json | 10 ++++++ .../golden_mces_folder_no_partition_glob.json | 5 +++ .../golden_mces_folder_partition_basic.json | 5 +++ .../golden_mces_folder_partition_keyval.json | 5 +++ .../s3/golden_mces_multiple_files.json | 35 +++++++++++++++++++ .../golden_mces_multiple_spec_for_files.json | 10 ++++++ .../s3/golden_mces_single_file.json | 5 +++ 21 files changed, 192 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 7e3f633fac..f470f9999e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -63,6 +63,7 @@ from datahub.ingestion.source.s3.data_lake_utils import ContainerWUCreator from datahub.ingestion.source.s3.profiling import _SingleTableProfiler from datahub.ingestion.source.s3.report import DataLakeSourceReport from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet +from datahub.metadata.com.linkedin.pegasus2avro.common import Status from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.com.linkedin.pegasus2avro.schema import ( @@ -536,7 +537,7 @@ class S3Source(Source): dataset_snapshot = DatasetSnapshot( urn=dataset_urn, - aspects=[], + aspects=[Status(removed=False)], ) customProperties: Optional[Dict[str, str]] = None diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json index ddb4f948f2..bc116e2aa5 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json index 0410e67329..4d332f6f67 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -685,6 +690,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json index 5bbb2c3e9f..8d6b28c17d 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json index 08525c1e8a..4503e00a86 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -685,6 +690,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json index 32de240662..54479cb76c 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json index be006dfc17..d129b6496c 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json index a750b4f1f7..4ff35cc706 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index 54d628e135..2009879164 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -997,6 +1002,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1132,6 +1142,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1267,6 +1282,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1378,6 +1398,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1513,6 +1538,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1648,6 +1678,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json index 0c5d617c1d..6eac5e7e8a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -685,6 +690,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json index f9efd10aff..baca587233 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index fbb52b73b2..1e6d780809 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json index 1ffddd0b96..d444b8c014 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, @@ -388,6 +393,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json index c1ab518c30..a9a47abf4a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json index 8b0bf13722..a6a330c577 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, @@ -388,6 +393,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json index 109012ea6a..44fea59224 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json index 8fec03f8e4..5c63bf5f52 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json index 6bc8bac01b..308fde167b 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 3a68e0f853..f32449ebe2 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -704,6 +709,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -826,6 +836,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -948,6 +963,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1046,6 +1066,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1168,6 +1193,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -1290,6 +1320,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index 18c4596001..19f1a7837a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -404,6 +409,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index 255a1ae95e..7e7a875905 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -4,6 +4,11 @@ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {