From bb33f015ca1f42eb83efa4a3f5d5889f8a9d30b8 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 2 Aug 2023 09:54:33 +0530 Subject: [PATCH] fix(ingest/s3): wrong sorting in case of multi-partition key (#8536) --- .../src/datahub/ingestion/source/s3/source.py | 6 +++--- metadata-ingestion/tests/unit/s3/test_s3_source.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index db83c29370..61c6f070d9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -178,9 +178,9 @@ def partitioned_folder_comparator(folder1: str, folder2: str) -> int: try: # Stripping = from the folder names as it most probably partition name part like year=2021 if "=" in folder1 and "=" in folder2: - if folder1.split("=", 1)[0] == folder2.split("=", 1)[0]: - folder1 = folder1.split("=", 1)[1] - folder2 = folder2.split("=", 1)[1] + if folder1.rsplit("=", 1)[0] == folder2.rsplit("=", 1)[0]: + folder1 = folder1.rsplit("=", 1)[-1] + folder2 = folder2.rsplit("=", 1)[-1] num_folder1 = int(folder1) num_folder2 = int(folder2) diff --git a/metadata-ingestion/tests/unit/s3/test_s3_source.py b/metadata-ingestion/tests/unit/s3/test_s3_source.py index 61bde6ca4f..eeee037965 100644 --- a/metadata-ingestion/tests/unit/s3/test_s3_source.py +++ b/metadata-ingestion/tests/unit/s3/test_s3_source.py @@ -8,6 +8,12 @@ def test_partition_comparator_numeric_folder_name(): assert partitioned_folder_comparator(folder1, folder2) == -1 +def test_partition_multi_level_key(): + folder1 = "backup/metadata_aspect_v2/year=2023/month=01" + folder2 = "backup/metadata_aspect_v2/year=2023/month=2" + assert partitioned_folder_comparator(folder1, folder2) == -1 + + def test_partition_comparator_numeric_folder_name2(): folder1 = "12" folder2 = "3"