From b3a25d6fbd9414c7c65f6aa6152406d46f59632d Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 20 Jun 2025 18:45:41 +0530 Subject: [PATCH] fix(ingest/bigquery): use email as user urn (#13831) --- .../ingestion/source/bigquery_v2/common.py | 2 +- .../bigquery_lineage_usage_golden.json | 23 ++++++++++--------- .../unit/bigquery/test_bigquery_usage.py | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py index 83484e3a6a..f1c5641a6b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/common.py @@ -63,7 +63,7 @@ class BigQueryIdentifierBuilder: ) def gen_user_urn(self, user_email: str) -> str: - return make_user_urn(user_email.split("@")[0]) + return make_user_urn(user_email) def make_data_platform_urn(self) -> str: return make_data_platform_urn(self.platform) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json index caafd70b7c..cddc5628f2 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json @@ -14,7 +14,7 @@ }, "created": { "time": 1643871600000, - "actor": "urn:li:corpuser:foo" + "actor": "urn:li:corpuser:foo@xyz.com" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", "type": "TRANSFORMED", @@ -25,7 +25,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "bigquery-2022_02_03-07_00_00-6mhnuz", + "runId": "bigquery-2022_02_03-07_00_00-pj1cgp", "lastRunId": "no-run-id-provided" } }, @@ -117,7 +117,7 @@ "uniqueUserCount": 1, "userCounts": [ { - "user": "urn:li:corpuser:foo", + "user": "urn:li:corpuser:foo@xyz.com", "count": 2 } ] @@ -125,7 +125,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "bigquery-2022_02_03-07_00_00-6mhnuz", + "runId": "bigquery-2022_02_03-07_00_00-pj1cgp", "lastRunId": "no-run-id-provided" } }, @@ -153,8 +153,9 @@ ], "userCounts": [ { - "user": "urn:li:corpuser:foo", - "count": 4 + "user": "urn:li:corpuser:foo@xyz.com", + "count": 4, + "userEmail": "foo@xyz.com" } ], "fieldCounts": [] @@ -162,7 +163,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "bigquery-2022_02_03-07_00_00-6mhnuz", + "runId": "bigquery-2022_02_03-07_00_00-pj1cgp", "lastRunId": "no-run-id-provided" } }, @@ -214,7 +215,7 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "actor": "urn:li:corpuser:foo", + "actor": "urn:li:corpuser:foo@xyz.com", "operationType": "CREATE", "sourceType": "DATA_PLATFORM", "lastUpdatedTimestamp": 1643871600000, @@ -225,7 +226,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "bigquery-2022_02_03-07_00_00-6mhnuz", + "runId": "bigquery-2022_02_03-07_00_00-pj1cgp", "lastRunId": "no-run-id-provided" } }, @@ -314,7 +315,7 @@ "uniqueUserCount": 1, "userCounts": [ { - "user": "urn:li:corpuser:foo", + "user": "urn:li:corpuser:foo@xyz.com", "count": 2 } ] @@ -322,7 +323,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "bigquery-2022_02_03-07_00_00-6mhnuz", + "runId": "bigquery-2022_02_03-07_00_00-pj1cgp", "lastRunId": "no-run-id-provided" } }, diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigquery_usage.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_usage.py index 7ff83bff4a..7e1120c8ce 100644 --- a/metadata-ingestion/tests/unit/bigquery/test_bigquery_usage.py +++ b/metadata-ingestion/tests/unit/bigquery/test_bigquery_usage.py @@ -1061,7 +1061,7 @@ def test_operational_stats( OperationClass( timestampMillis=int(FROZEN_TIME.timestamp() * 1000), lastUpdatedTimestamp=int(query.timestamp.timestamp() * 1000), - actor=f"urn:li:corpuser:{query.actor.split('@')[0]}", + actor=f"urn:li:corpuser:{query.actor}", operationType=( query.type if query.type in OPERATION_STATEMENT_TYPES.values()