feat(ingest): associate queries with operations (#13404)

This commit is contained in:
Harshal Sheth 2025-05-05 11:27:33 -07:00 committed by GitHub
parent 1b3173ace3
commit cb3988a5f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 9070 additions and 8942 deletions

View File

@ -1753,8 +1753,9 @@ class SqlParsingAggregator(Closeable):
operationType=operation_type, operationType=operation_type,
lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp), lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp),
actor=query.actor.urn() if query.actor else None, actor=query.actor.urn() if query.actor else None,
customProperties=( sourceType=models.OperationSourceTypeClass.DATA_PLATFORM,
{"query_urn": self._query_urn(query_id)} queries=(
[self._query_urn(query_id)]
if self.can_generate_query(query_id) if self.can_generate_query(query_id)
else None else None
), ),

View File

@ -25,44 +25,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetUsageStatistics",
"aspect": {
"json": {
"timestampMillis": 1643846400000,
"eventGranularity": {
"unit": "DAY",
"multiple": 1
},
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"uniqueUserCount": 1,
"totalSqlQueries": 4,
"topSqlQueries": [
"create view `bigquery-dataset-1`.`view-1` as select * from `bigquery-dataset-1`.`table-1`",
"select * from `bigquery-dataset-1`.`table-1`"
],
"userCounts": [
{
"user": "urn:li:corpuser:foo",
"count": 4
}
],
"fieldCounts": []
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -91,23 +54,46 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
{ {
"entityType": "dataset", "entityType": "query",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", "entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT", "changeType": "UPSERT",
"aspectName": "status", "aspectName": "querySubjects",
"aspect": { "aspect": {
"json": { "json": {
"removed": false "subjects": [
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)"
},
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)"
}
]
} }
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -139,23 +125,44 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
{ {
"entityType": "query", "entityType": "dataset",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"changeType": "UPSERT", "changeType": "UPSERT",
"aspectName": "dataPlatformInstance", "aspectName": "datasetUsageStatistics",
"aspect": { "aspect": {
"json": { "json": {
"platform": "urn:li:dataPlatform:bigquery" "timestampMillis": 1643846400000,
"eventGranularity": {
"unit": "DAY",
"multiple": 1
},
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"uniqueUserCount": 1,
"totalSqlQueries": 4,
"topSqlQueries": [
"select * from `bigquery-dataset-1`.`table-1`",
"create view `bigquery-dataset-1`.`view-1` as select * from `bigquery-dataset-1`.`table-1`"
],
"userCounts": [
{
"user": "urn:li:corpuser:foo",
"count": 4
}
],
"fieldCounts": []
} }
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -191,30 +198,34 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
{ {
"entityType": "query", "entityType": "dataset",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)",
"changeType": "UPSERT", "changeType": "UPSERT",
"aspectName": "querySubjects", "aspectName": "operation",
"aspect": { "aspect": {
"json": { "json": {
"subjects": [ "timestampMillis": 1643871600000,
{ "partitionSpec": {
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)" "partition": "FULL_TABLE_SNAPSHOT",
}, "type": "FULL_TABLE"
{ },
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)" "actor": "urn:li:corpuser:foo",
} "operationType": "CREATE",
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 1643871600000,
"queries": [
"urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12"
] ]
} }
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -243,23 +254,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -279,7 +274,23 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:23f1935934face229de381fad193e390153180bf1e7afaa6db58e91fc28d0021",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -311,7 +322,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -340,23 +351,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-y4a2wl", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:23f1935934face229de381fad193e390153180bf1e7afaa6db58e91fc28d0021",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -376,7 +371,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -392,7 +387,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -424,33 +419,23 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
{ {
"entityType": "dataset", "entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"changeType": "UPSERT", "changeType": "UPSERT",
"aspectName": "operation", "aspectName": "status",
"aspect": { "aspect": {
"json": { "json": {
"timestampMillis": 1643871600000, "removed": false
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:foo",
"operationType": "CREATE",
"customProperties": {
"query_urn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12"
},
"lastUpdatedTimestamp": 1643871600000
} }
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -466,7 +451,23 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -482,7 +483,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -498,7 +499,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
}, },
@ -520,7 +521,7 @@
}, },
"systemMetadata": { "systemMetadata": {
"lastObserved": 1643871600000, "lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00", "runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided" "lastRunId": "no-run-id-provided"
} }
} }

View File

@ -69,6 +69,7 @@
"aspectName": "queryProperties", "aspectName": "queryProperties",
"aspect": { "aspect": {
"json": { "json": {
"customProperties": {},
"statement": { "statement": {
"value": "INSERT INTO foo (\n a,\n b,\n c\n)\nSELECT\n a,\n b,\n c\nFROM bar", "value": "INSERT INTO foo (\n a,\n b,\n c\n)\nSELECT\n a,\n b,\n c\nFROM bar",
"language": "SQL" "language": "SQL"
@ -136,10 +137,11 @@
"type": "FULL_TABLE" "type": "FULL_TABLE"
}, },
"operationType": "INSERT", "operationType": "INSERT",
"customProperties": { "sourceType": "DATA_PLATFORM",
"query_urn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" "lastUpdatedTimestamp": 20000,
}, "queries": [
"lastUpdatedTimestamp": 20000 "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae"
]
} }
} }
} }

View File

@ -8,11 +8,12 @@
"json": { "json": {
"timestampMillis": 1707182625000, "timestampMillis": 1707182625000,
"partitionSpec": { "partitionSpec": {
"type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT",
"partition": "FULL_TABLE_SNAPSHOT" "type": "FULL_TABLE"
}, },
"actor": "urn:li:corpuser:user2", "actor": "urn:li:corpuser:user2",
"operationType": "CREATE", "operationType": "CREATE",
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 25000 "lastUpdatedTimestamp": 25000
} }
} }
@ -26,11 +27,12 @@
"json": { "json": {
"timestampMillis": 1707182625000, "timestampMillis": 1707182625000,
"partitionSpec": { "partitionSpec": {
"type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT",
"partition": "FULL_TABLE_SNAPSHOT" "type": "FULL_TABLE"
}, },
"actor": "urn:li:corpuser:user3", "actor": "urn:li:corpuser:user3",
"operationType": "CREATE", "operationType": "CREATE",
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 26000 "lastUpdatedTimestamp": 26000
} }
} }

View File

@ -12,10 +12,11 @@
"type": "FULL_TABLE" "type": "FULL_TABLE"
}, },
"operationType": "CREATE", "operationType": "CREATE",
"customProperties": { "sourceType": "DATA_PLATFORM",
"query_urn": "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2" "lastUpdatedTimestamp": 1707182625000,
}, "queries": [
"lastUpdatedTimestamp": 1707182625000 "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2"
]
} }
} }
}, },
@ -26,6 +27,7 @@
"aspectName": "queryProperties", "aspectName": "queryProperties",
"aspect": { "aspect": {
"json": { "json": {
"customProperties": {},
"statement": { "statement": {
"value": "CREATE OR REPLACE TABLE `dataset.foo` (\n date_utc TIMESTAMP,\n revenue INT64\n)", "value": "CREATE OR REPLACE TABLE `dataset.foo` (\n date_utc TIMESTAMP,\n revenue INT64\n)",
"language": "SQL" "language": "SQL"

View File

@ -45,12 +45,12 @@ record Operation includes TimeseriesAspectBase {
* Source Type * Source Type
*/ */
@TimeseriesField = {} @TimeseriesField = {}
sourceType: optional OperationSourceType sourceType: optional OperationSourceType
/** /**
* Custom properties * Custom properties
*/ */
customProperties: optional map[string, string] customProperties: optional map[string, string]
/** /**
* The time at which the operation occurred. Would be better named 'operationTime' * The time at which the operation occurred. Would be better named 'operationTime'
@ -58,4 +58,10 @@ record Operation includes TimeseriesAspectBase {
@TimeseriesField = {} @TimeseriesField = {}
@Searchable = { "fieldType": "DATETIME", "fieldName": "lastOperationTime" } @Searchable = { "fieldType": "DATETIME", "fieldName": "lastOperationTime" }
lastUpdatedTimestamp: long lastUpdatedTimestamp: long
/**
* Which queries were used in this operation.
*/
@TimeseriesFieldCollection = {"key":"query"}
queries: optional array[Urn]
} }