feat(ingest): associate queries with operations (#13404)

This commit is contained in:
Harshal Sheth 2025-05-05 11:27:33 -07:00 committed by GitHub
parent 1b3173ace3
commit cb3988a5f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 9070 additions and 8942 deletions

View File

@ -1753,8 +1753,9 @@ class SqlParsingAggregator(Closeable):
operationType=operation_type,
lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp),
actor=query.actor.urn() if query.actor else None,
customProperties=(
{"query_urn": self._query_urn(query_id)}
sourceType=models.OperationSourceTypeClass.DATA_PLATFORM,
queries=(
[self._query_urn(query_id)]
if self.can_generate_query(query_id)
else None
),

View File

@ -25,44 +25,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetUsageStatistics",
"aspect": {
"json": {
"timestampMillis": 1643846400000,
"eventGranularity": {
"unit": "DAY",
"multiple": 1
},
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"uniqueUserCount": 1,
"totalSqlQueries": 4,
"topSqlQueries": [
"create view `bigquery-dataset-1`.`view-1` as select * from `bigquery-dataset-1`.`table-1`",
"select * from `bigquery-dataset-1`.`table-1`"
],
"userCounts": [
{
"user": "urn:li:corpuser:foo",
"count": 4
}
],
"fieldCounts": []
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -91,23 +54,46 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "status",
"aspectName": "querySubjects",
"aspect": {
"json": {
"removed": false
"subjects": [
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)"
},
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -139,23 +125,44 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspectName": "datasetUsageStatistics",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
"timestampMillis": 1643846400000,
"eventGranularity": {
"unit": "DAY",
"multiple": 1
},
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"uniqueUserCount": 1,
"totalSqlQueries": 4,
"topSqlQueries": [
"select * from `bigquery-dataset-1`.`table-1`",
"create view `bigquery-dataset-1`.`view-1` as select * from `bigquery-dataset-1`.`table-1`"
],
"userCounts": [
{
"user": "urn:li:corpuser:foo",
"count": 4
}
],
"fieldCounts": []
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -191,30 +198,34 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)",
"changeType": "UPSERT",
"aspectName": "querySubjects",
"aspectName": "operation",
"aspect": {
"json": {
"subjects": [
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)"
},
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)"
}
"timestampMillis": 1643871600000,
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:foo",
"operationType": "CREATE",
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 1643871600000,
"queries": [
"urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12"
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -243,23 +254,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -279,7 +274,23 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:23f1935934face229de381fad193e390153180bf1e7afaa6db58e91fc28d0021",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -311,7 +322,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -340,23 +351,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-y4a2wl",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:23f1935934face229de381fad193e390153180bf1e7afaa6db58e91fc28d0021",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -376,7 +371,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -392,7 +387,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -424,33 +419,23 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)",
"changeType": "UPSERT",
"aspectName": "operation",
"aspectName": "status",
"aspect": {
"json": {
"timestampMillis": 1643871600000,
"partitionSpec": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:foo",
"operationType": "CREATE",
"customProperties": {
"query_urn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12"
},
"lastUpdatedTimestamp": 1643871600000
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -466,7 +451,23 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "query",
"entityUrn": "urn:li:query:176428c30cc3197730c20f6d0161efe869a0a041876d23c044aa2cd60d4c7a12",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -482,7 +483,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -498,7 +499,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
},
@ -520,7 +521,7 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "bigquery-2022_02_03-07_00_00",
"runId": "bigquery-2022_02_03-07_00_00-6mhnuz",
"lastRunId": "no-run-id-provided"
}
}

View File

@ -69,6 +69,7 @@
"aspectName": "queryProperties",
"aspect": {
"json": {
"customProperties": {},
"statement": {
"value": "INSERT INTO foo (\n a,\n b,\n c\n)\nSELECT\n a,\n b,\n c\nFROM bar",
"language": "SQL"
@ -136,10 +137,11 @@
"type": "FULL_TABLE"
},
"operationType": "INSERT",
"customProperties": {
"query_urn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae"
},
"lastUpdatedTimestamp": 20000
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 20000,
"queries": [
"urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae"
]
}
}
}

View File

@ -8,11 +8,12 @@
"json": {
"timestampMillis": 1707182625000,
"partitionSpec": {
"type": "FULL_TABLE",
"partition": "FULL_TABLE_SNAPSHOT"
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:user2",
"operationType": "CREATE",
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 25000
}
}
@ -26,11 +27,12 @@
"json": {
"timestampMillis": 1707182625000,
"partitionSpec": {
"type": "FULL_TABLE",
"partition": "FULL_TABLE_SNAPSHOT"
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:user3",
"operationType": "CREATE",
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 26000
}
}

View File

@ -12,10 +12,11 @@
"type": "FULL_TABLE"
},
"operationType": "CREATE",
"customProperties": {
"query_urn": "urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2"
},
"lastUpdatedTimestamp": 1707182625000
"sourceType": "DATA_PLATFORM",
"lastUpdatedTimestamp": 1707182625000,
"queries": [
"urn:li:query:f2e61c641cf14eae74147b6280ae40648516c4b7b58cfca6c4f7fb14ab255ce2"
]
}
}
},
@ -26,6 +27,7 @@
"aspectName": "queryProperties",
"aspect": {
"json": {
"customProperties": {},
"statement": {
"value": "CREATE OR REPLACE TABLE `dataset.foo` (\n date_utc TIMESTAMP,\n revenue INT64\n)",
"language": "SQL"

View File

@ -45,12 +45,12 @@ record Operation includes TimeseriesAspectBase {
* Source Type
*/
@TimeseriesField = {}
sourceType: optional OperationSourceType
sourceType: optional OperationSourceType
/**
* Custom properties
*/
customProperties: optional map[string, string]
customProperties: optional map[string, string]
/**
* The time at which the operation occurred. Would be better named 'operationTime'
@ -58,4 +58,10 @@ record Operation includes TimeseriesAspectBase {
@TimeseriesField = {}
@Searchable = { "fieldType": "DATETIME", "fieldName": "lastOperationTime" }
lastUpdatedTimestamp: long
/**
* Which queries were used in this operation.
*/
@TimeseriesFieldCollection = {"key":"query"}
queries: optional array[Urn]
}