mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-22 16:18:10 +00:00
fix(profiles): prevent NoneType exception when profiling empty datasets (#3144)
Co-authored-by: Sergio Gómez <sergio.gomez.villamor@adevinta.com>
This commit is contained in:
parent
eb26b2f59d
commit
dd7bead514
@ -218,7 +218,10 @@ class DatahubGEProfiler:
|
|||||||
column_profile.uniqueProportion = res["observed_value"]
|
column_profile.uniqueProportion = res["observed_value"]
|
||||||
elif exp == "expect_column_values_to_not_be_null":
|
elif exp == "expect_column_values_to_not_be_null":
|
||||||
column_profile.nullCount = res["unexpected_count"]
|
column_profile.nullCount = res["unexpected_count"]
|
||||||
if "unexpected_percent" in res:
|
if (
|
||||||
|
"unexpected_percent" in res
|
||||||
|
and res["unexpected_percent"] is not None
|
||||||
|
):
|
||||||
column_profile.nullProportion = res["unexpected_percent"] / 100
|
column_profile.nullProportion = res["unexpected_percent"] / 100
|
||||||
elif exp == "expect_column_values_to_not_match_regex":
|
elif exp == "expect_column_values_to_not_match_regex":
|
||||||
# ignore; generally used for whitespace checks using regex r"^\s+|\s+$"
|
# ignore; generally used for whitespace checks using regex r"^\s+|\s+$"
|
||||||
|
@ -584,5 +584,80 @@
|
|||||||
"contentType": "application/json"
|
"contentType": "application/json"
|
||||||
},
|
},
|
||||||
"systemMetadata": null
|
"systemMetadata": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "test_cases.test_empty",
|
||||||
|
"platform": "urn:li:dataPlatform:mysql",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
|
||||||
|
"tableSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"fieldPath": "dummy",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": true,
|
||||||
|
"description": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "VARCHAR(length=50)",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": null,
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"primaryKeys": null,
|
||||||
|
"foreignKeysSpecs": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "mysql-test",
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "datasetProfile",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": null
|
||||||
}
|
}
|
||||||
]
|
]
|
@ -11,9 +11,11 @@ source:
|
|||||||
allow:
|
allow:
|
||||||
- "^metagalaxy"
|
- "^metagalaxy"
|
||||||
- "^northwind"
|
- "^northwind"
|
||||||
|
- "^test_cases"
|
||||||
profile_pattern:
|
profile_pattern:
|
||||||
allow:
|
allow:
|
||||||
- "^northwind.*\$"
|
- "^northwind.*\$"
|
||||||
|
- "^test_cases.*\$"
|
||||||
profiling:
|
profiling:
|
||||||
enabled: True
|
enabled: True
|
||||||
|
|
||||||
|
@ -89,5 +89,21 @@ INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_addr
|
|||||||
INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_address`) VALUES (5, 'Company E', 'Donnell', 'Martin', NULL);
|
INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_address`) VALUES (5, 'Company E', 'Donnell', 'Martin', NULL);
|
||||||
# 5 records
|
# 5 records
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Schema for testing different scenarios
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
|
||||||
|
DROP SCHEMA IF EXISTS `test_cases` ;
|
||||||
|
CREATE SCHEMA IF NOT EXISTS `test_cases` DEFAULT CHARACTER SET latin1 ;
|
||||||
|
USE `test_cases` ;
|
||||||
|
|
||||||
|
-- no data in `test_cases`.`test_empty`
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS `test_cases`.`test_empty` (
|
||||||
|
`dummy` VARCHAR(50) NULL DEFAULT NULL)
|
||||||
|
ENGINE = InnoDB
|
||||||
|
DEFAULT CHARACTER SET = utf8;
|
||||||
|
|
||||||
|
|
||||||
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
|
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
|
||||||
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
|
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
|
Loading…
x
Reference in New Issue
Block a user