mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-21 23:58:06 +00:00
fix(profiles): prevent NoneType exception when profiling empty datasets (#3144)
Co-authored-by: Sergio Gómez <sergio.gomez.villamor@adevinta.com>
This commit is contained in:
parent
eb26b2f59d
commit
dd7bead514
@ -218,7 +218,10 @@ class DatahubGEProfiler:
|
||||
column_profile.uniqueProportion = res["observed_value"]
|
||||
elif exp == "expect_column_values_to_not_be_null":
|
||||
column_profile.nullCount = res["unexpected_count"]
|
||||
if "unexpected_percent" in res:
|
||||
if (
|
||||
"unexpected_percent" in res
|
||||
and res["unexpected_percent"] is not None
|
||||
):
|
||||
column_profile.nullProportion = res["unexpected_percent"] / 100
|
||||
elif exp == "expect_column_values_to_not_match_regex":
|
||||
# ignore; generally used for whitespace checks using regex r"^\s+|\s+$"
|
||||
|
@ -584,5 +584,80 @@
|
||||
"contentType": "application/json"
|
||||
},
|
||||
"systemMetadata": null
|
||||
},
|
||||
{
|
||||
"auditHeader": null,
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||
"schemaName": "test_cases.test_empty",
|
||||
"platform": "urn:li:dataPlatform:mysql",
|
||||
"version": 0,
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown",
|
||||
"impersonator": null
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown",
|
||||
"impersonator": null
|
||||
},
|
||||
"deleted": null,
|
||||
"dataset": null,
|
||||
"cluster": null,
|
||||
"hash": "",
|
||||
"platformSchema": {
|
||||
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
|
||||
"tableSchema": ""
|
||||
}
|
||||
},
|
||||
"fields": [
|
||||
{
|
||||
"fieldPath": "dummy",
|
||||
"jsonPath": null,
|
||||
"nullable": true,
|
||||
"description": null,
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "VARCHAR(length=50)",
|
||||
"recursive": false,
|
||||
"globalTags": null,
|
||||
"glossaryTerms": null,
|
||||
"isPartOfKey": false
|
||||
}
|
||||
],
|
||||
"primaryKeys": null,
|
||||
"foreignKeysSpecs": null
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"proposedDelta": null,
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "mysql-test",
|
||||
"properties": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"auditHeader": null,
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)",
|
||||
"entityKeyAspect": null,
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "datasetProfile",
|
||||
"aspect": {
|
||||
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
|
||||
"contentType": "application/json"
|
||||
},
|
||||
"systemMetadata": null
|
||||
}
|
||||
]
|
||||
]
|
||||
|
@ -11,9 +11,11 @@ source:
|
||||
allow:
|
||||
- "^metagalaxy"
|
||||
- "^northwind"
|
||||
- "^test_cases"
|
||||
profile_pattern:
|
||||
allow:
|
||||
- "^northwind.*\$"
|
||||
- "^test_cases.*\$"
|
||||
profiling:
|
||||
enabled: True
|
||||
|
||||
|
@ -89,5 +89,21 @@ INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_addr
|
||||
INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_address`) VALUES (5, 'Company E', 'Donnell', 'Martin', NULL);
|
||||
# 5 records
|
||||
|
||||
-- -----------------------------------------------------
|
||||
-- Schema for testing different scenarios
|
||||
-- -----------------------------------------------------
|
||||
|
||||
DROP SCHEMA IF EXISTS `test_cases` ;
|
||||
CREATE SCHEMA IF NOT EXISTS `test_cases` DEFAULT CHARACTER SET latin1 ;
|
||||
USE `test_cases` ;
|
||||
|
||||
-- no data in `test_cases`.`test_empty`
|
||||
|
||||
CREATE TABLE IF NOT EXISTS `test_cases`.`test_empty` (
|
||||
`dummy` VARCHAR(50) NULL DEFAULT NULL)
|
||||
ENGINE = InnoDB
|
||||
DEFAULT CHARACTER SET = utf8;
|
||||
|
||||
|
||||
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
|
||||
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
|
Loading…
x
Reference in New Issue
Block a user