mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-31 12:52:13 +00:00
fix(mysql): getting table name correctly (#8476)
Co-authored-by: Ellie O'Neil <oneile729@gmail.com>
This commit is contained in:
parent
a97ac52481
commit
8fb5912978
@ -360,18 +360,22 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
||||
@_run_with_query_combiner
|
||||
def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None:
|
||||
if self.config.profile_table_row_count_estimate_only:
|
||||
schema_name = self.dataset_name.split(".")[1]
|
||||
table_name = self.dataset_name.split(".")[2]
|
||||
logger.debug(
|
||||
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
|
||||
)
|
||||
|
||||
dialect_name = self.dataset.engine.dialect.name.lower()
|
||||
if dialect_name == "postgresql":
|
||||
schema_name = self.dataset_name.split(".")[1]
|
||||
table_name = self.dataset_name.split(".")[2]
|
||||
logger.debug(
|
||||
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
|
||||
)
|
||||
get_estimate_script = sa.text(
|
||||
f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'"
|
||||
)
|
||||
elif dialect_name == "mysql":
|
||||
schema_name = self.dataset_name.split(".")[0]
|
||||
table_name = self.dataset_name.split(".")[1]
|
||||
logger.debug(
|
||||
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
|
||||
)
|
||||
get_estimate_script = sa.text(
|
||||
f"SELECT table_rows AS estimate FROM information_schema.tables WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'"
|
||||
)
|
||||
|
@ -400,5 +400,159 @@
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "mysql-2020_04_14-07_00_00"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "datasetProfile",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"timestampMillis": 1586847600000,
|
||||
"partitionSpec": {
|
||||
"type": "FULL_TABLE",
|
||||
"partition": "FULL_TABLE_SNAPSHOT"
|
||||
},
|
||||
"rowCount": 0,
|
||||
"columnCount": 6,
|
||||
"fieldProfiles": [
|
||||
{
|
||||
"fieldPath": "id",
|
||||
"uniqueCount": 5,
|
||||
"uniqueProportion": 1,
|
||||
"nullCount": 0,
|
||||
"min": "1",
|
||||
"max": "5",
|
||||
"mean": "3.0",
|
||||
"median": "3",
|
||||
"stdev": "1.5811388300841898",
|
||||
"sampleValues": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fieldPath": "company",
|
||||
"uniqueCount": 5,
|
||||
"uniqueProportion": 1,
|
||||
"nullCount": 0,
|
||||
"sampleValues": [
|
||||
"Company A",
|
||||
"Company B",
|
||||
"Company C",
|
||||
"Company D",
|
||||
"Company E"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fieldPath": "last_name",
|
||||
"uniqueCount": 5,
|
||||
"uniqueProportion": 1,
|
||||
"nullCount": 0,
|
||||
"sampleValues": [
|
||||
"Axen",
|
||||
"Bedecs",
|
||||
"Donnell",
|
||||
"Gratacos Solsona",
|
||||
"Lee"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fieldPath": "first_name",
|
||||
"uniqueCount": 5,
|
||||
"uniqueProportion": 1,
|
||||
"nullCount": 0,
|
||||
"sampleValues": [
|
||||
"Anna",
|
||||
"Antonio",
|
||||
"Christina",
|
||||
"Martin",
|
||||
"Thomas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fieldPath": "email_address",
|
||||
"uniqueCount": 0,
|
||||
"nullCount": 0,
|
||||
"sampleValues": []
|
||||
},
|
||||
{
|
||||
"fieldPath": "priority",
|
||||
"uniqueCount": 3,
|
||||
"uniqueProportion": 0.75,
|
||||
"nullCount": 0,
|
||||
"min": "3.8",
|
||||
"max": "4.9",
|
||||
"mean": "4.175000011920929",
|
||||
"median": "4.0",
|
||||
"stdev": "0.49244294899530355",
|
||||
"sampleValues": [
|
||||
"4.0",
|
||||
"4.9",
|
||||
"4.0",
|
||||
"3.8"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "mysql-2020_04_14-07_00_00"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "datasetProfile",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"timestampMillis": 1586847600000,
|
||||
"partitionSpec": {
|
||||
"type": "FULL_TABLE",
|
||||
"partition": "FULL_TABLE_SNAPSHOT"
|
||||
},
|
||||
"rowCount": 0,
|
||||
"columnCount": 3,
|
||||
"fieldProfiles": [
|
||||
{
|
||||
"fieldPath": "id",
|
||||
"uniqueCount": 0,
|
||||
"nullCount": 0,
|
||||
"min": "None",
|
||||
"max": "None",
|
||||
"mean": "None",
|
||||
"median": "None",
|
||||
"stdev": "0.0",
|
||||
"sampleValues": []
|
||||
},
|
||||
{
|
||||
"fieldPath": "description",
|
||||
"uniqueCount": 0,
|
||||
"nullCount": 0,
|
||||
"sampleValues": []
|
||||
},
|
||||
{
|
||||
"fieldPath": "customer_id",
|
||||
"uniqueCount": 0,
|
||||
"nullCount": 0,
|
||||
"min": "None",
|
||||
"max": "None",
|
||||
"mean": "None",
|
||||
"median": "None",
|
||||
"stdev": "0.0",
|
||||
"sampleValues": []
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "mysql-2020_04_14-07_00_00"
|
||||
}
|
||||
}
|
||||
]
|
Loading…
x
Reference in New Issue
Block a user