mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-03 06:13:14 +00:00
fix(mysql): getting table name correctly (#8476)
Co-authored-by: Ellie O'Neil <oneile729@gmail.com>
This commit is contained in:
parent
a97ac52481
commit
8fb5912978
@ -360,18 +360,22 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|||||||
@_run_with_query_combiner
|
@_run_with_query_combiner
|
||||||
def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None:
|
def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None:
|
||||||
if self.config.profile_table_row_count_estimate_only:
|
if self.config.profile_table_row_count_estimate_only:
|
||||||
schema_name = self.dataset_name.split(".")[1]
|
|
||||||
table_name = self.dataset_name.split(".")[2]
|
|
||||||
logger.debug(
|
|
||||||
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
|
|
||||||
)
|
|
||||||
|
|
||||||
dialect_name = self.dataset.engine.dialect.name.lower()
|
dialect_name = self.dataset.engine.dialect.name.lower()
|
||||||
if dialect_name == "postgresql":
|
if dialect_name == "postgresql":
|
||||||
|
schema_name = self.dataset_name.split(".")[1]
|
||||||
|
table_name = self.dataset_name.split(".")[2]
|
||||||
|
logger.debug(
|
||||||
|
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
|
||||||
|
)
|
||||||
get_estimate_script = sa.text(
|
get_estimate_script = sa.text(
|
||||||
f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'"
|
f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'"
|
||||||
)
|
)
|
||||||
elif dialect_name == "mysql":
|
elif dialect_name == "mysql":
|
||||||
|
schema_name = self.dataset_name.split(".")[0]
|
||||||
|
table_name = self.dataset_name.split(".")[1]
|
||||||
|
logger.debug(
|
||||||
|
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
|
||||||
|
)
|
||||||
get_estimate_script = sa.text(
|
get_estimate_script = sa.text(
|
||||||
f"SELECT table_rows AS estimate FROM information_schema.tables WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'"
|
f"SELECT table_rows AS estimate FROM information_schema.tables WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'"
|
||||||
)
|
)
|
||||||
|
@ -400,5 +400,159 @@
|
|||||||
"lastObserved": 1586847600000,
|
"lastObserved": 1586847600000,
|
||||||
"runId": "mysql-2020_04_14-07_00_00"
|
"runId": "mysql-2020_04_14-07_00_00"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "datasetProfile",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"timestampMillis": 1586847600000,
|
||||||
|
"partitionSpec": {
|
||||||
|
"type": "FULL_TABLE",
|
||||||
|
"partition": "FULL_TABLE_SNAPSHOT"
|
||||||
|
},
|
||||||
|
"rowCount": 0,
|
||||||
|
"columnCount": 6,
|
||||||
|
"fieldProfiles": [
|
||||||
|
{
|
||||||
|
"fieldPath": "id",
|
||||||
|
"uniqueCount": 5,
|
||||||
|
"uniqueProportion": 1,
|
||||||
|
"nullCount": 0,
|
||||||
|
"min": "1",
|
||||||
|
"max": "5",
|
||||||
|
"mean": "3.0",
|
||||||
|
"median": "3",
|
||||||
|
"stdev": "1.5811388300841898",
|
||||||
|
"sampleValues": [
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "company",
|
||||||
|
"uniqueCount": 5,
|
||||||
|
"uniqueProportion": 1,
|
||||||
|
"nullCount": 0,
|
||||||
|
"sampleValues": [
|
||||||
|
"Company A",
|
||||||
|
"Company B",
|
||||||
|
"Company C",
|
||||||
|
"Company D",
|
||||||
|
"Company E"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "last_name",
|
||||||
|
"uniqueCount": 5,
|
||||||
|
"uniqueProportion": 1,
|
||||||
|
"nullCount": 0,
|
||||||
|
"sampleValues": [
|
||||||
|
"Axen",
|
||||||
|
"Bedecs",
|
||||||
|
"Donnell",
|
||||||
|
"Gratacos Solsona",
|
||||||
|
"Lee"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "first_name",
|
||||||
|
"uniqueCount": 5,
|
||||||
|
"uniqueProportion": 1,
|
||||||
|
"nullCount": 0,
|
||||||
|
"sampleValues": [
|
||||||
|
"Anna",
|
||||||
|
"Antonio",
|
||||||
|
"Christina",
|
||||||
|
"Martin",
|
||||||
|
"Thomas"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "email_address",
|
||||||
|
"uniqueCount": 0,
|
||||||
|
"nullCount": 0,
|
||||||
|
"sampleValues": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "priority",
|
||||||
|
"uniqueCount": 3,
|
||||||
|
"uniqueProportion": 0.75,
|
||||||
|
"nullCount": 0,
|
||||||
|
"min": "3.8",
|
||||||
|
"max": "4.9",
|
||||||
|
"mean": "4.175000011920929",
|
||||||
|
"median": "4.0",
|
||||||
|
"stdev": "0.49244294899530355",
|
||||||
|
"sampleValues": [
|
||||||
|
"4.0",
|
||||||
|
"4.9",
|
||||||
|
"4.0",
|
||||||
|
"3.8"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "mysql-2020_04_14-07_00_00"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "datasetProfile",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"timestampMillis": 1586847600000,
|
||||||
|
"partitionSpec": {
|
||||||
|
"type": "FULL_TABLE",
|
||||||
|
"partition": "FULL_TABLE_SNAPSHOT"
|
||||||
|
},
|
||||||
|
"rowCount": 0,
|
||||||
|
"columnCount": 3,
|
||||||
|
"fieldProfiles": [
|
||||||
|
{
|
||||||
|
"fieldPath": "id",
|
||||||
|
"uniqueCount": 0,
|
||||||
|
"nullCount": 0,
|
||||||
|
"min": "None",
|
||||||
|
"max": "None",
|
||||||
|
"mean": "None",
|
||||||
|
"median": "None",
|
||||||
|
"stdev": "0.0",
|
||||||
|
"sampleValues": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "description",
|
||||||
|
"uniqueCount": 0,
|
||||||
|
"nullCount": 0,
|
||||||
|
"sampleValues": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "customer_id",
|
||||||
|
"uniqueCount": 0,
|
||||||
|
"nullCount": 0,
|
||||||
|
"min": "None",
|
||||||
|
"max": "None",
|
||||||
|
"mean": "None",
|
||||||
|
"median": "None",
|
||||||
|
"stdev": "0.0",
|
||||||
|
"sampleValues": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "mysql-2020_04_14-07_00_00"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
Loading…
x
Reference in New Issue
Block a user