fix(mysql): derive schema and table name correctly for row-count estimates (#8476)

Co-authored-by: Ellie O'Neil <oneile729@gmail.com>
This commit is contained in:
Aseem Bansal 2023-07-23 17:10:54 +05:30 committed by GitHub
parent a97ac52481
commit 8fb5912978
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 164 additions and 6 deletions

View File

@ -360,18 +360,22 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
@_run_with_query_combiner
def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None:
if self.config.profile_table_row_count_estimate_only:
schema_name = self.dataset_name.split(".")[1]
table_name = self.dataset_name.split(".")[2]
logger.debug(
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
)
dialect_name = self.dataset.engine.dialect.name.lower()
if dialect_name == "postgresql":
schema_name = self.dataset_name.split(".")[1]
table_name = self.dataset_name.split(".")[2]
logger.debug(
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
)
get_estimate_script = sa.text(
f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'"
)
elif dialect_name == "mysql":
schema_name = self.dataset_name.split(".")[0]
table_name = self.dataset_name.split(".")[1]
logger.debug(
f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}"
)
get_estimate_script = sa.text(
f"SELECT table_rows AS estimate FROM information_schema.tables WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'"
)

View File

@ -400,5 +400,159 @@
"lastObserved": 1586847600000,
"runId": "mysql-2020_04_14-07_00_00"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"json": {
"timestampMillis": 1586847600000,
"partitionSpec": {
"type": "FULL_TABLE",
"partition": "FULL_TABLE_SNAPSHOT"
},
"rowCount": 0,
"columnCount": 6,
"fieldProfiles": [
{
"fieldPath": "id",
"uniqueCount": 5,
"uniqueProportion": 1,
"nullCount": 0,
"min": "1",
"max": "5",
"mean": "3.0",
"median": "3",
"stdev": "1.5811388300841898",
"sampleValues": [
"1",
"2",
"3",
"4",
"5"
]
},
{
"fieldPath": "company",
"uniqueCount": 5,
"uniqueProportion": 1,
"nullCount": 0,
"sampleValues": [
"Company A",
"Company B",
"Company C",
"Company D",
"Company E"
]
},
{
"fieldPath": "last_name",
"uniqueCount": 5,
"uniqueProportion": 1,
"nullCount": 0,
"sampleValues": [
"Axen",
"Bedecs",
"Donnell",
"Gratacos Solsona",
"Lee"
]
},
{
"fieldPath": "first_name",
"uniqueCount": 5,
"uniqueProportion": 1,
"nullCount": 0,
"sampleValues": [
"Anna",
"Antonio",
"Christina",
"Martin",
"Thomas"
]
},
{
"fieldPath": "email_address",
"uniqueCount": 0,
"nullCount": 0,
"sampleValues": []
},
{
"fieldPath": "priority",
"uniqueCount": 3,
"uniqueProportion": 0.75,
"nullCount": 0,
"min": "3.8",
"max": "4.9",
"mean": "4.175000011920929",
"median": "4.0",
"stdev": "0.49244294899530355",
"sampleValues": [
"4.0",
"4.9",
"4.0",
"3.8"
]
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "mysql-2020_04_14-07_00_00"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"json": {
"timestampMillis": 1586847600000,
"partitionSpec": {
"type": "FULL_TABLE",
"partition": "FULL_TABLE_SNAPSHOT"
},
"rowCount": 0,
"columnCount": 3,
"fieldProfiles": [
{
"fieldPath": "id",
"uniqueCount": 0,
"nullCount": 0,
"min": "None",
"max": "None",
"mean": "None",
"median": "None",
"stdev": "0.0",
"sampleValues": []
},
{
"fieldPath": "description",
"uniqueCount": 0,
"nullCount": 0,
"sampleValues": []
},
{
"fieldPath": "customer_id",
"uniqueCount": 0,
"nullCount": 0,
"min": "None",
"max": "None",
"mean": "None",
"median": "None",
"stdev": "0.0",
"sampleValues": []
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "mysql-2020_04_14-07_00_00"
}
}
]