feat(ingest/profiler): use approx_count_distinct on Databricks (#14337)

This commit is contained in:
Michael Maltese 2025-08-06 03:30:06 -04:00 committed by GitHub
parent 560b5ec54e
commit 867fc01fc9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -216,6 +216,14 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
)
).scalar()
)
elif self.engine.dialect.name.lower() == DATABRICKS:
return convert_to_json_serializable(
self.engine.execute(
sa.select(sa.func.approx_count_distinct(sa.column(column))).select_from(
self._table
)
).scalar()
)
return convert_to_json_serializable(
self.engine.execute(
sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(