mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-27 10:35:58 +00:00
feat(ingest/profiler): use approx_count_distinct on Databricks (#14337)
This commit is contained in:
parent
560b5ec54e
commit
867fc01fc9
@@ -216,6 +216,14 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
|
||||
)
|
||||
).scalar()
|
||||
)
|
||||
elif self.engine.dialect.name.lower() == DATABRICKS:
|
||||
return convert_to_json_serializable(
|
||||
self.engine.execute(
|
||||
sa.select(sa.func.approx_count_distinct(sa.column(column))).select_from(
|
||||
self._table
|
||||
)
|
||||
).scalar()
|
||||
)
|
||||
return convert_to_json_serializable(
|
||||
self.engine.execute(
|
||||
sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(
|
||||
|
Loading…
x
Reference in New Issue
Block a user