mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-06 22:04:16 +00:00
fix(ingest): remove detailed profiler event (#5616)
This commit is contained in:
parent
bf189d980d
commit
d854798b96
@ -528,14 +528,6 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|||||||
assert profile.rowCount is not None
|
assert profile.rowCount is not None
|
||||||
row_count: int = profile.rowCount
|
row_count: int = profile.rowCount
|
||||||
|
|
||||||
telemetry.telemetry_instance.ping(
|
|
||||||
"profile_sql_table",
|
|
||||||
# bucket by taking floor of log of the number of rows scanned
|
|
||||||
{
|
|
||||||
"rows_profiled": stats.discretize(row_count),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
for column_spec in columns_profiling_queue:
|
for column_spec in columns_profiling_queue:
|
||||||
column = column_spec.column
|
column = column_spec.column
|
||||||
column_profile = column_spec.column_profile
|
column_profile = column_spec.column_profile
|
||||||
@ -663,6 +655,7 @@ class DatahubGEProfiler:
|
|||||||
report: SQLSourceReport
|
report: SQLSourceReport
|
||||||
config: GEProfilingConfig
|
config: GEProfilingConfig
|
||||||
times_taken: List[float]
|
times_taken: List[float]
|
||||||
|
total_row_count: int
|
||||||
|
|
||||||
base_engine: Engine
|
base_engine: Engine
|
||||||
platform: str # passed from parent source config
|
platform: str # passed from parent source config
|
||||||
@ -680,6 +673,7 @@ class DatahubGEProfiler:
|
|||||||
self.report = report
|
self.report = report
|
||||||
self.config = config
|
self.config = config
|
||||||
self.times_taken = []
|
self.times_taken = []
|
||||||
|
self.total_row_count = 0
|
||||||
|
|
||||||
# TRICKY: The call to `.engine` is quite important here. Connection.connect()
|
# TRICKY: The call to `.engine` is quite important here. Connection.connect()
|
||||||
# returns a "branched" connection, which does not actually use a new underlying
|
# returns a "branched" connection, which does not actually use a new underlying
|
||||||
@ -797,6 +791,7 @@ class DatahubGEProfiler:
|
|||||||
{
|
{
|
||||||
"total_time_taken": stats.discretize(total_time_taken),
|
"total_time_taken": stats.discretize(total_time_taken),
|
||||||
"count": stats.discretize(len(self.times_taken)),
|
"count": stats.discretize(len(self.times_taken)),
|
||||||
|
"total_row_count": stats.discretize(self.total_row_count),
|
||||||
"platform": self.platform,
|
"platform": self.platform,
|
||||||
**time_percentiles,
|
**time_percentiles,
|
||||||
},
|
},
|
||||||
@ -930,6 +925,8 @@ class DatahubGEProfiler:
|
|||||||
f"Finished profiling {pretty_name}; took {time_taken:.3f} seconds"
|
f"Finished profiling {pretty_name}; took {time_taken:.3f} seconds"
|
||||||
)
|
)
|
||||||
self.times_taken.append(time_taken)
|
self.times_taken.append(time_taken)
|
||||||
|
if profile.rowCount is not None:
|
||||||
|
self.total_row_count += profile.rowCount
|
||||||
|
|
||||||
return profile
|
return profile
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user