mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-03 14:23:03 +00:00
fix(ingest): profiling - memory usage reduction (#5830)
This commit is contained in:
parent
a8c1397e5e
commit
f4eea5dfd5
@@ -1,3 +1,4 @@
|
|||||||
|
import collections
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import contextlib
|
import contextlib
|
||||||
import dataclasses
|
import dataclasses
|
||||||
@@ -747,7 +748,7 @@ class DatahubGEProfiler:
|
|||||||
"great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
|
"great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
|
||||||
_get_column_quantiles_bigquery_patch,
|
_get_column_quantiles_bigquery_patch,
|
||||||
):
|
):
|
||||||
async_profiles = [
|
async_profiles = collections.deque(
|
||||||
async_executor.submit(
|
async_executor.submit(
|
||||||
self._generate_profile_from_request,
|
self._generate_profile_from_request,
|
||||||
query_combiner,
|
query_combiner,
|
||||||
@@ -756,12 +757,13 @@ class DatahubGEProfiler:
|
|||||||
profiler_args=profiler_args,
|
profiler_args=profiler_args,
|
||||||
)
|
)
|
||||||
for request in requests
|
for request in requests
|
||||||
]
|
)
|
||||||
|
|
||||||
# Avoid using as_completed so that the results are yielded in the
|
# Avoid using as_completed so that the results are yielded in the
|
||||||
# same order as the requests.
|
# same order as the requests.
|
||||||
# for async_profile in concurrent.futures.as_completed(async_profiles):
|
# for async_profile in concurrent.futures.as_completed(async_profiles):
|
||||||
for async_profile in async_profiles:
|
while len(async_profiles) > 0:
|
||||||
|
async_profile = async_profiles.popleft()
|
||||||
yield async_profile.result()
|
yield async_profile.result()
|
||||||
|
|
||||||
total_time_taken = timer.elapsed_seconds()
|
total_time_taken = timer.elapsed_seconds()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user