fix(ingest): profiling - memory usage reduction (#5830)

This commit is contained in:
Shirshanka Das 2022-09-05 23:37:19 -07:00 committed by GitHub
parent a8c1397e5e
commit f4eea5dfd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,3 +1,4 @@
+import collections
import concurrent.futures
import contextlib
import dataclasses
@@ -747,7 +748,7 @@ class DatahubGEProfiler:
"great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
_get_column_quantiles_bigquery_patch,
):
-async_profiles = [
+async_profiles = collections.deque(
async_executor.submit(
self._generate_profile_from_request,
query_combiner,
@@ -756,12 +757,13 @@ class DatahubGEProfiler:
profiler_args=profiler_args,
)
for request in requests
-]
+)
# Avoid using as_completed so that the results are yielded in the
# same order as the requests.
# for async_profile in concurrent.futures.as_completed(async_profiles):
-for async_profile in async_profiles:
+while len(async_profiles) > 0:
+async_profile = async_profiles.popleft()
yield async_profile.result()
total_time_taken = timer.elapsed_seconds()