mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-04 07:34:44 +00:00
22 lines
626 B
Python
22 lines
626 B
Python
import os
|
|
from typing import Iterable, Tuple
|
|
|
|
import psutil
|
|
|
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
|
|
|
def workunit_sink(workunits: Iterable[MetadataWorkUnit]) -> Tuple[int, int]:
    """Drain ``workunits`` and report how many were consumed and the peak RSS seen.

    The work units themselves are discarded; the iterable is consumed only to
    drive whatever produces them. While iterating, the current process's
    resident set size (``psutil``'s ``memory_info().rss``) is sampled once
    before the loop, on the first item and every 10,000th item after it, and
    once more after the loop. The reported peak is therefore the largest
    *sampled* value, not a true high-water mark.

    Args:
        workunits: Iterable of work units to exhaust.

    Returns:
        A ``(count, peak_rss)`` tuple: the number of work units consumed
        (``0`` for an empty iterable) and the largest RSS sample observed.
    """
    # One handle for the current process is enough; only the memory_info()
    # call needs to be repeated to get a fresh reading.
    process = psutil.Process(os.getpid())
    peak_memory_usage = process.memory_info().rss

    # Count from 1 so the value left after the loop is the number of items
    # consumed rather than the last zero-based index (which under-reported
    # by one and could not distinguish an empty input from a single item).
    num_workunits = 0
    for num_workunits, _wu in enumerate(workunits, start=1):
        # Sampling is relatively expensive, so only do it on the 1st,
        # 10,001st, 20,001st, ... item.
        if (num_workunits - 1) % 10_000 == 0:
            peak_memory_usage = max(peak_memory_usage, process.memory_info().rss)

    # Final sample so short runs and end-of-run growth are still captured.
    peak_memory_usage = max(peak_memory_usage, process.memory_info().rss)

    return num_workunits, peak_memory_usage
|