diff --git a/pdelfin/beakerpipeline.py b/pdelfin/beakerpipeline.py index a624bd1..e7e6e51 100644 --- a/pdelfin/beakerpipeline.py +++ b/pdelfin/beakerpipeline.py @@ -244,8 +244,8 @@ async def load_pdf_work_queue(args) -> asyncio.Queue: } # Determine remaining work - remaining_work_hashes = set(work_queue) - done_work_hashes - #remaining_work_hashes = set(["0e779f21fbb75d38ed4242c7e5fe57fa9a636bac"]) # If you want to debug with a specific work hash + #remaining_work_hashes = set(work_queue) - done_work_hashes + remaining_work_hashes = set(["0e779f21fbb75d38ed4242c7e5fe57fa9a636bac"]) # If you want to debug with a specific work hash remaining_work_queue = { hash_: work_queue[hash_] for hash_ in remaining_work_hashes @@ -443,7 +443,7 @@ async def worker(args, queue, semaphore, worker_id): else: logger.info(f"Proceeding with {work_hash} on worker {worker_id}") - async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60), + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=600), connector=aiohttp.TCPConnector(limit=1000)) as session: async with asyncio.TaskGroup() as tg: dolma_tasks = [tg.create_task(process_pdf(args, session, worker_id, pdf)) for pdf in pdfs] diff --git a/pdelfin/version.py b/pdelfin/version.py index 9be8574..4776749 100644 --- a/pdelfin/version.py +++ b/pdelfin/version.py @@ -2,7 +2,7 @@ _MAJOR = "0" _MINOR = "1" # On main and in a nightly release the patch should be one ahead of the last # released build. -_PATCH = "16" +_PATCH = "17" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. _SUFFIX = ""