This commit is contained in:
Jake Poznanski 2024-11-21 16:56:20 +00:00
commit 219cc7eca8

View File

@ -326,7 +326,7 @@ async def worker(args, work_queue: S3WorkQueue, semaphore, worker_id):
await tracker.clear_work(worker_id) await tracker.clear_work(worker_id)
try: try:
async with httpx.AsyncClient(timeout=600, limits=httpx.Limits(max_connections=1000)) as session: async with httpx.AsyncClient(timeout=600, limits=httpx.Limits(max_keepalive_connections=0, max_connections=1000)) as session:
async with asyncio.TaskGroup() as tg: async with asyncio.TaskGroup() as tg:
dolma_tasks = [tg.create_task(process_pdf(args, session, worker_id, pdf)) for pdf in work_item.s3_work_paths] dolma_tasks = [tg.create_task(process_pdf(args, session, worker_id, pdf)) for pdf in work_item.s3_work_paths]
logger.info(f"Created all tasks for {work_item.hash}") logger.info(f"Created all tasks for {work_item.hash}")