More realistic results

This commit is contained in:
Jake Poznanski 2024-11-15 11:35:10 -08:00
parent 770da2b7ae
commit ae1e4bc07e
2 changed files with 4 additions and 4 deletions

View File

@@ -244,8 +244,8 @@ async def load_pdf_work_queue(args) -> asyncio.Queue:
}
# Determine remaining work
-remaining_work_hashes = set(work_queue) - done_work_hashes
-#remaining_work_hashes = set(["0e779f21fbb75d38ed4242c7e5fe57fa9a636bac"]) # If you want to debug with a specific work hash
+#remaining_work_hashes = set(work_queue) - done_work_hashes
+remaining_work_hashes = set(["0e779f21fbb75d38ed4242c7e5fe57fa9a636bac"]) # If you want to debug with a specific work hash
remaining_work_queue = {
hash_: work_queue[hash_]
for hash_ in remaining_work_hashes
@@ -443,7 +443,7 @@ async def worker(args, queue, semaphore, worker_id):
else:
logger.info(f"Proceeding with {work_hash} on worker {worker_id}")
-async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60),
+async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=600),
connector=aiohttp.TCPConnector(limit=1000)) as session:
async with asyncio.TaskGroup() as tg:
dolma_tasks = [tg.create_task(process_pdf(args, session, worker_id, pdf)) for pdf in pdfs]

View File

@@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "1"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
-_PATCH = "16"
+_PATCH = "17"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""