mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-19 14:22:26 +00:00
More realistic results
This commit is contained in:
parent
770da2b7ae
commit
ae1e4bc07e
@ -244,8 +244,8 @@ async def load_pdf_work_queue(args) -> asyncio.Queue:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Determine remaining work
|
# Determine remaining work
|
||||||
remaining_work_hashes = set(work_queue) - done_work_hashes
|
#remaining_work_hashes = set(work_queue) - done_work_hashes
|
||||||
#remaining_work_hashes = set(["0e779f21fbb75d38ed4242c7e5fe57fa9a636bac"]) # If you want to debug with a specific work hash
|
remaining_work_hashes = set(["0e779f21fbb75d38ed4242c7e5fe57fa9a636bac"]) # If you want to debug with a specific work hash
|
||||||
remaining_work_queue = {
|
remaining_work_queue = {
|
||||||
hash_: work_queue[hash_]
|
hash_: work_queue[hash_]
|
||||||
for hash_ in remaining_work_hashes
|
for hash_ in remaining_work_hashes
|
||||||
@ -443,7 +443,7 @@ async def worker(args, queue, semaphore, worker_id):
|
|||||||
else:
|
else:
|
||||||
logger.info(f"Proceeding with {work_hash} on worker {worker_id}")
|
logger.info(f"Proceeding with {work_hash} on worker {worker_id}")
|
||||||
|
|
||||||
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60),
|
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=600),
|
||||||
connector=aiohttp.TCPConnector(limit=1000)) as session:
|
connector=aiohttp.TCPConnector(limit=1000)) as session:
|
||||||
async with asyncio.TaskGroup() as tg:
|
async with asyncio.TaskGroup() as tg:
|
||||||
dolma_tasks = [tg.create_task(process_pdf(args, session, worker_id, pdf)) for pdf in pdfs]
|
dolma_tasks = [tg.create_task(process_pdf(args, session, worker_id, pdf)) for pdf in pdfs]
|
||||||
|
@ -2,7 +2,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "1"
|
_MINOR = "1"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "16"
|
_PATCH = "17"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user