mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-19 14:22:26 +00:00
Fixing work queue population
This commit is contained in:
parent
827b77e8df
commit
b67d8e7555
@@ -194,7 +194,7 @@ async def populate_pdf_work_queue(args):
|
|||||||
|
|
||||||
async def load_pdf_work_queue(args) -> asyncio.Queue:
|
async def load_pdf_work_queue(args) -> asyncio.Queue:
|
||||||
index_file_s3_path = os.path.join(args.workspace, "pdf_index_list.csv.zstd")
|
index_file_s3_path = os.path.join(args.workspace, "pdf_index_list.csv.zstd")
|
||||||
output_glob = f"{args.workspace}/dolma_documents/output_*.jsonl"
|
output_glob = os.path.join(args.workspace, "dolma_documents", "*.jsonl")
|
||||||
|
|
||||||
# Define the two blocking I/O operations
|
# Define the two blocking I/O operations
|
||||||
download_task = asyncio.to_thread(download_zstd_csv, workspace_s3, index_file_s3_path)
|
download_task = asyncio.to_thread(download_zstd_csv, workspace_s3, index_file_s3_path)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user