mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-01 18:43:45 +00:00
Fixing async stuff
This commit is contained in:
parent
3ca305d0b8
commit
d21a164bac
@ -62,7 +62,7 @@ def image_to_base64_data_url(image):
|
||||
return f"data:image/png;base64,{img_str}"
|
||||
|
||||
|
||||
def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2048) -> List[Dict[str, str]]:
|
||||
async def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2048) -> List[Dict[str, str]]:
|
||||
"""Load prompts and images from olmOCR-mix-0225-benchmarkset dataset with fixed random seed."""
|
||||
print(f"Loading olmOCR-mix-0225-benchmarkset dataset with {num_samples} samples and seed {seed}")
|
||||
|
||||
@ -101,12 +101,12 @@ def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2
|
||||
for pdf_path in sampled_pdfs:
|
||||
try:
|
||||
# Build page query for page 1 of each PDF
|
||||
query = asyncio.run(build_page_query(
|
||||
query = await build_page_query(
|
||||
local_pdf_path=pdf_path,
|
||||
page=1,
|
||||
target_longest_image_dim=1280,
|
||||
image_rotation=0
|
||||
))
|
||||
)
|
||||
queries.append(query)
|
||||
except Exception as e:
|
||||
print(f"Error processing {os.path.basename(pdf_path)}: {e}")
|
||||
@ -316,7 +316,7 @@ async def async_main():
|
||||
model_path = await download_model(args.model)
|
||||
|
||||
# Load prompts and images
|
||||
samples = load_pdf_prompts(num_samples=args.num_prompts, seed=args.seed)
|
||||
samples = await load_pdf_prompts(num_samples=args.num_prompts, seed=args.seed)
|
||||
|
||||
# Create vLLM engine
|
||||
print("\n=== Creating vLLM Engine ===")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user