mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-12 16:39:40 +00:00
Fixing async stuff
This commit is contained in:
parent
3ca305d0b8
commit
d21a164bac
@ -62,7 +62,7 @@ def image_to_base64_data_url(image):
|
|||||||
return f"data:image/png;base64,{img_str}"
|
return f"data:image/png;base64,{img_str}"
|
||||||
|
|
||||||
|
|
||||||
def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2048) -> List[Dict[str, str]]:
|
async def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2048) -> List[Dict[str, str]]:
|
||||||
"""Load prompts and images from olmOCR-mix-0225-benchmarkset dataset with fixed random seed."""
|
"""Load prompts and images from olmOCR-mix-0225-benchmarkset dataset with fixed random seed."""
|
||||||
print(f"Loading olmOCR-mix-0225-benchmarkset dataset with {num_samples} samples and seed {seed}")
|
print(f"Loading olmOCR-mix-0225-benchmarkset dataset with {num_samples} samples and seed {seed}")
|
||||||
|
|
||||||
@ -101,12 +101,12 @@ def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2
|
|||||||
for pdf_path in sampled_pdfs:
|
for pdf_path in sampled_pdfs:
|
||||||
try:
|
try:
|
||||||
# Build page query for page 1 of each PDF
|
# Build page query for page 1 of each PDF
|
||||||
query = asyncio.run(build_page_query(
|
query = await build_page_query(
|
||||||
local_pdf_path=pdf_path,
|
local_pdf_path=pdf_path,
|
||||||
page=1,
|
page=1,
|
||||||
target_longest_image_dim=1280,
|
target_longest_image_dim=1280,
|
||||||
image_rotation=0
|
image_rotation=0
|
||||||
))
|
)
|
||||||
queries.append(query)
|
queries.append(query)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {os.path.basename(pdf_path)}: {e}")
|
print(f"Error processing {os.path.basename(pdf_path)}: {e}")
|
||||||
@ -316,7 +316,7 @@ async def async_main():
|
|||||||
model_path = await download_model(args.model)
|
model_path = await download_model(args.model)
|
||||||
|
|
||||||
# Load prompts and images
|
# Load prompts and images
|
||||||
samples = load_pdf_prompts(num_samples=args.num_prompts, seed=args.seed)
|
samples = await load_pdf_prompts(num_samples=args.num_prompts, seed=args.seed)
|
||||||
|
|
||||||
# Create vLLM engine
|
# Create vLLM engine
|
||||||
print("\n=== Creating vLLM Engine ===")
|
print("\n=== Creating vLLM Engine ===")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user