Mirror of https://github.com/allenai/olmocr.git (synced 2025-06-27 04:00:02 +00:00)

Commit: 725aa834fb ("Some qwen25 work")
Parent: a85571c047
@@ -22,6 +22,7 @@ async def run_server(
     target_longest_image_dim: int = 1024,
     prompt_template: Literal["full", "basic", "finetune"] = "finetune",
     response_template: Literal["plain", "json"] = "json",
+    prompt_image_first: bool = False,
 ) -> str:
     """
     Convert page of a PDF file to markdown by calling a request
@@ -48,20 +49,36 @@ async def run_server(
     else:
         raise ValueError("Unknown prompt template")

-    request = {
-        "model": model,
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-                ],
-            }
-        ],
-        "temperature": temperature,
-        "max_tokens": 3000,
-    }
+    if prompt_image_first:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                        {"type": "text", "text": prompt},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }
+    else:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }

     # Make request and get response using httpx
     url = f"http://{server}/v1/chat/completions"
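
The hunk ends where the request is about to be sent. A minimal sketch of that call, assuming only what the diff shows (an OpenAI-compatible /v1/chat/completions endpoint on `server` and the `request` dict built above); the client code itself is not part of this commit:

import asyncio
import httpx

async def post_chat_completion(server: str, request: dict) -> str:
    # Endpoint shape taken from the diff above
    url = f"http://{server}/v1/chat/completions"
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(url, json=request)
        response.raise_for_status()
        # OpenAI-style responses carry the text on the first choice's message
        return response.json()["choices"][0]["message"]["content"]

# e.g. page_text = asyncio.run(post_chat_completion(server, request))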
@@ -138,8 +138,8 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                 ],
             }
         ],
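
Both this hunk and the run_server change above toggle whether the image part precedes the text part in the user message. A hypothetical helper, not in the commit, that expresses the ordering once instead of duplicating the whole request dict:

def build_user_content(prompt_text: str, image_base64: str, image_first: bool) -> list:
    # The two content parts that appear in the diffs above
    text_part = {"type": "text", "text": prompt_text}
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
    # This commit experiments with placing the image first; the original order is text first
    return [image_part, text_part] if image_first else [text_part, image_part]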
@@ -500,7 +500,7 @@ async def worker(args, work_queue: WorkQueue, semaphore, worker_id):
 async def sglang_server_task(model_name_or_path, args, semaphore):
     # Check GPU memory, lower mem devices need a bit less KV cache space because the VLM takes additional memory
     gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # Convert to GB
-    mem_fraction_arg = ["--mem-fraction-static", "0.80"] if gpu_memory < 60 else []
+    mem_fraction_arg = ["--mem-fraction-static", "0.70"] if gpu_memory < 60 else []

     cmd = [
         "python3",
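
Extracted as a standalone sketch (function name hypothetical), the heuristic this hunk tunes: on GPUs under 60 GB, hand sglang a smaller static memory fraction, now 0.70 rather than 0.80, to leave headroom for the VLM:

import torch

def sglang_mem_fraction_args(threshold_gb: float = 60.0, fraction: str = "0.70") -> list:
    # Total VRAM of device 0, converted from bytes to GB
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    # Reserve a smaller static KV-cache fraction on low-memory devices
    return ["--mem-fraction-static", fraction] if gpu_memory_gb < threshold_gb else []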
@@ -98,7 +98,7 @@ def openai_response_format_schema() -> dict:

 # This is a base prompt that will be used for training and running the fine tuned model
 # It's simplified from the prompt which was used to generate the silver data, and can change from dataset to dataset
-def build_finetuning_prompt_qwen2(base_text: str) -> str:
+def build_finetuning_prompt(base_text: str) -> str:
     return (
         f"Below is the image of one page of a document, as well as some raw textual content that was previously extracted for it. "
         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
@@ -107,11 +107,11 @@ def build_finetuning_prompt_qwen2(base_text: str) -> str:
     )

-# This is the new fine tuning prompt we are trying for qwen2.5 vl
-def build_finetuning_prompt(base_text: str) -> str:
-    return (
-        f"Below is the image of one page of a document. "
-        f"Just return the plain text representation of this document as if you were reading it naturally.\n"
-    )
+# def build_finetuning_prompt(base_text: str) -> str:
+#     return (
+#         f"Below is the image of one page of a document. "
+#         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
+#     )
+

 # Extracts the anchor text component from an existing prompt string
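
For contrast, the two prompt variants touched by these hunks as runnable functions (names hypothetical; the full body of the anchor-text version is cut off by the hunk boundary, so appending base_text at the end is an assumption):

def build_prompt_with_anchor(base_text: str) -> str:
    # Matches the first two strings shown in the @@ -98 hunk; how base_text
    # is actually framed continues past the end of the diff
    return (
        "Below is the image of one page of a document, as well as some raw textual content "
        "that was previously extracted for it. "
        "Just return the plain text representation of this document as if you were reading it naturally.\n"
        + base_text
    )

def build_prompt_image_only(base_text: str) -> str:
    # The experimental qwen2.5 variant that this commit comments back out;
    # base_text is accepted for interface parity but unused
    return (
        "Below is the image of one page of a document. "
        "Just return the plain text representation of this document as if you were reading it naturally.\n"
    )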