Mirror of https://github.com/allenai/olmocr.git (synced 2025-06-27 04:00:02 +00:00)

commit 725aa834fb (parent a85571c047)

    Some qwen25 work
@@ -22,6 +22,7 @@ async def run_server(
     target_longest_image_dim: int = 1024,
     prompt_template: Literal["full", "basic", "finetune"] = "finetune",
     response_template: Literal["plain", "json"] = "json",
+    prompt_image_first: bool = False,
 ) -> str:
     """
     Convert page of a PDF file to markdown by calling a request
@@ -48,20 +49,36 @@ async def run_server(
     else:
         raise ValueError("Unknown prompt template")

-    request = {
-        "model": model,
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-                ],
-            }
-        ],
-        "temperature": temperature,
-        "max_tokens": 3000,
-    }
+    if prompt_image_first:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                        {"type": "text", "text": prompt},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }
+    else:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }

     # Make request and get response using httpx
     url = f"http://{server}/v1/chat/completions"
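The hunk above duplicates the whole request body just to toggle message ordering. As a minimal standalone sketch of the same technique (the helper name and the temperature default are hypothetical, not part of the repo), the branch can be collapsed by ordering the content list before building the request:

```python
from typing import Any

def build_chat_request(
    model: str,
    prompt: str,
    image_base64: str,
    temperature: float = 0.1,
    prompt_image_first: bool = False,
) -> dict[str, Any]:
    # Build both content parts once, then choose their order, instead of
    # duplicating the whole request body in each branch.
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
    content = [image_part, text_part] if prompt_image_first else [text_part, image_part]
    return {
        "model": model,
        "messages": [{"role": "user", "content": content}],
        "temperature": temperature,
        "max_tokens": 3000,
    }
```

Collapsing the branch this way guarantees the two requests differ only in part order, which is the variable the prompt_image_first flag is meant to isolate.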
@@ -138,8 +138,8 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                 {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                {"type": "text", "text": build_finetuning_prompt(anchor_text)},
             ],
         }
     ],
@@ -500,7 +500,7 @@ async def worker(args, work_queue: WorkQueue, semaphore, worker_id):
 async def sglang_server_task(model_name_or_path, args, semaphore):
     # Check GPU memory, lower mem devices need a bit less KV cache space because the VLM takes additional memory
     gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # Convert to GB
-    mem_fraction_arg = ["--mem-fraction-static", "0.80"] if gpu_memory < 60 else []
+    mem_fraction_arg = ["--mem-fraction-static", "0.70"] if gpu_memory < 60 else []

     cmd = [
         "python3",
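This hunk lowers sglang's static memory fraction on sub-60 GB GPUs from 0.80 to 0.70. A hedged sketch of how such a flag typically feeds into the server launch command; the module path and --model-path flag are assumptions about the sglang CLI, and the repo's actual argument list continues past the lines shown in the diff:

```python
import subprocess

import torch

def launch_sglang_server(model_name_or_path: str) -> subprocess.Popen:
    # Lower-memory GPUs reserve a smaller static fraction for the KV cache,
    # since the vision-language model itself takes additional memory.
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # GiB
    mem_fraction_arg = ["--mem-fraction-static", "0.70"] if gpu_memory < 60 else []

    # "sglang.launch_server" and "--model-path" are assumptions about the CLI.
    cmd = ["python3", "-m", "sglang.launch_server", "--model-path", model_name_or_path]
    return subprocess.Popen(cmd + mem_fraction_arg)
```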
@@ -98,7 +98,7 @@ def openai_response_format_schema() -> dict:

 # This is a base prompt that will be used for training and running the fine tuned model
 # It's simplified from the prompt which was used to generate the silver data, and can change from dataset to dataset
-def build_finetuning_prompt_qwen2(base_text: str) -> str:
+def build_finetuning_prompt(base_text: str) -> str:
     return (
         f"Below is the image of one page of a document, as well as some raw textual content that was previously extracted for it. "
         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
@@ -107,11 +107,11 @@ def build_finetuning_prompt_qwen2(base_text: str) -> str:
     )

 # This is the new fine tuning prompt we are trying for qwen2.5 vl
-def build_finetuning_prompt(base_text: str) -> str:
-    return (
-        f"Below is the image of one page of a document. "
-        f"Just return the plain text representation of this document as if you were reading it naturally.\n"
-    )
+# def build_finetuning_prompt(base_text: str) -> str:
+#     return (
+#         f"Below is the image of one page of a document. "
+#         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
+#     )


 # Extracts the anchor text component from an existing prompt string
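Net effect of these two hunks: the anchored-text prompt is restored as the active build_finetuning_prompt, and the image-only qwen2.5-vl experiment is commented out rather than deleted. For reference, here is that experiment uncommented as a standalone sketch, renamed (hypothetically) so it does not collide with the restored function:

```python
def build_finetuning_prompt_image_only(base_text: str) -> str:
    # Unlike the anchored prompt, this variant never references the previously
    # extracted raw text, so base_text is accepted but deliberately unused.
    return (
        "Below is the image of one page of a document. "
        "Just return the plain text representation of this document as if you were reading it naturally.\n"
    )
```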