Some qwen25 work

This commit is contained in:
Jake Poznanski 2025-05-14 20:59:22 +00:00
parent a85571c047
commit 725aa834fb
3 changed files with 39 additions and 22 deletions

View File

@@ -22,6 +22,7 @@ async def run_server(
     target_longest_image_dim: int = 1024,
     prompt_template: Literal["full", "basic", "finetune"] = "finetune",
     response_template: Literal["plain", "json"] = "json",
+    prompt_image_first: bool = False,
 ) -> str:
     """
     Convert page of a PDF file to markdown by calling a request
@ -48,20 +49,36 @@ async def run_server(
else: else:
raise ValueError("Unknown prompt template") raise ValueError("Unknown prompt template")
request = { if prompt_image_first:
"model": model, request = {
"messages": [ "model": model,
{ "messages": [
"role": "user", {
"content": [ "role": "user",
{"type": "text", "text": prompt}, "content": [
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
], {"type": "text", "text": prompt},
} ],
], }
"temperature": temperature, ],
"max_tokens": 3000, "temperature": temperature,
} "max_tokens": 3000,
}
else:
request = {
"model": model,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
],
}
],
"temperature": temperature,
"max_tokens": 3000,
}
# Make request and get response using httpx # Make request and get response using httpx
url = f"http://{server}/v1/chat/completions" url = f"http://{server}/v1/chat/completions"

View File

@@ -138,8 +138,8 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                 ],
             }
         ],
@@ -500,7 +500,7 @@ async def worker(args, work_queue: WorkQueue, semaphore, worker_id):
 async def sglang_server_task(model_name_or_path, args, semaphore):
     # Check GPU memory, lower mem devices need a bit less KV cache space because the VLM takes additional memory
     gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # Convert to GB
-    mem_fraction_arg = ["--mem-fraction-static", "0.80"] if gpu_memory < 60 else []
+    mem_fraction_arg = ["--mem-fraction-static", "0.70"] if gpu_memory < 60 else []
     cmd = [
         "python3",

View File

@@ -98,7 +98,7 @@ def openai_response_format_schema() -> dict:
 # This is a base prompt that will be used for training and running the fine tuned model
 # It's simplified from the prompt which was used to generate the silver data, and can change from dataset to dataset
-def build_finetuning_prompt_qwen2(base_text: str) -> str:
+def build_finetuning_prompt(base_text: str) -> str:
     return (
         f"Below is the image of one page of a document, as well as some raw textual content that was previously extracted for it. "
         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
@@ -107,11 +107,11 @@ def build_finetuning_prompt_qwen2(base_text: str) -> str:
     )

 # This is the new fine tuning prompt we are trying for qwen2.5 vl
-def build_finetuning_prompt(base_text: str) -> str:
-    return (
-        f"Below is the image of one page of a document. "
-        f"Just return the plain text representation of this document as if you were reading it naturally.\n"
-    )
+# def build_finetuning_prompt(base_text: str) -> str:
+#     return (
+#         f"Below is the image of one page of a document. "
+#         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
+#     )

 # Extracts the anchor text component from an existing prompt string