Some qwen25 work

Jake Poznanski 2025-05-14 20:59:22 +00:00
parent a85571c047
commit 725aa834fb
3 changed files with 39 additions and 22 deletions

@@ -22,6 +22,7 @@ async def run_server(
     target_longest_image_dim: int = 1024,
     prompt_template: Literal["full", "basic", "finetune"] = "finetune",
     response_template: Literal["plain", "json"] = "json",
+    prompt_image_first: bool = False,
 ) -> str:
     """
     Convert page of a PDF file to markdown by calling a request
@@ -48,20 +48,36 @@ async def run_server(
     else:
         raise ValueError("Unknown prompt template")

-    request = {
-        "model": model,
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-                ],
-            }
-        ],
-        "temperature": temperature,
-        "max_tokens": 3000,
-    }
+    if prompt_image_first:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                        {"type": "text", "text": prompt},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }
+    else:
+        request = {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    ],
+                }
+            ],
+            "temperature": temperature,
+            "max_tokens": 3000,
+        }

     # Make request and get response using httpx
     url = f"http://{server}/v1/chat/completions"

@@ -138,8 +138,8 @@ async def build_page_query(local_pdf_path: str, page: int, target_longest_image_
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+                    {"type": "text", "text": build_finetuning_prompt(anchor_text)},
                 ],
             }
         ],
@@ -500,7 +500,7 @@ async def worker(args, work_queue: WorkQueue, semaphore, worker_id):
 async def sglang_server_task(model_name_or_path, args, semaphore):
     # Check GPU memory, lower mem devices need a bit less KV cache space because the VLM takes additional memory
     gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # Convert to GB
-    mem_fraction_arg = ["--mem-fraction-static", "0.80"] if gpu_memory < 60 else []
+    mem_fraction_arg = ["--mem-fraction-static", "0.70"] if gpu_memory < 60 else []

     cmd = [
         "python3",

@@ -98,7 +98,7 @@ def openai_response_format_schema() -> dict:

 # This is a base prompt that will be used for training and running the fine tuned model
 # It's simplified from the prompt which was used to generate the silver data, and can change from dataset to dataset
-def build_finetuning_prompt_qwen2(base_text: str) -> str:
+def build_finetuning_prompt(base_text: str) -> str:
     return (
         f"Below is the image of one page of a document, as well as some raw textual content that was previously extracted for it. "
         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
@@ -107,11 +107,11 @@ def build_finetuning_prompt_qwen2(base_text: str) -> str:
     )

 # This is the new fine tuning prompt we are trying for qwen2.5 vl
-def build_finetuning_prompt(base_text: str) -> str:
-    return (
-        f"Below is the image of one page of a document. "
-        f"Just return the plain text representation of this document as if you were reading it naturally.\n"
-    )
+# def build_finetuning_prompt(base_text: str) -> str:
+#     return (
+#         f"Below is the image of one page of a document. "
+#         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
+#     )

 # Extracts the anchor text component from an existing prompt string
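A small usage sketch tying the renamed prompt builder back to the message layout from the build_page_query hunk above; it assumes it runs alongside the definitions in this file, and the anchor text and base64 image are placeholder values:

# Placeholders standing in for the PDF anchor text and the rendered page image.
anchor_text = "EXAMPLE RAW TEXT PREVIOUSLY EXTRACTED FOR THIS PAGE"
image_base64 = "<base64-encoded PNG of the rendered page>"

# After this commit, build_finetuning_prompt refers to the prompt that mentions the
# previously extracted raw text, and the page image is placed before the text part.
message = {
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
        {"type": "text", "text": build_finetuning_prompt(anchor_text)},
    ],
}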