Jake Poznanski 2025-06-17 15:58:16 +00:00
parent 6fcd26d66a
commit e489b28421
4 changed files with 32 additions and 32 deletions

View File

@@ -1,10 +1,10 @@
 import os
 import tempfile
 
+from marker.config.parser import ConfigParser
 from marker.converters.pdf import PdfConverter
 from marker.models import create_model_dict
 from marker.output import text_from_rendered
-from marker.config.parser import ConfigParser
 from pypdf import PdfReader, PdfWriter
 
 _marker_converter = None
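
The dangling _marker_converter = None is the usual lazy-singleton pattern: build one marker PdfConverter on first use and reuse it across pages. A minimal sketch of that pattern, assuming marker-pdf's documented API and a hypothetical get_marker_converter helper (not necessarily this module's real code):

    # Hypothetical helper: lazily build and cache a single marker converter.
    def get_marker_converter():
        global _marker_converter
        if _marker_converter is None:
            _marker_converter = PdfConverter(artifact_dict=create_model_dict())
        return _marker_converter

    def pdf_to_markdown(pdf_path: str) -> str:
        rendered = get_marker_converter()(pdf_path)
        text, _, _ = text_from_rendered(rendered)
        return text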

View File

@@ -91,11 +91,7 @@ class MetricsKeeper:
         current_time = time.time()
         elapsed_time = current_time - self.start_time
 
-        summary = {
-            "elapsed_time_seconds": elapsed_time,
-            "total_metrics": dict(self.total_metrics),
-            "rates": {}
-        }
+        summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
 
         # Calculate rates for each metric
         if elapsed_time > 0:
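
For context, the summary dict above is then filled with per-metric rates. A minimal sketch of how a MetricsKeeper like this can derive them, assuming total_metrics maps counter names to cumulative counts (method names other than those visible in the diff are assumptions):

    import time
    from collections import defaultdict

    class MetricsKeeper:
        def __init__(self):
            self.start_time = time.time()
            self.total_metrics = defaultdict(int)  # counter name -> cumulative count

        def add_metrics(self, **counts):
            for name, value in counts.items():
                self.total_metrics[name] += value

        def get_total_metrics_summary(self):
            elapsed_time = time.time() - self.start_time
            summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
            # Calculate rates for each metric
            if elapsed_time > 0:
                for name, count in self.total_metrics.items():
                    summary["rates"][f"{name}_per_sec"] = count / elapsed_time
            return summary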

View File

@@ -574,10 +574,13 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
         "vllm",
         "serve",
         model_name_or_path,
-        "--port", str(BASE_SERVER_PORT),
+        "--port",
+        str(BASE_SERVER_PORT),
         "--disable-log-requests",
-        "--uvicorn-log-level", "warning",
-        "--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
+        "--uvicorn-log-level",
+        "warning",
+        "--served-model-name",
+        "Qwen/Qwen2-VL-7B-Instruct",
     ]
 
     cmd.extend(mem_fraction_arg)
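
A command list like this is typically handed to asyncio's subprocess API so the server's output can be watched line by line, which is what the log-parsing hunk below does. A minimal sketch under that assumption (not the pipeline's exact launch code):

    import asyncio

    async def start_vllm_server(cmd):
        # Merge stderr into stdout so a single reader sees all log lines.
        return await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )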
@@ -615,11 +618,11 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
                 server_printed_ready_message = True
                 last_semaphore_release = time.time()
 
-            match = re.search(r'Running: (\d+)', line)
+            match = re.search(r"Running: (\d+)", line)
             if match:
                 last_running_req = int(match.group(1))
 
-            match = re.search(r'Waiting: (\d+)', line)
+            match = re.search(r"Waiting: (\d+)", line)
             if match:
                 last_queue_req = int(match.group(1))
                 logger.info(f"vllm running req: {last_running_req} queue req: {last_queue_req}")
@@ -675,7 +678,9 @@ async def vllm_server_host(model_name_or_path, args, semaphore):
     if retry >= MAX_RETRIES:
         logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
         logger.error("")
-        logger.error("Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html")
+        logger.error(
+            "Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html"
+        )
         sys.exit(1)
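
This check sits inside a restart loop: vllm_server_host keeps relaunching vllm_server_task until it exceeds MAX_RETRIES. A minimal sketch of that shape, with everything beyond the names visible in the diff assumed:

    import logging
    import sys

    logger = logging.getLogger(__name__)
    MAX_RETRIES = 5  # assumed value

    async def vllm_server_host(model_name_or_path, args, semaphore):
        retry = 0
        while True:
            # Assumed contract: vllm_server_task returns when the server process dies.
            await vllm_server_task(model_name_or_path, args, semaphore)
            retry += 1
            if retry >= MAX_RETRIES:
                logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
                sys.exit(1)
            logger.warning(f"vllm server exited, restarting (attempt {retry})")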
@@ -1140,7 +1145,7 @@ async def main():
         return
 
     # If you get this far, then you are doing inference and need a GPU
-    #check_sglang_version()
+    # check_sglang_version()
     check_torch_gpu_available()
 
     logger.info(f"Starting pipeline with PID {os.getpid()}")
@@ -1189,8 +1194,8 @@ async def main():
     logger.info(f"Total elapsed time: {metrics_summary['elapsed_time_seconds']:.2f} seconds")
 
     # Output token counts and rates
-    total_metrics = metrics_summary['total_metrics']
-    rates = metrics_summary['rates']
+    total_metrics = metrics_summary["total_metrics"]
+    rates = metrics_summary["rates"]
 
     logger.info(f"Total Server Input tokens: {total_metrics.get('server_input_tokens', 0):,}")
     logger.info(f"Total Server Output tokens: {total_metrics.get('server_output_tokens', 0):,}")
@@ -1199,9 +1204,9 @@ async def main():
     logger.info(f"Finished output tokens: {total_metrics.get('finished_output_tokens', 0):,}")
 
     # Output rates
-    if 'server_input_tokens_per_sec' in rates:
+    if "server_input_tokens_per_sec" in rates:
         logger.info(f"Input tokens/sec rate: {rates['server_input_tokens_per_sec']:.2f}")
-    if 'server_output_tokens_per_sec' in rates:
+    if "server_output_tokens_per_sec" in rates:
         logger.info(f"Output tokens/sec rate: {rates['server_output_tokens_per_sec']:.2f}")
 
     logger.info("=" * 80)

View File

@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
 
 MODEL_ID = "/home/ubuntu/olmocr/olmOCR-7B-0225-preview"
 
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto")
+model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 from llmcompressor import oneshot
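
The trailing import suggests the script goes on to quantize the checkpoint with llmcompressor's oneshot entry point. A minimal sketch of a typical FP8 dynamic recipe; the scheme, targets, ignore list, and save path are assumptions, not the script's actual settings:

    from llmcompressor import oneshot
    from llmcompressor.modifiers.quantization import QuantizationModifier

    # Quantize all Linear layers to FP8 with dynamic activation scales,
    # skipping the LM head and (assumed) the Qwen2-VL vision tower.
    recipe = QuantizationModifier(
        targets="Linear",
        scheme="FP8_DYNAMIC",
        ignore=["lm_head", "re:visual.*"],
    )
    oneshot(model=model, recipe=recipe)

    save_dir = MODEL_ID + "-FP8-Dynamic"  # assumed output location
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)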