mirror of https://github.com/allenai/olmocr.git, synced 2025-06-27 04:00:02 +00:00
Lints
This commit is contained in:
parent 6fcd26d66a
commit e489b28421
@@ -1,10 +1,10 @@
 import os
 import tempfile
 
+from marker.config.parser import ConfigParser
 from marker.converters.pdf import PdfConverter
 from marker.models import create_model_dict
 from marker.output import text_from_rendered
-from marker.config.parser import ConfigParser
 from pypdf import PdfReader, PdfWriter
 
 _marker_converter = None
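For context, the pypdf and tempfile imports above are typically used to isolate a single page before handing it to marker, since run_marker() takes a page number. A minimal sketch of that step, with extract_single_page as an illustrative helper name rather than the repository's:

import tempfile

from pypdf import PdfReader, PdfWriter


def extract_single_page(pdf_path: str, page_num: int) -> str:
    """Copy one page (1-based) of pdf_path into a temporary one-page PDF and return its path."""
    reader = PdfReader(pdf_path)
    writer = PdfWriter()
    writer.add_page(reader.pages[page_num - 1])
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    with open(tmp.name, "wb") as f:
        writer.write(f)
    return tmp.name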
@@ -17,7 +17,7 @@ def run_marker(pdf_path: str, page_num: int = 1) -> str:
     # Create a configuration dictionary with the necessary settings
     config = {
         "force_ocr": True, # This enables conversion of inline math to LaTeX
-        "use_llm": False, # We would prefer to run just plain marker for reporting bench results, not hybrid mode
+        "use_llm": False, # We would prefer to run just plain marker for reporting bench results, not hybrid mode
         "disable_tqdm": True, # Disable tqdm for cleaner output
         "recognition_batch_size": 256,
         "layout_batch_size": 48,
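A config dict like the one above is normally fed through ConfigParser to build a PdfConverter, following marker's documented usage. This is a rough sketch of that flow, not the exact body of run_marker:

from marker.config.parser import ConfigParser
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered


def convert_with_marker(pdf_path: str, config: dict) -> str:
    config_parser = ConfigParser(config)
    converter = PdfConverter(
        config=config_parser.generate_config_dict(),
        artifact_dict=create_model_dict(),  # loads marker's model weights
        processor_list=config_parser.get_processors(),
        renderer=config_parser.get_renderer(),
    )
    rendered = converter(pdf_path)
    text, _, _ = text_from_rendered(rendered)  # rendered document -> plain text/markdown
    return text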
@@ -75,7 +75,7 @@ class MetricsKeeper:
     def get_total_metrics(self):
         """
         Returns the total cumulative metrics since the MetricsKeeper was created.
-
+
         Returns:
             dict: Dictionary of metric names to their total values.
         """
@@ -84,24 +84,20 @@ class MetricsKeeper:
     def get_metrics_summary(self):
         """
         Returns a summary of metrics including totals and rates.
 
         Returns:
             dict: Dictionary containing total metrics and overall rates.
         """
         current_time = time.time()
         elapsed_time = current_time - self.start_time
 
-        summary = {
-            "elapsed_time_seconds": elapsed_time,
-            "total_metrics": dict(self.total_metrics),
-            "rates": {}
-        }
-
+        summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
 
         # Calculate rates for each metric
         if elapsed_time > 0:
             for key, value in self.total_metrics.items():
                 summary["rates"][f"{key}_per_sec"] = value / elapsed_time
 
         return summary
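To make the two MetricsKeeper hunks above easier to follow, here is a minimal sketch of the kind of counter-based class they edit. The real class in the pipeline may track more state (for example, windowed rates), and add_metrics is an assumed method name:

import time
from collections import defaultdict


class MetricsKeeperSketch:
    def __init__(self):
        self.start_time = time.time()
        self.total_metrics = defaultdict(int)

    def add_metrics(self, **kwargs):
        # Accumulate named counters, e.g. add_metrics(server_input_tokens=1234, server_output_tokens=567)
        for key, value in kwargs.items():
            self.total_metrics[key] += value

    def get_total_metrics(self):
        return dict(self.total_metrics)

    def get_metrics_summary(self):
        elapsed_time = time.time() - self.start_time
        summary = {"elapsed_time_seconds": elapsed_time, "total_metrics": dict(self.total_metrics), "rates": {}}
        if elapsed_time > 0:
            for key, value in self.total_metrics.items():
                summary["rates"][f"{key}_per_sec"] = value / elapsed_time
        return summary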
@@ -574,10 +574,13 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
         "vllm",
         "serve",
         model_name_or_path,
-        "--port", str(BASE_SERVER_PORT),
+        "--port",
+        str(BASE_SERVER_PORT),
         "--disable-log-requests",
-        "--uvicorn-log-level", "warning",
-        "--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
+        "--uvicorn-log-level",
+        "warning",
+        "--served-model-name",
+        "Qwen/Qwen2-VL-7B-Instruct",
     ]
     cmd.extend(mem_fraction_arg)
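The reflowed argument list above is ultimately handed to a vllm serve subprocess. A hedged sketch of how such a command is typically launched from asyncio; BASE_SERVER_PORT's value and the mem_fraction_arg handling are simplified stand-ins, not copied from the pipeline:

import asyncio

BASE_SERVER_PORT = 30024  # illustrative value; the pipeline defines its own constant


async def start_vllm_server(model_name_or_path: str, mem_fraction_arg: list[str]) -> asyncio.subprocess.Process:
    cmd = [
        "vllm",
        "serve",
        model_name_or_path,
        "--port",
        str(BASE_SERVER_PORT),
        "--disable-log-requests",
        "--uvicorn-log-level",
        "warning",
        "--served-model-name",
        "Qwen/Qwen2-VL-7B-Instruct",
    ]
    cmd.extend(mem_fraction_arg)
    # Merge stderr into stdout so readiness and queue-depth lines can be read from one stream.
    return await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT)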
@@ -615,11 +618,11 @@
             server_printed_ready_message = True
             last_semaphore_release = time.time()
 
-        match = re.search(r'Running: (\d+)', line)
+        match = re.search(r"Running: (\d+)", line)
         if match:
             last_running_req = int(match.group(1))
 
-        match = re.search(r'Waiting: (\d+)', line)
+        match = re.search(r"Waiting: (\d+)", line)
         if match:
             last_queue_req = int(match.group(1))
             logger.info(f"vllm running req: {last_running_req} queue req: {last_queue_req}")
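The Running/Waiting regexes above scrape vllm's periodic status log lines to track server load. A small self-contained sketch of that parsing logic, assuming log lines of the form the regexes expect:

import re


def parse_vllm_status(line: str, last_running: int, last_waiting: int) -> tuple[int, int]:
    """Update (running, waiting) request counts from a single vllm log line."""
    match = re.search(r"Running: (\d+)", line)
    if match:
        last_running = int(match.group(1))

    match = re.search(r"Waiting: (\d+)", line)
    if match:
        last_waiting = int(match.group(1))

    return last_running, last_waiting


# Example: parse_vllm_status("Engine 000: Running: 12 reqs, Waiting: 3 reqs", 0, 0) -> (12, 3)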
@@ -675,7 +678,9 @@ async def vllm_server_host(model_name_or_path, args, semaphore):
         if retry >= MAX_RETRIES:
             logger.error(f"Ended up starting the vllm server more than {retry} times, cancelling pipeline")
             logger.error("")
-            logger.error("Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html")
+            logger.error(
+                "Please make sure vllm is installed according to the latest instructions here: https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html"
+            )
             sys.exit(1)
 
@@ -1140,7 +1145,7 @@ async def main():
         return
 
     # If you get this far, then you are doing inference and need a GPU
-    #check_sglang_version()
+    # check_sglang_version()
    check_torch_gpu_available()
 
    logger.info(f"Starting pipeline with PID {os.getpid()}")
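check_torch_gpu_available() guards the inference path in main(). A hedged sketch of what such a check typically amounts to; the real helper in olmocr may also verify free GPU memory:

import sys

import torch


def check_torch_gpu_available_sketch(min_gpus: int = 1) -> None:
    # Exit early if no CUDA device (or too few devices) is visible to torch.
    if not torch.cuda.is_available() or torch.cuda.device_count() < min_gpus:
        print("No usable CUDA GPU detected; the inference pipeline requires one.", file=sys.stderr)
        sys.exit(1)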
@@ -1180,30 +1185,30 @@ async def main():
 
     vllm_server.cancel()
     metrics_task.cancel()
 
     # Output final metrics summary
     metrics_summary = metrics.get_metrics_summary()
     logger.info("=" * 80)
     logger.info("FINAL METRICS SUMMARY")
     logger.info("=" * 80)
     logger.info(f"Total elapsed time: {metrics_summary['elapsed_time_seconds']:.2f} seconds")
 
     # Output token counts and rates
-    total_metrics = metrics_summary['total_metrics']
-    rates = metrics_summary['rates']
+    total_metrics = metrics_summary["total_metrics"]
+    rates = metrics_summary["rates"]
 
     logger.info(f"Total Server Input tokens: {total_metrics.get('server_input_tokens', 0):,}")
     logger.info(f"Total Server Output tokens: {total_metrics.get('server_output_tokens', 0):,}")
 
     logger.info(f"Finished input tokens: {total_metrics.get('finished_input_tokens', 0):,}")
     logger.info(f"Finished output tokens: {total_metrics.get('finished_output_tokens', 0):,}")
 
     # Output rates
-    if 'server_input_tokens_per_sec' in rates:
+    if "server_input_tokens_per_sec" in rates:
         logger.info(f"Input tokens/sec rate: {rates['server_input_tokens_per_sec']:.2f}")
-    if 'server_output_tokens_per_sec' in rates:
+    if "server_output_tokens_per_sec" in rates:
         logger.info(f"Output tokens/sec rate: {rates['server_output_tokens_per_sec']:.2f}")
 
     logger.info("=" * 80)
     logger.info("Work done")
@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
 
 MODEL_ID = "/home/ubuntu/olmocr/olmOCR-7B-0225-preview"
 
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto")
+model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 from llmcompressor import oneshot
@@ -28,4 +27,4 @@ oneshot(model=model, recipe=recipe)
 # Save the model.
 SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic-Recipe"
 model.save_pretrained(SAVE_DIR)
-tokenizer.save_pretrained(SAVE_DIR)
+tokenizer.save_pretrained(SAVE_DIR)
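The diff above shows oneshot(model=model, recipe=recipe) and the save step, but not the recipe itself. For FP8 dynamic quantization, llmcompressor recipes are usually a QuantizationModifier along these lines; the ignore list (skipping the LM head and the vision tower) is an assumption for a Qwen2-VL checkpoint, not copied from the repository:

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier

recipe = QuantizationModifier(
    targets="Linear",                   # quantize all Linear layers ...
    scheme="FP8_DYNAMIC",               # ... to FP8 weights with dynamic per-token activation scales
    ignore=["lm_head", "re:visual.*"],  # assumed: keep the LM head and vision encoder in higher precision
)

# Then, as in the script above:
# oneshot(model=model, recipe=recipe)
# model.save_pretrained(SAVE_DIR); tokenizer.save_pretrained(SAVE_DIR)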