mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-23 08:12:28 +00:00
A few items
This commit is contained in:
parent
4f2f4fda7d
commit
691cc5a13c
@ -64,8 +64,8 @@ class PageResult:
|
|||||||
page_num: int
|
page_num: int
|
||||||
response: PageResponse
|
response: PageResponse
|
||||||
|
|
||||||
total_input_tokens: int
|
input_tokens: int
|
||||||
total_output_tokens: int
|
output_tokens: int
|
||||||
|
|
||||||
|
|
||||||
async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, target_anchor_text_len: int, image_rotation: int=0) -> dict:
|
async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, target_anchor_text_len: int, image_rotation: int=0) -> dict:
|
||||||
@ -247,8 +247,8 @@ async def process_page(args, session: aiohttp.ClientSession, pdf_s3_path: str, p
|
|||||||
pdf_s3_path,
|
pdf_s3_path,
|
||||||
page_num,
|
page_num,
|
||||||
page_response,
|
page_response,
|
||||||
total_input_tokens=base_response_data["usage"].get("prompt_tokens", 0),
|
input_tokens=base_response_data["usage"].get("prompt_tokens", 0),
|
||||||
total_output_tokens=base_response_data["usage"].get("completion_tokens", 0)
|
output_tokens=base_response_data["usage"].get("completion_tokens", 0)
|
||||||
)
|
)
|
||||||
except aiohttp.ClientError as e:
|
except aiohttp.ClientError as e:
|
||||||
logger.warning(f"Client error on attempt {attempt} for {pdf_s3_path}-{page_num}:: {e}")
|
logger.warning(f"Client error on attempt {attempt} for {pdf_s3_path}-{page_num}:: {e}")
|
||||||
@ -312,8 +312,8 @@ async def process_pdf(args, pdf_s3_path: str):
|
|||||||
metadata = {
|
metadata = {
|
||||||
"Source-File": pdf_s3_path,
|
"Source-File": pdf_s3_path,
|
||||||
"pdf-total-pages": num_pages,
|
"pdf-total-pages": num_pages,
|
||||||
"total-input-tokens": sum(page.total_input_tokens for page in page_results),
|
"total-input-tokens": sum(page.input_tokens for page in page_results),
|
||||||
"total-output-tokens": sum(page.total_output_tokens for page in page_results)
|
"total-output-tokens": sum(page.output_tokens for page in page_results)
|
||||||
}
|
}
|
||||||
|
|
||||||
id_ = hashlib.sha1(document_text.encode()).hexdigest()
|
id_ = hashlib.sha1(document_text.encode()).hexdigest()
|
||||||
@ -411,11 +411,10 @@ async def sglang_server_task(args, semaphore):
|
|||||||
last_queue_req = None # To track transitions
|
last_queue_req = None # To track transitions
|
||||||
async def process_line(line):
|
async def process_line(line):
|
||||||
# Parse the line and update semaphore if necessary
|
# Parse the line and update semaphore if necessary
|
||||||
match = re.search(r'#running-req: (\d+), #queue-req: (\d+)', line)
|
match = re.search(r'#queue-req: (\d+)', line)
|
||||||
if match:
|
if match:
|
||||||
logger.info(line)
|
logger.info(line)
|
||||||
running_req = int(match.group(1))
|
queue_req = int(match.group(1))
|
||||||
queue_req = int(match.group(2))
|
|
||||||
|
|
||||||
nonlocal last_queue_req
|
nonlocal last_queue_req
|
||||||
if last_queue_req is not None and last_queue_req != 0 and queue_req == 0:
|
if last_queue_req is not None and last_queue_req != 0 and queue_req == 0:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user