mirror of
https://github.com/allenai/olmocr.git
synced 2025-09-27 01:10:31 +00:00
Better tracking of completion_errors
This commit is contained in:
parent
4ef14ec813
commit
9eb252f8f6
@@ -422,9 +422,21 @@ def process_jsonl_content(inference_s3_path: str) -> List[DatabaseManager.BatchI
|
|||||||
data = json.loads(line_str)
|
data = json.loads(line_str)
|
||||||
pdf_s3_path, page_num = parse_custom_id(data["custom_id"])
|
pdf_s3_path, page_num = parse_custom_id(data["custom_id"])
|
||||||
|
|
||||||
|
if data.get("completion_error", None) is not None:
|
||||||
|
index_entries.append(DatabaseManager.BatchInferenceRecord(
|
||||||
|
inference_s3_path=inference_s3_path,
|
||||||
|
pdf_s3_path=pdf_s3_path,
|
||||||
|
page_num=page_num,
|
||||||
|
round=data["round"],
|
||||||
|
start_index=start_index, # Byte offset in the original file
|
||||||
|
length=line_length, # Length in bytes
|
||||||
|
finish_reason="completion_error",
|
||||||
|
error=data.get("completion_error", None)
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# Try to parse the actual model response JSON
|
||||||
assert "outputs" in data and len(data["outputs"]) > 0, "No outputs from model detected"
|
assert "outputs" in data and len(data["outputs"]) > 0, "No outputs from model detected"
|
||||||
|
|
||||||
# Try to parse the actual model response JSON
|
|
||||||
try:
|
try:
|
||||||
model_response_json = json.loads(data["outputs"][0]["text"])
|
model_response_json = json.loads(data["outputs"][0]["text"])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user