From aea3f7f1fe8dc80226df589991888a9dbb6709db Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Fri, 11 Oct 2024 15:01:01 +0000 Subject: [PATCH] Fix for anchor generation on pdfs with no text elements --- pdelfin/prompts/anchor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pdelfin/prompts/anchor.py b/pdelfin/prompts/anchor.py index 46d8964..36b91a6 100644 --- a/pdelfin/prompts/anchor.py +++ b/pdelfin/prompts/anchor.py @@ -302,11 +302,12 @@ def _linearize_pdf_report(report: PageReport, max_length: int = 4000) -> str: if report.text_elements: text_elements = [e for e in report.text_elements if len(e.text.strip()) > 0] - min_x_text = min(text_elements, key=lambda e: e.x) - max_x_text = max(text_elements, key=lambda e: e.x) - min_y_text = min(text_elements, key=lambda e: e.y) - max_y_text = max(text_elements, key=lambda e: e.y) - edge_elements.update([min_x_text, max_x_text, min_y_text, max_y_text]) + if text_elements: + min_x_text = min(text_elements, key=lambda e: e.x) + max_x_text = max(text_elements, key=lambda e: e.x) + min_y_text = min(text_elements, key=lambda e: e.y) + max_y_text = max(text_elements, key=lambda e: e.y) + edge_elements.update([min_x_text, max_x_text, min_y_text, max_y_text]) # Keep track of element IDs to prevent duplication selected_element_ids = set()