mirror of
				https://github.com/allenai/olmocr.git
				synced 2025-10-31 18:15:44 +00:00 
			
		
		
		
	Fix anchor generation on PDFs with no non-empty text elements
This commit is contained in:
		
							parent
							
								
									af03358c47
								
							
						
					
					
						commit
						aea3f7f1fe
					
				| @ -302,11 +302,12 @@ def _linearize_pdf_report(report: PageReport, max_length: int = 4000) -> str: | |||||||
| 
 | 
 | ||||||
|     if report.text_elements: |     if report.text_elements: | ||||||
|         text_elements = [e for e in report.text_elements if len(e.text.strip()) > 0] |         text_elements = [e for e in report.text_elements if len(e.text.strip()) > 0] | ||||||
|         min_x_text = min(text_elements, key=lambda e: e.x) |         if text_elements: | ||||||
|         max_x_text = max(text_elements, key=lambda e: e.x) |             min_x_text = min(text_elements, key=lambda e: e.x) | ||||||
|         min_y_text = min(text_elements, key=lambda e: e.y) |             max_x_text = max(text_elements, key=lambda e: e.x) | ||||||
|         max_y_text = max(text_elements, key=lambda e: e.y) |             min_y_text = min(text_elements, key=lambda e: e.y) | ||||||
|         edge_elements.update([min_x_text, max_x_text, min_y_text, max_y_text]) |             max_y_text = max(text_elements, key=lambda e: e.y) | ||||||
|  |             edge_elements.update([min_x_text, max_x_text, min_y_text, max_y_text]) | ||||||
| 
 | 
 | ||||||
|     # Keep track of element IDs to prevent duplication |     # Keep track of element IDs to prevent duplication | ||||||
|     selected_element_ids = set() |     selected_element_ids = set() | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Jake Poznanski
						Jake Poznanski