This commit is contained in:
Marek Połom 2024-05-08 17:43:24 +02:00
parent de2b6ef718
commit d2f62abe56
2 changed files with 3 additions and 2 deletions

View File

@@ -87,7 +87,7 @@ def process_data_with_pdfminer(
x2 * coef,
y2 * coef,
text=_text,
-source="pdftext",
+source=Source.PDFTEXT,
)
if text_region.bbox is not None and text_region.bbox.area > 0:
@@ -104,7 +104,7 @@ def process_data_with_pdfminer(
x2 * coef,
y2 * coef,
text=None,
-source="pdftext",
+source=Source.PDFTEXT,
)
if image_region.bbox is not None and image_region.bbox.area > 0:
layout.append(image_region)

View File

@@ -3,6 +3,7 @@ from enum import Enum
class Source(Enum):
PDFTEXT = "pdftext"
PDFMINER = "pdfminer"
OCR_TESSERACT = "ocr_tesseract"
OCR_PADDLE = "ocr_paddle"