mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-08 04:55:36 +00:00
Fix: use the Source.PDFTEXT enum member instead of the "pdftext" string literal
This commit is contained in:
parent
de2b6ef718
commit
d2f62abe56
@@ -87,7 +87,7 @@ def process_data_with_pdfminer(
     x2 * coef,
     y2 * coef,
     text=_text,
-    source="pdftext",
+    source=Source.PDFTEXT,
 )
 
 if text_region.bbox is not None and text_region.bbox.area > 0:
@@ -104,7 +104,7 @@ def process_data_with_pdfminer(
     x2 * coef,
     y2 * coef,
     text=None,
-    source="pdftext",
+    source=Source.PDFTEXT,
 )
 if image_region.bbox is not None and image_region.bbox.area > 0:
     layout.append(image_region)
@@ -3,6 +3,7 @@ from enum import Enum
 
 
 class Source(Enum):
+    PDFTEXT = "pdftext"
     PDFMINER = "pdfminer"
     OCR_TESSERACT = "ocr_tesseract"
     OCR_PADDLE = "ocr_paddle"
Loading…
x
Reference in New Issue
Block a user