Mirror of https://github.com/Unstructured-IO/unstructured.git (synced 2025-09-01 04:43:59 +00:00)
Refactor: support layout analysis (#2273)
### Summary

This PR is the second part of the "layout analysis" refactor, moving it from the unstructured-inference repo to the unstructured repo; the first part was done in https://github.com/Unstructured-IO/unstructured-inference/pull/305. This PR adds logic to support annotating `inferred` and `extracted` elements.

### Testing

```
PYTHONPATH=. python examples/layout-analysis/visualization.py <file_path> <strategy> <document_type>
```

e.g.

```
PYTHONPATH=. python examples/layout-analysis/visualization.py example-docs/layout-parser-paper-fast.pdf hi_res pdf
```
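For orientation, here is a minimal sketch of exercising the new behavior directly through `partition_pdf`, mirroring the call the updated example script makes in the diff below; the document path and output directory are just example values:

```python
import os

from unstructured.partition.pdf import partition_pdf

# Example inputs; any local PDF and writable directory will do.
f_path = "example-docs/layout-parser-paper-fast.pdf"
output_dir = "examples/layout-analysis/output/layout-parser-paper-fast"
os.makedirs(output_dir, exist_ok=True)

elements = partition_pdf(
    f_path,
    strategy="hi_res",
    is_image=False,
    include_page_breaks=True,
    analysis=True,  # new: also dump annotated "inferred"/"extracted" page images
    analyzed_image_output_dir_path=output_dir,
)
```

With `analysis=True`, annotated page images named like `<basename>_<page>_inferred.jpg` and `<basename>_<page>_extracted.jpg` are written to `analyzed_image_output_dir_path`; the example script additionally saves `<basename>_<page>_final.jpg` images for the final elements it draws itself.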
This commit is contained in:
parent 09f86f28fb
commit 096d23bc28
`CHANGELOG.md`:

```diff
@@ -1,7 +1,9 @@
-## 0.11.6-dev1
+## 0.11.6-dev2

 ### Enhancements

+* **Update the layout analysis script.** The previous script only supported annotating `final` elements. The updated script also supports annotating `inferred` and `extracted` elements.
+
 ### Features

 ### Fixes
```
`examples/layout-analysis/visualization.py`:

```diff
@@ -3,7 +3,8 @@ import pathlib
 import sys

 import pdf2image
-from unstructured_inference.inference.elements import Rectangle
+from PIL import Image
+from unstructured_inference.inference.elements import TextRegion
 from unstructured_inference.visualize import draw_bbox

 from unstructured.documents.elements import PageBreak
@@ -29,11 +30,14 @@ def extract_element_coordinates(elements):
     return elements_coordinates


-def run_partition_pdf(f_path, strategy, images, output_dir):
+def run_partition_pdf(f_path, strategy, images, output_dir, output_f_basename, is_image):
     elements = partition_pdf(
         f_path,
         strategy=strategy,
+        is_image=is_image,
         include_page_breaks=True,
+        analysis=True,
+        analyzed_image_output_dir_path=output_dir,
     )

     elements_coordinates = extract_element_coordinates(elements)
@@ -44,22 +48,28 @@ def run_partition_pdf(f_path, strategy, images, output_dir):
             points = coordinate.points
             x1, y1 = points[0]
             x2, y2 = points[2]
-            rect = Rectangle(x1, y1, x2, y2)
-            img = draw_bbox(img, rect, color="red")
+            el = TextRegion.from_coords(x1, y1, x2, y2)
+            img = draw_bbox(img, el, color="red")

-        output_image_path = os.path.join(output_dir, f"{strategy}-{idx + 1}.jpg")
+        output_image_path = os.path.join(output_dir, f"{output_f_basename}_{idx + 1}_final.jpg")
+        img.save(output_image_path)
         print(f"output_image_path: {output_image_path}")

-        img.save(output_image_path)
-

-def run(f_path, strategy):
+def run(f_path, strategy, document_type):
     f_basename = os.path.splitext(os.path.basename(f_path))[0]
     output_dir_path = os.path.join(output_basedir_path, f_basename)
     os.makedirs(output_dir_path, exist_ok=True)

-    images = pdf2image.convert_from_path(f_path)
-    run_partition_pdf(f_path, strategy, images, output_dir_path)
+    is_image = document_type == "image"
+    if is_image:
+        with Image.open(f_path) as img:
+            img = img.convert("RGB")
+            images = [img]
+    else:
+        images = pdf2image.convert_from_path(f_path)

+    run_partition_pdf(f_path, strategy, images, output_dir_path, f_basename, is_image)


 if __name__ == "__main__":
@@ -74,7 +84,11 @@ if __name__ == "__main__":
         print("Invalid strategy")
         sys.exit(1)

+    if sys.argv[3] not in ["pdf", "image"]:
+        print("Invalid document type")
+        sys.exit(1)
+
     output_basedir_path = os.path.join(CUR_DIR, "output")
     os.makedirs(output_basedir_path, exist_ok=True)

-    run(f_path=sys.argv[1], strategy=sys.argv[2])
+    run(f_path=sys.argv[1], strategy=sys.argv[2], document_type=sys.argv[3])
```
`unstructured/__version__.py`:

```diff
@@ -1 +1 @@
-__version__ = "0.11.6-dev1"  # pragma: no cover
+__version__ = "0.11.6-dev2"  # pragma: no cover
```
`unstructured/partition/pdf.py`:

```diff
@@ -74,9 +74,13 @@ from unstructured.partition.lang import (
     prepare_languages_for_tesseract,
 )
 from unstructured.partition.pdf_image.pdf_image_utils import (
+    annotate_layout_elements,
     check_element_types_to_extract,
     save_elements,
 )
+from unstructured.partition.pdf_image.pdfminer_processing import (
+    merge_inferred_with_extracted_layout,
+)
 from unstructured.partition.pdf_image.pdfminer_utils import (
     open_pdfminer_pages_generator,
     rect_to_bbox,
@@ -247,6 +251,8 @@ def _partition_pdf_or_image_local(
     extract_element_types: Optional[List[str]] = None,
     image_output_dir_path: Optional[str] = None,
     pdf_image_dpi: Optional[int] = None,
+    analysis: bool = False,
+    analyzed_image_output_dir_path: Optional[str] = None,
     **kwargs,
 ) -> List[Element]:
     """Partition using package installed locally"""
@@ -286,14 +292,27 @@ def _partition_pdf_or_image_local(
             pdf_image_dpi=pdf_image_dpi,
         )

-        if pdf_text_extractable is True:
-            # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
-            merged_document_layout = process_file_with_pdfminer(
-                inferred_document_layout,
-                filename,
-            )
-        else:
-            merged_document_layout = inferred_document_layout
+        extracted_layout = (
+            process_file_with_pdfminer(filename=filename, dpi=pdf_image_dpi)
+            if pdf_text_extractable
+            else []
+        )
+
+        if analysis:
+            annotate_layout_elements(
+                inferred_document_layout=inferred_document_layout,
+                extracted_layout=extracted_layout,
+                filename=filename,
+                output_dir_path=analyzed_image_output_dir_path,
+                pdf_image_dpi=pdf_image_dpi,
+                is_image=is_image,
+            )
+
+        # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
+        merged_document_layout = merge_inferred_with_extracted_layout(
+            inferred_document_layout=inferred_document_layout,
+            extracted_layout=extracted_layout,
+        )

         if model_name.startswith("chipper"):
             # NOTE(alan): We shouldn't do OCR with chipper
@@ -317,14 +336,16 @@ def _partition_pdf_or_image_local(
         )
         if hasattr(file, "seek"):
             file.seek(0)
-        if pdf_text_extractable is True:
-            # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
-            merged_document_layout = process_data_with_pdfminer(
-                inferred_document_layout,
-                file,
-            )
-        else:
-            merged_document_layout = inferred_document_layout
+
+        extracted_layout = (
+            process_data_with_pdfminer(file=file, dpi=pdf_image_dpi) if pdf_text_extractable else []
+        )
+
+        # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
+        merged_document_layout = merge_inferred_with_extracted_layout(
+            inferred_document_layout=inferred_document_layout,
+            extracted_layout=extracted_layout,
+        )

         if model_name.startswith("chipper"):
             # NOTE(alan): We shouldn't do OCR with chipper
@@ -655,7 +676,7 @@ def _process_pdfminer_pages(
                     urls_metadata.append(map_bbox_and_index(words, annot))

             if hasattr(obj, "get_text"):
-                _text_snippets: List[str | Any] = [obj.get_text()]  # type: ignore
+                _text_snippets: List = [obj.get_text()]
             else:
                 _text = _extract_text(obj)
                 _text_snippets = re.split(PARAGRAPH_PATTERN, _text)
```
`unstructured/partition/pdf_image/pdf_image_utils.py`:

```diff
@@ -13,6 +13,8 @@ from unstructured.logger import logger
 from unstructured.partition.common import convert_to_bytes

 if TYPE_CHECKING:
+    from unstructured_inference.inference.layout import DocumentLayout, PageLayout, TextRegion
+
     from unstructured.documents.elements import Element


@@ -159,3 +161,118 @@ def valid_text(text: str) -> bool:
     if not text:
         return False
     return "(cid:" not in text
+
+
+def annotate_layout_elements_with_image(
+    inferred_page_layout: "PageLayout",
+    extracted_page_layout: Optional["PageLayout"],
+    output_dir_path: str,
+    output_f_basename: str,
+    page_number: int,
+):
+    """
+    Annotates a page image with both inferred and extracted layout elements.
+
+    This function takes the layout elements of a single page, either extracted from or inferred
+    for the document, and annotates them on the page image. It creates two separate annotated
+    images, one for each set of layout elements: 'inferred' and 'extracted'.
+    These annotated images are saved to a specified directory.
+    """
+
+    layout_map = {"inferred": {"layout": inferred_page_layout, "color": "blue"}}
+    if extracted_page_layout:
+        layout_map["extracted"] = {"layout": extracted_page_layout, "color": "green"}
+
+    for label, layout_data in layout_map.items():
+        page_layout = layout_data.get("layout")
+        color = layout_data.get("color")
+
+        img = page_layout.annotate(colors=color)
+        output_f_path = os.path.join(
+            output_dir_path, f"{output_f_basename}_{page_number}_{label}.jpg"
+        )
+        write_image(img, output_f_path)
+        print(f"output_image_path: {output_f_path}")
+
+
+def annotate_layout_elements(
+    inferred_document_layout: "DocumentLayout",
+    extracted_layout: List["TextRegion"],
+    filename: str,
+    output_dir_path: str,
+    pdf_image_dpi: int,
+    is_image: bool = False,
+) -> None:
+    """
+    Annotates layout elements on images extracted from a PDF or an image file.
+
+    This function processes a given document (PDF or image) and annotates layout elements based
+    on the inferred and extracted layout information.
+    It handles both PDF documents and standalone image files. For PDFs, it converts each page
+    into an image, whereas for image files, it processes the single image.
+    """
+
+    from unstructured_inference.inference.layout import PageLayout
+
+    output_f_basename = os.path.splitext(os.path.basename(filename))[0]
+    images = []
+    try:
+        if is_image:
+            with Image.open(filename) as img:
+                img = img.convert("RGB")
+                images.append(img)
+
+                extracted_page_layout = None
+                if extracted_layout:
+                    extracted_page_layout = PageLayout(
+                        number=1,
+                        image=img,
+                    )
+                    extracted_page_layout.elements = extracted_layout[0]
+
+                inferred_page_layout = inferred_document_layout.pages[0]
+                inferred_page_layout.image = img
+
+                annotate_layout_elements_with_image(
+                    inferred_page_layout=inferred_document_layout.pages[0],
+                    extracted_page_layout=extracted_page_layout,
+                    output_dir_path=output_dir_path,
+                    output_f_basename=output_f_basename,
+                    page_number=1,
+                )
+        else:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                _image_paths = pdf2image.convert_from_path(
+                    filename,
+                    dpi=pdf_image_dpi,
+                    output_folder=temp_dir,
+                    paths_only=True,
+                )
+                image_paths = cast(List[str], _image_paths)
+                for i, image_path in enumerate(image_paths):
+                    with Image.open(image_path) as img:
+                        page_number = i + 1
+
+                        extracted_page_layout = None
+                        if extracted_layout:
+                            extracted_page_layout = PageLayout(
+                                number=page_number,
+                                image=img,
+                            )
+                            extracted_page_layout.elements = extracted_layout[i]
+
+                        inferred_page_layout = inferred_document_layout.pages[i]
+                        inferred_page_layout.image = img
+
+                        annotate_layout_elements_with_image(
+                            inferred_page_layout=inferred_document_layout.pages[i],
+                            extracted_page_layout=extracted_page_layout,
+                            output_dir_path=output_dir_path,
+                            output_f_basename=output_f_basename,
+                            page_number=page_number,
+                        )
+    except Exception as e:
+        if os.path.isdir(filename) or os.path.isfile(filename):
+            raise e
+        else:
+            raise FileNotFoundError(f'File "{filename}" not found!') from e
```
`unstructured/partition/pdf_image/pdfminer_processing.py`:

```diff
@@ -7,7 +7,7 @@ from unstructured_inference.inference.elements import (
     TextRegion,
 )
 from unstructured_inference.inference.layoutelement import (
-    merge_inferred_layout_with_extracted_layout,
+    merge_inferred_layout_with_extracted_layout as merge_inferred_with_extracted_page,
 )
 from unstructured_inference.inference.ordering import order_layout
 from unstructured_inference.models.detectron2onnx import UnstructuredDetectronONNXModel
@@ -25,62 +25,20 @@ if TYPE_CHECKING:


 def process_file_with_pdfminer(
-    inferred_document_layout: "DocumentLayout",
     filename: str = "",
-) -> "DocumentLayout":
+    dpi: int = 200,
+) -> List[List[TextRegion]]:
     with open_filename(filename, "rb") as fp:
         fp = cast(BinaryIO, fp)
-        inferred_document_layout = process_data_with_pdfminer(
-            inferred_document_layout=inferred_document_layout,
+        extracted_layout = process_data_with_pdfminer(
             file=fp,
+            dpi=dpi,
         )
-        return inferred_document_layout
+        return extracted_layout


 def process_data_with_pdfminer(
-    inferred_document_layout: "DocumentLayout",
     file: Optional[Union[bytes, BinaryIO]] = None,
-) -> "DocumentLayout":
-    """Process document data using PDFMiner to extract layout information."""
-
-    extracted_layouts = get_regions_by_pdfminer(file)
-
-    inferred_pages = inferred_document_layout.pages
-    for i, (inferred_page, extracted_layout) in enumerate(zip(inferred_pages, extracted_layouts)):
-        inferred_layout = inferred_page.elements
-        image_metadata = inferred_page.image_metadata
-        w = image_metadata.get("width")
-        h = image_metadata.get("height")
-        image_size = (w, h)
-
-        threshold_kwargs = {}
-        # NOTE(Benjamin): With this the thresholds are only changed for detextron2_mask_rcnn
-        # In other case the default values for the functions are used
-        if (
-            isinstance(inferred_page.detection_model, UnstructuredDetectronONNXModel)
-            and "R_50" not in inferred_page.detection_model.model_path
-        ):
-            threshold_kwargs = {"same_region_threshold": 0.5, "subregion_threshold": 0.5}
-
-        merged_layout = merge_inferred_layout_with_extracted_layout(
-            inferred_layout=inferred_layout,
-            extracted_layout=extracted_layout,
-            page_image_size=image_size,
-            **threshold_kwargs,
-        )
-
-        elements = inferred_page.get_elements_from_layout(
-            layout=cast(List[TextRegion], merged_layout),
-            pdf_objects=extracted_layout,
-        )
-
-        inferred_page.elements[:] = elements
-
-    return inferred_document_layout
-
-
-def get_regions_by_pdfminer(
-    fp: Optional[Union[bytes, BinaryIO]],
     dpi: int = 200,
 ) -> List[List[TextRegion]]:
     """Loads the image and word objects from a pdf using pdfplumber and the image renderings of the
@@ -89,7 +47,7 @@ def get_regions_by_pdfminer(
     layouts = []
     # Coefficient to rescale bounding box to be compatible with images
     coef = dpi / 72
-    for page, page_layout in open_pdfminer_pages_generator(fp):
+    for page, page_layout in open_pdfminer_pages_generator(file):
         height = page_layout.height

         layout: List["TextRegion"] = []
@@ -129,3 +87,43 @@ def get_regions_by_pdfminer(
         layouts.append(layout)

     return layouts
+
+
+def merge_inferred_with_extracted_layout(
+    inferred_document_layout: "DocumentLayout",
+    extracted_layout: List[List[TextRegion]],
+) -> "DocumentLayout":
+    inferred_pages = inferred_document_layout.pages
+    for i, (inferred_page, extracted_page_layout) in enumerate(
+        zip(inferred_pages, extracted_layout)
+    ):
+        inferred_layout = inferred_page.elements
+        image_metadata = inferred_page.image_metadata
+        w = image_metadata.get("width")
+        h = image_metadata.get("height")
+        image_size = (w, h)
+
+        threshold_kwargs = {}
+        # NOTE(Benjamin): With this the thresholds are only changed for detextron2_mask_rcnn
+        # In other case the default values for the functions are used
+        if (
+            isinstance(inferred_page.detection_model, UnstructuredDetectronONNXModel)
+            and "R_50" not in inferred_page.detection_model.model_path
+        ):
+            threshold_kwargs = {"same_region_threshold": 0.5, "subregion_threshold": 0.5}
+
+        merged_layout = merge_inferred_with_extracted_page(
+            inferred_layout=inferred_layout,
+            extracted_layout=extracted_page_layout,
+            page_image_size=image_size,
+            **threshold_kwargs,
+        )
+
+        elements = inferred_page.get_elements_from_layout(
+            layout=cast(List[TextRegion], merged_layout),
+            pdf_objects=extracted_page_layout,
+        )
+
+        inferred_page.elements[:] = elements
+
+    return inferred_document_layout
```
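For readers wiring these pieces up outside `partition_pdf`, here is a rough sketch of how the refactored `pdfminer_processing` functions compose; the helper name is hypothetical, and the inferred `DocumentLayout` is assumed to already come from the usual unstructured-inference hi_res model step:

```python
from typing import List

from unstructured_inference.inference.elements import TextRegion
from unstructured_inference.inference.layout import DocumentLayout

from unstructured.partition.pdf_image.pdfminer_processing import (
    merge_inferred_with_extracted_layout,
    process_file_with_pdfminer,
)


def merge_pdfminer_into_inferred(  # hypothetical helper, not part of this PR
    filename: str,
    inferred_document_layout: DocumentLayout,
    pdf_image_dpi: int = 200,
) -> DocumentLayout:
    """Enrich an already-inferred layout with text regions extracted by pdfminer."""
    extracted_layout: List[List[TextRegion]] = process_file_with_pdfminer(
        filename=filename,
        dpi=pdf_image_dpi,
    )
    return merge_inferred_with_extracted_layout(
        inferred_document_layout=inferred_document_layout,
        extracted_layout=extracted_layout,
    )
```

This mirrors the flow in `_partition_pdf_or_image_local` above: the pdfminer extraction is now kept as a separate `List[List[TextRegion]]` and only merged into the inferred layout as a final step, which is what makes annotating `inferred` and `extracted` elements separately possible.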