Mirror of https://github.com/Unstructured-IO/unstructured.git (synced 2025-11-03 11:34:07 +00:00)
Refactor: support image extraction (#2201)
### Summary

This PR is the second part of the "image extraction" refactor that moves the logic from the `unstructured-inference` repo to the `unstructured` repo; the first part was done in https://github.com/Unstructured-IO/unstructured-inference/pull/299. This PR adds the logic needed to extract images from a PDF.

### Testing

`git clone -b refactor/remove_image_extraction_code --single-branch https://github.com/Unstructured-IO/unstructured-inference.git && cd unstructured-inference && pip install -e . && cd ../`

```
elements = partition_pdf(
    filename="example-docs/embedded-images.pdf",
    strategy="hi_res",
    extract_images_in_pdf=True,
)
print("\n\n".join([str(el) for el in elements]))
```
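Once installed, a minimal sketch of how the extracted figures can be located afterward. This is illustrative only: it assumes `partition_pdf` accepts an `image_output_dir_path` keyword (it is passed through to the helper added in this PR) and that each extracted figure's path is recorded on the element as `metadata.image_path`, as the new `extract_images_from_elements` code does.

```python
from unstructured.partition.pdf import partition_pdf

# Sketch: partition with image extraction enabled, then read back the path
# where each embedded image was saved.
elements = partition_pdf(
    filename="example-docs/embedded-images.pdf",
    strategy="hi_res",
    extract_images_in_pdf=True,
    image_output_dir_path="figures",  # assumption: defaults to ./figures when omitted
)

for el in elements:
    if el.category == "Image":
        # image_path is populated by extract_images_from_elements in this PR
        print(el.category, el.metadata.image_path)
```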
Parent: c5cb216ac8
Commit: ed76b11b1a
@@ -2,6 +2,7 @@

### Enhancements

+* **Refactor image extraction code.** The image extraction code is moved from `unstructured-inference` to `unstructured`.
* **Refactor pdfminer code.** The pdfminer code is moved from `unstructured-inference` to `unstructured`.

### Features
@@ -36,7 +36,7 @@ idna==3.6
# requests
imagesize==1.4.1
# via sphinx
-importlib-metadata==6.9.0
+importlib-metadata==7.0.0
# via sphinx
jinja2==3.1.2
# via
@@ -36,7 +36,7 @@ idna==3.6
# requests
imagesize==1.4.1
# via sphinx
-importlib-metadata==6.9.0
+importlib-metadata==7.0.0
# via sphinx
jinja2==3.1.2
# via
@@ -91,7 +91,7 @@ idna==3.6
# anyio
# jsonschema
# requests
-importlib-metadata==6.9.0
+importlib-metadata==7.0.0
# via
# build
# jupyter-client
@@ -167,7 +167,7 @@ jupyter-events==0.9.0
# via jupyter-server
jupyter-lsp==2.2.1
# via jupyterlab
-jupyter-server==2.11.1
+jupyter-server==2.11.2
# via
# jupyter-lsp
# jupyterlab
@@ -198,7 +198,7 @@ mistune==3.0.2
# via nbconvert
nbclient==0.9.0
# via nbconvert
-nbconvert==7.11.0
+nbconvert==7.12.0
# via
# jupyter
# jupyter-server
@@ -290,7 +290,7 @@ pyyaml==6.0.1
# -c test.txt
# jupyter-events
# pre-commit
-pyzmq==25.1.1
+pyzmq==25.1.2
# via
# ipykernel
# jupyter-client
@@ -405,7 +405,7 @@ webencodings==0.5.1
# via
# bleach
# tinycss2
-websocket-client==1.6.4
+websocket-client==1.7.0
# via jupyter-server
wheel==0.42.0
# via
@@ -4,7 +4,7 @@
#
# pip-compile --output-file=extra-markdown.txt extra-markdown.in
#
-importlib-metadata==6.9.0
+importlib-metadata==7.0.0
# via markdown
markdown==3.5.1
# via -r extra-markdown.in
@@ -45,7 +45,7 @@ flask==3.0.0
# visualdl
flask-babel==4.0.0
# via visualdl
-fonttools==4.45.1
+fonttools==4.46.0
# via matplotlib
future==0.18.3
# via bce-python-sdk
@@ -59,7 +59,7 @@ imageio==2.33.0
# scikit-image
imgaug==0.4.0
# via unstructured-paddleocr
-importlib-metadata==6.9.0
+importlib-metadata==7.0.0
# via flask
importlib-resources==6.1.1
# via matplotlib
@@ -8,7 +8,7 @@ pikepdf
pypdf
# Do not move to contsraints.in, otherwise unstructured-inference will not be upgraded
# when unstructured library is.
-unstructured-inference==0.7.17
+unstructured-inference==0.7.18
# unstructured fork of pytesseract that provides an interface to allow for multiple output formats
# from one tesseract call
unstructured.pytesseract>=0.3.12
@@ -37,7 +37,7 @@ filelock==3.13.1
# transformers
flatbuffers==23.5.26
# via onnxruntime
-fonttools==4.45.1
+fonttools==4.46.0
# via matplotlib
fsspec==2023.9.1
# via
@@ -134,7 +134,7 @@ pdfminer-six==20221105
# pdfplumber
pdfplumber==0.10.3
# via layoutparser
-pikepdf==8.7.1
+pikepdf==8.8.0
# via -r extra-pdf-image.in
pillow==10.0.1
# via
@@ -250,7 +250,7 @@ typing-extensions==4.8.0
# torch
tzdata==2023.3
# via pandas
-unstructured-inference==0.7.17
+unstructured-inference==0.7.18
# via -r extra-pdf-image.in
unstructured-pytesseract==0.3.12
# via
@@ -60,7 +60,7 @@ idna==3.6
# yarl
isodate==0.6.1
# via azure-storage-blob
-msal==1.25.0
+msal==1.26.0
# via
# azure-datalake-store
# azure-identity
@@ -4,7 +4,7 @@
#
# pip-compile --output-file=ingest/delta-table.txt ingest/delta-table.in
#
-deltalake==0.13.0
+deltalake==0.14.0
# via -r ingest/delta-table.in
fsspec==2023.9.1
# via
@@ -64,11 +64,11 @@ jsonpatch==1.33
# langchain-core
jsonpointer==2.4
# via jsonpatch
-langchain==0.0.344
+langchain==0.0.345
# via -r ingest/embed-aws-bedrock.in
-langchain-core==0.0.8
+langchain-core==0.0.9
# via langchain
-langsmith==0.0.68
+langsmith==0.0.69
# via
# langchain
# langchain-core
@@ -79,11 +79,11 @@ jsonpatch==1.33
# langchain-core
jsonpointer==2.4
# via jsonpatch
-langchain==0.0.344
+langchain==0.0.345
# via -r ingest/embed-huggingface.in
-langchain-core==0.0.8
+langchain-core==0.0.9
# via langchain
-langsmith==0.0.68
+langsmith==0.0.69
# via
# langchain
# langchain-core
@@ -64,11 +64,11 @@ jsonpatch==1.33
# langchain-core
jsonpointer==2.4
# via jsonpatch
-langchain==0.0.344
+langchain==0.0.345
# via -r ingest/embed-openai.in
-langchain-core==0.0.8
+langchain-core==0.0.9
# via langchain
-langsmith==0.0.68
+langsmith==0.0.69
# via
# langchain
# langchain-core
@@ -125,7 +125,7 @@ tenacity==8.2.3
# via
# langchain
# langchain-core
-tiktoken==0.5.1
+tiktoken==0.5.2
# via -r ingest/embed-openai.in
tqdm==4.66.1
# via
@@ -29,7 +29,7 @@ idna==3.6
# via
# -c ingest/../base.txt
# requests
-msal==1.25.0
+msal==1.26.0
# via
# -r ingest/onedrive.in
# office365-rest-python-client
@@ -23,7 +23,7 @@ idna==3.6
# via
# -c ingest/../base.txt
# requests
-msal==1.25.0
+msal==1.26.0
# via
# -r ingest/outlook.in
# office365-rest-python-client
@@ -33,5 +33,5 @@ urllib3==1.26.18
# -c ingest/../base.txt
# -c ingest/../constraints.in
# requests
-websocket-client==1.6.4
+websocket-client==1.7.0
# via praw
@@ -23,7 +23,7 @@ idna==3.6
# via
# -c ingest/../base.txt
# requests
-msal==1.25.0
+msal==1.26.0
# via
# -r ingest/sharepoint.in
# office365-rest-python-client
@@ -4,5 +4,5 @@
#
# pip-compile --output-file=ingest/slack.txt ingest/slack.in
#
-slack-sdk==3.26.0
+slack-sdk==3.26.1
# via -r ingest/slack.in
@@ -2,44 +2,41 @@
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
-# pip-compile --constraint=requirements/constraints.in requirements/ingest/weaviate.in
+# pip-compile --output-file=ingest/weaviate.txt ingest/weaviate.in
#
authlib==1.2.1
# via weaviate-client
certifi==2023.11.17
# via
-# -c requirements/constraints.in
-# -c requirements/ingest/../base.txt
-# -c requirements/ingest/../constraints.in
+# -c ingest/../base.txt
+# -c ingest/../constraints.in
# requests
cffi==1.16.0
# via cryptography
charset-normalizer==3.3.2
# via
-# -c requirements/ingest/../base.txt
+# -c ingest/../base.txt
# requests
-cryptography==41.0.5
+cryptography==41.0.7
# via authlib
-idna==3.4
+idna==3.6
# via
-# -c requirements/ingest/../base.txt
+# -c ingest/../base.txt
# requests
pycparser==2.21
# via cffi
requests==2.31.0
# via
-# -c requirements/ingest/../base.txt
+# -c ingest/../base.txt
# weaviate-client
urllib3==1.26.18
# via
-# -c requirements/constraints.in
-# -c requirements/ingest/../base.txt
-# -c requirements/ingest/../constraints.in
+# -c ingest/../base.txt
+# -c ingest/../constraints.in
# requests
validators==0.22.0
# via weaviate-client
weaviate-client==3.25.3
# via
-# -c requirements/constraints.in
-# -c requirements/ingest/../constraints.in
-# -r requirements/ingest/weaviate.in
+# -c ingest/../constraints.in
+# -r ingest/weaviate.in
@@ -36,7 +36,7 @@ flake8==6.1.0
# flake8-print
flake8-print==5.0.0
# via -r test.in
-freezegun==1.2.2
+freezegun==1.3.1
# via -r test.in
grpcio==1.59.3
# via -r test.in
@@ -111,7 +111,7 @@ requests==2.31.0
# via
# -c base.txt
# label-studio-sdk
-ruff==0.1.6
+ruff==0.1.7
# via -r test.in
six==1.16.0
# via
test_unstructured/partition/pdf_image/test_pdf_image_utils.py (new file, 115 lines)
@@ -0,0 +1,115 @@
import os
import tempfile

import numpy as np
import pytest
from PIL import Image as PILImg

from test_unstructured.unit_utils import example_doc_path
from unstructured.documents.coordinates import PixelSpace
from unstructured.documents.elements import ElementMetadata, Image
from unstructured.partition.pdf_image import pdf_image_utils


@pytest.mark.parametrize("image_type", ["pil", "numpy_array"])
def test_write_image(image_type):
    mock_pil_image = PILImg.new("RGB", (50, 50))
    mock_numpy_image = np.zeros((50, 50, 3), np.uint8)

    image_map = {
        "pil": mock_pil_image,
        "numpy_array": mock_numpy_image,
    }
    image = image_map[image_type]

    with tempfile.TemporaryDirectory() as tmpdir:
        output_image_path = os.path.join(tmpdir, "test_image.jpg")
        pdf_image_utils.write_image(image, output_image_path)
        assert os.path.exists(output_image_path)

        # Additional check to see if the written image can be read
        read_image = PILImg.open(output_image_path)
        assert read_image is not None


@pytest.mark.parametrize("file_mode", ["filename", "rb"])
@pytest.mark.parametrize("path_only", [True, False])
def test_convert_pdf_to_image(
    file_mode, path_only, filename=example_doc_path("embedded-images.pdf")
):
    with tempfile.TemporaryDirectory() as tmpdir:
        if file_mode == "filename":
            images = pdf_image_utils.convert_pdf_to_image(
                filename=filename,
                file=None,
                output_folder=tmpdir,
                path_only=path_only,
            )
        else:
            with open(filename, "rb") as f:
                images = pdf_image_utils.convert_pdf_to_image(
                    filename="",
                    file=f,
                    output_folder=tmpdir,
                    path_only=path_only,
                )

        if path_only:
            assert isinstance(images[0], str)
        else:
            assert isinstance(images[0], PILImg.Image)


def test_extract_images_from_elements(filename=example_doc_path("embedded-images.pdf")):
    with tempfile.TemporaryDirectory() as tmpdir:
        elements = [
            Image(
                text="3",
                coordinates=(
                    (78.7401411111111, 86.61545694444455),
                    (78.7401411111111, 519.9487805555556),
                    (512.0734647222223, 519.9487805555556),
                    (512.0734647222223, 86.61545694444455),
                ),
                coordinate_system=PixelSpace(width=1575, height=1166),
                metadata=ElementMetadata(page_number=1),
            ),
            Image(
                text="4",
                coordinates=(
                    (570.8661397222222, 86.6154566666667),
                    (570.8661397222222, 519.6862825000001),
                    (1003.9369655555556, 519.6862825000001),
                    (1003.9369655555556, 86.6154566666667),
                ),
                coordinate_system=PixelSpace(width=1575, height=1166),
                metadata=ElementMetadata(page_number=1),
            ),
            Image(
                text="5",
                coordinates=(
                    (1062.9921808333331, 86.61545694444455),
                    (1062.9921808333331, 519.9487805555556),
                    (1496.3255044444445, 519.9487805555556),
                    (1496.3255044444445, 86.61545694444455),
                ),
                coordinate_system=PixelSpace(width=1575, height=1166),
                metadata=ElementMetadata(page_number=1),
            ),
        ]

        pdf_image_utils.extract_images_from_elements(
            elements=elements, pdf_image_dpi=200, filename=filename, output_dir_path=str(tmpdir)
        )

        for i, el in enumerate(elements):
            expected_image_path = os.path.join(
                str(tmpdir), f"figure-{el.metadata.page_number}-{i + 1}.jpg"
            )
            assert os.path.isfile(el.metadata.image_path)
            assert el.metadata.image_path == expected_image_path


def test_write_image_raises_error():
    with pytest.raises(ValueError):
        pdf_image_utils.write_image("invalid_type", "test_image.jpg")
@@ -555,6 +555,44 @@ def _add_regex_metadata(
    return elements


+class ElementType:
+    TITLE = "Title"
+    TEXT = "Text"
+    UNCATEGORIZED_TEXT = "UncategorizedText"
+    NARRATIVE_TEXT = "NarrativeText"
+    BULLETED_TEXT = "BulletedText"
+    ABSTRACT = "Abstract"
+    THREADING = "Threading"
+    FORM = "Form"
+    FIELD_NAME = "Field-Name"
+    VALUE = "Value"
+    LINK = "Link"
+    COMPOSITE_ELEMENT = "CompositeElement"
+    IMAGE = "Image"
+    PICTURE = "Picture"
+    FIGURE_CAPTION = "FigureCaption"
+    FIGURE = "Figure"
+    CAPTION = "Caption"
+    LIST = "List"
+    LIST_ITEM = "ListItem"
+    LIST_ITEM_OTHER = "List-item"
+    CHECKED = "Checked"
+    UNCHECKED = "Unchecked"
+    ADDRESS = "Address"
+    EMAIL_ADDRESS = "EmailAddress"
+    PAGE_BREAK = "PageBreak"
+    FORMULA = "Formula"
+    TABLE = "Table"
+    HEADER = "Header"
+    HEADLINE = "Headline"
+    SUB_HEADLINE = "Subheadline"
+    PAGE_HEADER = "Page-header"  # Title?
+    SECTION_HEADER = "Section-header"
+    FOOTER = "Footer"
+    FOOTNOTE = "Footnote"
+    PAGE_FOOTER = "Page-footer"
+
+
class Element(abc.ABC):
    """An element is a section of a page in the document."""
@@ -764,7 +802,7 @@ class EmailAddress(Text):
class Image(Text):
    """A text element for capturing image metadata."""

-    category = "Image"
+    category = ElementType.IMAGE


class PageBreak(Text):
@@ -797,44 +835,6 @@ class Footer(Text):
    category = "Footer"


-class ElementType:
-    TITLE = "Title"
-    TEXT = "Text"
-    UNCATEGORIZED_TEXT = "UncategorizedText"
-    NARRATIVE_TEXT = "NarrativeText"
-    BULLETED_TEXT = "BulletedText"
-    ABSTRACT = "Abstract"
-    THREADING = "Threading"
-    FORM = "Form"
-    FIELD_NAME = "Field-Name"
-    VALUE = "Value"
-    LINK = "Link"
-    COMPOSITE_ELEMENT = "CompositeElement"
-    IMAGE = "Image"
-    PICTURE = "Picture"
-    FIGURE_CAPTION = "FigureCaption"
-    FIGURE = "Figure"
-    CAPTION = "Caption"
-    LIST = "List"
-    LIST_ITEM = "ListItem"
-    LIST_ITEM_OTHER = "List-item"
-    CHECKED = "Checked"
-    UNCHECKED = "Unchecked"
-    ADDRESS = "Address"
-    EMAIL_ADDRESS = "EmailAddress"
-    PAGE_BREAK = "PageBreak"
-    FORMULA = "Formula"
-    TABLE = "Table"
-    HEADER = "Header"
-    HEADLINE = "Headline"
-    SUB_HEADLINE = "Subheadline"
-    PAGE_HEADER = "Page-header"  # Title?
-    SECTION_HEADER = "Section-header"
-    FOOTER = "Footer"
-    FOOTNOTE = "Footnote"
-    PAGE_FOOTER = "Page-footer"
-
-
TYPE_TO_TEXT_ELEMENT_MAP: Dict[str, Any] = {
    ElementType.TITLE: Title,
    ElementType.SECTION_HEADER: Title,
@@ -70,6 +70,7 @@ from unstructured.partition.lang import (
    check_languages,
    prepare_languages_for_tesseract,
)
+from unstructured.partition.pdf_image.pdf_image_utils import extract_images_from_elements
from unstructured.partition.pdf_image.pdfminer_utils import (
    open_pdfminer_pages_generator,
    rect_to_bbox,
@@ -381,8 +382,6 @@ def _partition_pdf_or_image_local(
            is_image=is_image,
            model_name=model_name,
            pdf_image_dpi=pdf_image_dpi,
-            extract_images_in_pdf=extract_images_in_pdf,
-            image_output_dir_path=image_output_dir_path,
        )

    # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
@@ -411,8 +410,6 @@ def _partition_pdf_or_image_local(
            is_image=is_image,
            model_name=model_name,
            pdf_image_dpi=pdf_image_dpi,
-            extract_images_in_pdf=extract_images_in_pdf,
-            image_output_dir_path=image_output_dir_path,
        )
        if hasattr(file, "seek"):
            file.seek(0)
@@ -458,6 +455,15 @@ def _partition_pdf_or_image_local(
        **kwargs,
    )

+    if extract_images_in_pdf:
+        extract_images_from_elements(
+            elements=elements,
+            filename=filename,
+            file=file,
+            pdf_image_dpi=pdf_image_dpi,
+            output_dir_path=image_output_dir_path,
+        )
+
    out_elements = []
    for el in elements:
        if isinstance(el, PageBreak) and not include_page_breaks:
unstructured/partition/pdf_image/pdf_image_utils.py (new file, 124 lines)
@@ -0,0 +1,124 @@
import os
import tempfile
from pathlib import PurePath
from typing import TYPE_CHECKING, BinaryIO, List, Optional, Union, cast

import cv2
import numpy as np
import pdf2image
from PIL import Image

from unstructured.documents.elements import ElementType
from unstructured.logger import logger
from unstructured.partition.common import convert_to_bytes

if TYPE_CHECKING:
    from unstructured.documents.elements import Element


def write_image(image: Union[Image.Image, np.ndarray], output_image_path: str):
    """
    Write an image to a specified file path, supporting both PIL Image and numpy ndarray formats.

    Parameters:
    - image (Union[Image.Image, np.ndarray]): The image to be written, which can be in PIL Image
      format or a numpy ndarray format.
    - output_image_path (str): The path to which the image will be written.

    Raises:
    - ValueError: If the provided image type is neither PIL Image nor numpy ndarray.

    Returns:
    - None: The function writes the image to the specified path but does not return any value.
    """

    if isinstance(image, Image.Image):
        image.save(output_image_path)
    elif isinstance(image, np.ndarray):
        cv2.imwrite(output_image_path, image)
    else:
        raise ValueError("Unsupported Image Type")


def convert_pdf_to_image(
    filename: str,
    file: Optional[Union[bytes, BinaryIO]] = None,
    dpi: int = 200,
    output_folder: Optional[Union[str, PurePath]] = None,
    path_only: bool = False,
) -> Union[List[Image.Image], List[str]]:
    """Get the image renderings of the pdf pages using pdf2image"""

    if path_only and not output_folder:
        raise ValueError("output_folder must be specified if path_only is true")

    if file is not None:
        f_bytes = convert_to_bytes(file)
        images = pdf2image.convert_from_bytes(
            f_bytes,
            dpi=dpi,
            output_folder=output_folder,
            paths_only=path_only,
        )
    else:
        images = pdf2image.convert_from_path(
            filename,
            dpi=dpi,
            output_folder=output_folder,
            paths_only=path_only,
        )

    return images


def extract_images_from_elements(
    elements: List["Element"],
    pdf_image_dpi: int,
    filename: str = "",
    file: Optional[Union[bytes, BinaryIO]] = None,
    output_dir_path: Optional[str] = None,
):
    """
    Extract and save images from the page. This method iterates through the layout elements
    of the page, identifies image regions, and extracts and saves them as separate image files.
    """

    if not output_dir_path:
        output_dir_path = os.path.join(os.getcwd(), "figures")
    os.makedirs(output_dir_path, exist_ok=True)

    with tempfile.TemporaryDirectory() as temp_dir:
        _image_paths = convert_pdf_to_image(
            filename,
            file,
            pdf_image_dpi,
            output_folder=temp_dir,
            path_only=True,
        )
        image_paths = cast(List[str], _image_paths)

        figure_number = 0
        for el in elements:
            coordinates = el.metadata.coordinates
            if not coordinates or not coordinates.points or el.category != ElementType.IMAGE:
                continue

            points = coordinates.points
            x1, y1 = points[0]
            x2, y2 = points[2]
            page_number = el.metadata.page_number

            figure_number += 1
            try:
                output_f_path = os.path.join(
                    output_dir_path,
                    f"figure-{page_number}-{figure_number}.jpg",
                )
                image_path = image_paths[page_number - 1]
                image = Image.open(image_path)
                cropped_image = image.crop((x1, y1, x2, y2))
                write_image(cropped_image, output_f_path)
                # add image path to element metadata
                el.metadata.image_path = output_f_path
            except (ValueError, IOError):
                logger.warning("Image Extraction Error: Skipping the failed image", exc_info=True)