Feat: return base64 encoded images for PDFs (#2310)

Closes #2302.

### Summary
- add functionality to get a Base64 encoded string from a PIL image
- store base64 encoded image data in two metadata fields: `image_base64` and `image_mime_type`
- update the "image element filter" logic to keep all image elements in the output if a user specifies image extraction

### Testing
```
from unstructured.partition.pdf import partition_pdf

elements = partition_pdf(
    filename="example-docs/embedded-images-tables.pdf",
    strategy="hi_res",
    extract_element_types=["Image", "Table"],
    extract_to_payload=True,
)
```
or
```
from unstructured.partition.auto import partition

elements = partition(
    filename="example-docs/embedded-images-tables.pdf",
    strategy="hi_res",
    pdf_extract_element_types=["Image", "Table"],
    pdf_extract_to_payload=True,
)
```
2025-11-02 11:03:38 +00:00 · 2023-12-26 21:39:01 -08:00 · 2023-12-26 21:39:01 -08:00 · dd144456de
commit dd144456de
parent 8ba9fadf8a
12 changed files with 1220 additions and 69 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,6 +6,8 @@
 * **Update encoders to leverage dataclasses** All encoders now follow a class approach which get annotated with the dataclass decorator. Similar to the connectors, it uses a nested dataclass for the configs required to configure a client as well as a field/property approach to cache the client. This makes sure any variable associated with the class exists as a dataclass field.

 ### Features
+ 
+* **Store base64 encoded image data in metadata fields.** When the user requests it (e.g. via `pdf_extract_to_payload`), stores the base64 encoded image bytes and the image's MIME type in the metadata fields `image_base64` and `image_mime_type` instead of saving the image to a file. This allows the API to have parity with the library.

 ### Fixes

--- a/example-docs/embedded-images-tables.jpg
+++ b/example-docs/embedded-images-tables.jpg
--- a/example-docs/embedded-images-tables.pdf
+++ b/example-docs/embedded-images-tables.pdf
--- a/test_unstructured/partition/pdf_image/test_image.py
+++ b/test_unstructured/partition/pdf_image/test_image.py
@ -1,5 +1,6 @@
 import os
 import pathlib
+import tempfile
 from unittest import mock

 import pytest
@ -7,6 +8,7 @@ from PIL import Image
 from pytesseract import TesseractError
 from unstructured_inference.inference import layout

+from test_unstructured.partition.pdf_image.test_pdf import assert_element_extraction
 from test_unstructured.unit_utils import assert_round_trips_through_JSON, example_doc_path
 from unstructured.chunking.title import chunk_by_title
 from unstructured.documents.elements import ElementType
@ -632,3 +634,34 @@ def test_partition_image_has_filename(inference_results):
    assert element.metadata.filetype == "JPEG"
    # This should be kept from the filename we originally gave
    assert element.metadata.filename == filename
+
+
+@pytest.mark.parametrize("file_mode", ["filename", "rb"])
+@pytest.mark.parametrize("extract_to_payload", [False, True])
+def test_partition_image_element_extraction(
+    file_mode,
+    extract_to_payload,
+    filename=example_doc_path("embedded-images-tables.jpg"),
+):
+    extract_element_types = ["Image", "Table"]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        if file_mode == "filename":
+            elements = image.partition_image(
+                filename=filename,
+                strategy="hi_res",
+                extract_element_types=extract_element_types,
+                extract_to_payload=extract_to_payload,
+                image_output_dir_path=tmpdir,
+            )
+        else:
+            with open(filename, "rb") as f:
+                elements = image.partition_image(
+                    file=f,
+                    strategy="hi_res",
+                    extract_element_types=extract_element_types,
+                    extract_to_payload=extract_to_payload,
+                    image_output_dir_path=tmpdir,
+                )
+
+        assert_element_extraction(elements, extract_element_types, extract_to_payload, tmpdir)
--- a/test_unstructured/partition/pdf_image/test_pdf.py
+++ b/test_unstructured/partition/pdf_image/test_pdf.py
@ -1,6 +1,8 @@
+import base64
 import logging
 import math
 import os
+import tempfile
 from tempfile import SpooledTemporaryFile
 from unittest import mock

@ -15,6 +17,7 @@ from unstructured.documents.coordinates import PixelSpace
 from unstructured.documents.elements import (
    CoordinatesMetadata,
    ElementMetadata,
+    ElementType,
    ListItem,
    NarrativeText,
    Text,
@ -1123,3 +1126,62 @@ def test_extractable_elements_repair_invalid_pdf_structure(filename, expected_lo
    caplog.set_level(logging.INFO)
    assert pdf.extractable_elements(filename=example_doc_path(filename))
    assert expected_log in caplog.text
+
+
+def assert_element_extraction(elements, extract_element_types, extract_to_payload, tmpdir):
+    extracted_elements = []
+    for el_type in extract_element_types:
+        extracted_elements_by_type = []
+        for el in elements:
+            if el.category == el_type:
+                extracted_elements_by_type.append(el)
+        extracted_elements.append(extracted_elements_by_type)
+
+    for extracted_elements_by_type in extracted_elements:
+        for i, el in enumerate(extracted_elements_by_type):
+            if extract_to_payload:
+                assert el.metadata.image_base64 is not None
+                assert el.metadata.image_mime_type == "image/jpeg"
+                image_data = base64.b64decode(el.metadata.image_base64)
+                assert isinstance(image_data, bytes)
+                assert el.metadata.image_path is None
+            else:
+                basename = "table" if el.category == ElementType.TABLE else "figure"
+                expected_image_path = os.path.join(
+                    str(tmpdir), f"{basename}-{el.metadata.page_number}-{i + 1}.jpg"
+                )
+                assert el.metadata.image_path == expected_image_path
+                assert os.path.isfile(expected_image_path)
+                assert el.metadata.image_base64 is None
+                assert el.metadata.image_mime_type is None
+
+
+@pytest.mark.parametrize("file_mode", ["filename", "rb"])
+@pytest.mark.parametrize("extract_to_payload", [False, True])
+def test_partition_pdf_element_extraction(
+    file_mode,
+    extract_to_payload,
+    filename=example_doc_path("embedded-images-tables.pdf"),
+):
+    extract_element_types = ["Image", "Table"]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        if file_mode == "filename":
+            elements = pdf.partition_pdf(
+                filename=filename,
+                strategy="hi_res",
+                extract_element_types=extract_element_types,
+                extract_to_payload=extract_to_payload,
+                image_output_dir_path=tmpdir,
+            )
+        else:
+            with open(filename, "rb") as f:
+                elements = pdf.partition_pdf(
+                    file=f,
+                    strategy="hi_res",
+                    extract_element_types=extract_element_types,
+                    extract_to_payload=extract_to_payload,
+                    image_output_dir_path=tmpdir,
+                )
+
+        assert_element_extraction(elements, extract_element_types, extract_to_payload, tmpdir)
--- a/test_unstructured/partition/pdf_image/test_pdf_image_utils.py
+++ b/test_unstructured/partition/pdf_image/test_pdf_image_utils.py
@ -7,7 +7,7 @@ from PIL import Image as PILImg

 from test_unstructured.unit_utils import example_doc_path
 from unstructured.documents.coordinates import PixelSpace
-from unstructured.documents.elements import ElementMetadata, ElementType, Image
+from unstructured.documents.elements import ElementMetadata, ElementType, Image, Table
 from unstructured.partition.pdf_image import pdf_image_utils


@ -60,58 +60,66 @@ def test_convert_pdf_to_image(
            assert isinstance(images[0], PILImg.Image)


-def test_save_elements(filename=example_doc_path("embedded-images.pdf")):
+@pytest.mark.parametrize("element_category_to_save", [ElementType.IMAGE, ElementType.TABLE])
+@pytest.mark.parametrize("extract_to_payload", [False, True])
+def test_save_elements(
+    element_category_to_save,
+    extract_to_payload,
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
+):
    with tempfile.TemporaryDirectory() as tmpdir:
        elements = [
            Image(
                text="3",
-                coordinates=(
-                    (78.7401411111111, 86.61545694444455),
-                    (78.7401411111111, 519.9487805555556),
-                    (512.0734647222223, 519.9487805555556),
-                    (512.0734647222223, 86.61545694444455),
-                ),
+                coordinates=((78, 86), (78, 519), (512, 519), (512, 86)),
                coordinate_system=PixelSpace(width=1575, height=1166),
                metadata=ElementMetadata(page_number=1),
            ),
            Image(
                text="4",
-                coordinates=(
-                    (570.8661397222222, 86.6154566666667),
-                    (570.8661397222222, 519.6862825000001),
-                    (1003.9369655555556, 519.6862825000001),
-                    (1003.9369655555556, 86.6154566666667),
-                ),
+                coordinates=((570, 86), (570, 519), (1003, 519), (1003, 86)),
                coordinate_system=PixelSpace(width=1575, height=1166),
                metadata=ElementMetadata(page_number=1),
            ),
            Image(
                text="5",
-                coordinates=(
-                    (1062.9921808333331, 86.61545694444455),
-                    (1062.9921808333331, 519.9487805555556),
-                    (1496.3255044444445, 519.9487805555556),
-                    (1496.3255044444445, 86.61545694444455),
-                ),
+                coordinates=((1062, 86), (1062, 519), (1496, 519), (1496, 86)),
                coordinate_system=PixelSpace(width=1575, height=1166),
                metadata=ElementMetadata(page_number=1),
            ),
+            Table(
+                text="Sample Table",
+                coordinates=((1062, 86), (1062, 519), (1496, 519), (1496, 86)),
+                coordinate_system=PixelSpace(width=1575, height=1166),
+                metadata=ElementMetadata(page_number=2),
+            ),
        ]

        pdf_image_utils.save_elements(
            elements=elements,
-            element_category_to_save=ElementType.IMAGE,
+            element_category_to_save=element_category_to_save,
            pdf_image_dpi=200,
            filename=filename,
            output_dir_path=str(tmpdir),
+            extract_to_payload=extract_to_payload,
        )

-        for i, el in enumerate(elements):
+        saved_elements = [el for el in elements if el.category == element_category_to_save]
+        for i, el in enumerate(saved_elements):
+            basename = "table" if el.category == ElementType.TABLE else "figure"
            expected_image_path = os.path.join(
-                str(tmpdir), f"figure-{el.metadata.page_number}-{i + 1}.jpg"
+                str(tmpdir), f"{basename}-{el.metadata.page_number}-{i + 1}.jpg"
            )
-            assert os.path.isfile(el.metadata.image_path)
-            assert el.metadata.image_path == expected_image_path
+            if extract_to_payload:
+                assert isinstance(el.metadata.image_base64, str)
+                assert isinstance(el.metadata.image_mime_type, str)
+                assert not el.metadata.image_path
+                assert not os.path.isfile(expected_image_path)
+            else:
+                assert os.path.isfile(expected_image_path)
+                assert el.metadata.image_path == expected_image_path
+                assert not el.metadata.image_base64
+                assert not el.metadata.image_mime_type


 def test_write_image_raises_error():
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@ -3,7 +3,7 @@ import os
 import pathlib
 import warnings
 from importlib import import_module
-from unittest.mock import ANY, Mock, patch
+from unittest.mock import Mock, patch

 import docx
 import pytest
@ -347,15 +347,17 @@ def test_auto_partition_pdf_with_fast_strategy(monkeypatch):

    mock_partition.assert_called_once_with(
        filename=filename,
-        metadata_filename=None,
        file=None,
        url=None,
-        include_page_breaks=False,
-        infer_table_structure=False,
-        extract_images_in_pdf=ANY,
-        image_output_dir_path=ANY,
        strategy=PartitionStrategy.FAST,
        languages=None,
+        metadata_filename=None,
+        include_page_breaks=False,
+        infer_table_structure=False,
+        extract_images_in_pdf=False,
+        extract_element_types=None,
+        image_output_dir_path=None,
+        extract_to_payload=False,
        hi_res_model_name=None,
    )

--- a/unstructured/documents/elements.py
+++ b/unstructured/documents/elements.py
@ -173,9 +173,11 @@ class ElementMetadata:
    file_directory: Optional[str]
    filename: Optional[str]
    filetype: Optional[str]
+    image_path: Optional[str]
+    image_base64: Optional[str]
+    image_mime_type: Optional[str]
    # -- specific to DOCX which has distinct primary, first-page, and even-page header/footers --
    header_footer_type: Optional[str]
-    image_path: Optional[str]
    # -- used in chunks only, when chunk must be split mid-text to fit window --
    is_continuation: Optional[bool]
    languages: Optional[List[str]]
@ -457,6 +459,8 @@ class ConsolidationStrategy(enum.Enum):
            "filetype": cls.FIRST,
            "header_footer_type": cls.DROP,
            "image_path": cls.DROP,
+            "image_base64": cls.DROP,
+            "image_mime_type": cls.DROP,
            "is_continuation": cls.DROP,  # -- not expected, added by chunking, not before --
            "languages": cls.LIST_UNIQUE,
            "last_modified": cls.FIRST,
--- a/unstructured/partition/auto.py
+++ b/unstructured/partition/auto.py
@ -137,7 +137,9 @@ def partition(
    detect_language_per_element: bool = False,
    pdf_infer_table_structure: bool = False,
    pdf_extract_images: bool = False,
+    pdf_extract_element_types: Optional[List[str]] = None,
    pdf_image_output_dir_path: Optional[str] = None,
+    pdf_extract_to_payload: bool = False,
    xml_keep_tags: bool = False,
    data_source_metadata: Optional[DataSourceMetadata] = None,
    metadata_filename: Optional[str] = None,
@ -193,11 +195,26 @@ def partition(
        transformation of the data into an HTML <table>.
        The "text" field for a partitioned Table Element is always present, whether True or False.
    pdf_extract_images
-        If True and strategy=hi_res, any detected images will be saved in the path specified by
-        pdf_image_output_dir_path.
+        Only applicable if `strategy=hi_res`.
+        If True, any detected images will be saved in the path specified by
+        'pdf_image_output_dir_path' or stored as base64 encoded data within metadata fields.
+        Deprecation Note: This parameter is marked for deprecation. Future versions will use
+        'pdf_extract_element_types' for broader extraction capabilities.
+    pdf_extract_element_types
+        Only applicable if `strategy=hi_res`.
+        Images of the element type(s) specified in this list (e.g., ["Image", "Table"]) will be
+        saved in the path specified by 'pdf_image_output_dir_path' or stored as base64 encoded
+        data within metadata fields.
+    pdf_extract_to_payload
+        Only applicable if `strategy=hi_res`.
+        If True, images of the element type(s) defined in 'pdf_extract_element_types' will be
+        encoded as base64 data and stored in two metadata fields: 'image_base64' and
+        'image_mime_type'. This parameter facilitates the inclusion of element data directly within
+        the payload, especially for web-based applications or APIs.
    pdf_image_output_dir_path
-        If pdf_extract_images=True and strategy=hi_res, any detected images will be saved in the
-        given path
+        Only applicable if `strategy=hi_res` and `pdf_extract_to_payload=False`.
+        The filesystem path for saving images of the element type(s)
+        specified in 'pdf_extract_element_types'.
    xml_keep_tags
        If True, will retain the XML tags in the output. Otherwise it will simply extract
        the text from within the tags. Only applies to partition_xml.
@ -397,7 +414,9 @@ def partition(
            strategy=strategy,
            languages=languages,
            extract_images_in_pdf=pdf_extract_images,
+            extract_element_types=pdf_extract_element_types,
            image_output_dir_path=pdf_image_output_dir_path,
+            extract_to_payload=pdf_extract_to_payload,
            hi_res_model_name=hi_res_model_name or model_name,
            **kwargs,
        )
--- a/unstructured/partition/image.py
+++ b/unstructured/partition/image.py
@ -26,6 +26,10 @@ def partition_image(
    metadata_last_modified: Optional[str] = None,
    chunking_strategy: Optional[str] = None,
    hi_res_model_name: Optional[str] = None,
+    extract_images_in_pdf: bool = False,
+    extract_element_types: Optional[List[str]] = None,
+    image_output_dir_path: Optional[str] = None,
+    extract_to_payload: bool = False,
    **kwargs,
 ) -> List[Element]:
    """Parses an image into a list of interpreted elements.
@ -58,6 +62,27 @@ def partition_image(
        The last modified date for the document.
    hi_res_model_name
        The layout detection model used when partitioning strategy is set to `hi_res`.
+    extract_images_in_pdf
+        Only applicable if `strategy=hi_res`.
+        If True, any detected images will be saved in the path specified by 'image_output_dir_path'
+        or stored as base64 encoded data within metadata fields.
+        Deprecation Note: This parameter is marked for deprecation. Future versions will use
+        'extract_element_types' for broader extraction capabilities.
+    extract_element_types
+        Only applicable if `strategy=hi_res`.
+        Images of the element type(s) specified in this list (e.g., ["Image", "Table"]) will be
+        saved in the path specified by 'image_output_dir_path' or stored as base64 encoded data
+        within metadata fields.
+    extract_to_payload
+        Only applicable if `strategy=hi_res`.
+        If True, images of the element type(s) defined in 'extract_element_types' will be encoded
+        as base64 data and stored in two metadata fields: 'image_base64' and 'image_mime_type'.
+        This parameter facilitates the inclusion of element data directly within the payload,
+        especially for web-based applications or APIs.
+    image_output_dir_path
+        Only applicable if `strategy=hi_res` and `extract_to_payload=False`.
+        The filesystem path for saving images of the element type(s)
+        specified in 'extract_element_types'.
    """
    exactly_one(filename=filename, file=file)

@ -93,5 +118,9 @@ def partition_image(
        strategy=strategy,
        metadata_last_modified=metadata_last_modified,
        hi_res_model_name=hi_res_model_name,
+        extract_images_in_pdf=extract_images_in_pdf,
+        extract_element_types=extract_element_types,
+        image_output_dir_path=image_output_dir_path,
+        extract_to_payload=extract_to_payload,
        **kwargs,
    )
--- a/unstructured/partition/pdf.py
+++ b/unstructured/partition/pdf.py
@ -106,7 +106,6 @@ from unstructured.utils import requires_dependencies
 if TYPE_CHECKING:
    pass

-
 # NOTE(alan): Patching this to fix a bug in pdfminer.six. Submitted this PR into pdfminer.six to fix
 # the bug: https://github.com/pdfminer/pdfminer.six/pull/885
 psparser.PSBaseParser._parse_keyword = parse_keyword  # type: ignore
@ -140,10 +139,11 @@ def partition_pdf(
    metadata_last_modified: Optional[str] = None,
    chunking_strategy: Optional[str] = None,  # used by decorator
    links: Sequence[Link] = [],
+    hi_res_model_name: Optional[str] = None,
    extract_images_in_pdf: bool = False,
    extract_element_types: Optional[List[str]] = None,
    image_output_dir_path: Optional[str] = None,
-    hi_res_model_name: Optional[str] = None,
+    extract_to_payload: bool = False,
    **kwargs,
 ) -> List[Element]:
    """Parses a pdf document into a list of interpreted elements.
@ -173,18 +173,29 @@ def partition_pdf(
        with Tesseract, you'll first need to install the appropriate Tesseract language pack.
    metadata_last_modified
        The last modified date for the document.
-    extract_images_in_pdf
-        Only applicable if `strategy=hi_res`.
-        If `True`, any detected images will be saved in the path specified by
-        image_output_dir_path.
-    extract_element_types
-        Only applicable if `strategy=hi_res`.
-        Images of the element type(s) defined in this list will be saved to `image_output_dir_path`.
-    image_output_dir_path
-        Only applicable if `strategy=hi_res`.
-        The path for saving images when using `extract_images_in_pdf` or `extract_element_types`.
    hi_res_model_name
        The layout detection model used when partitioning strategy is set to `hi_res`.
+    extract_images_in_pdf
+        Only applicable if `strategy=hi_res`.
+        If True, any detected images will be saved in the path specified by 'image_output_dir_path'
+        or stored as base64 encoded data within metadata fields.
+        Deprecation Note: This parameter is marked for deprecation. Future versions will use
+        'extract_element_types' for broader extraction capabilities.
+    extract_element_types
+        Only applicable if `strategy=hi_res`.
+        Images of the element type(s) specified in this list (e.g., ["Image", "Table"]) will be
+        saved in the path specified by 'image_output_dir_path' or stored as base64 encoded data
+        within metadata fields.
+    extract_to_payload
+        Only applicable if `strategy=hi_res`.
+        If True, images of the element type(s) defined in 'extract_element_types' will be encoded
+        as base64 data and stored in two metadata fields: 'image_base64' and 'image_mime_type'.
+        This parameter facilitates the inclusion of element data directly within the payload,
+        especially for web-based applications or APIs.
+    image_output_dir_path
+        Only applicable if `strategy=hi_res` and `extract_to_payload=False`.
+        The filesystem path for saving images of the element type(s)
+        specified in 'extract_element_types'.
    """

    exactly_one(filename=filename, file=file)
@ -199,10 +210,11 @@ def partition_pdf(
        infer_table_structure=infer_table_structure,
        languages=languages,
        metadata_last_modified=metadata_last_modified,
+        hi_res_model_name=hi_res_model_name,
        extract_images_in_pdf=extract_images_in_pdf,
        extract_element_types=extract_element_types,
        image_output_dir_path=image_output_dir_path,
-        hi_res_model_name=hi_res_model_name,
+        extract_to_payload=extract_to_payload,
        **kwargs,
    )

@ -249,13 +261,14 @@ def _partition_pdf_or_image_local(
    languages: Optional[List[str]] = None,
    ocr_mode: str = OCRMode.FULL_PAGE.value,
    model_name: Optional[str] = None,  # to be deprecated in favor of `hi_res_model_name`
+    hi_res_model_name: Optional[str] = None,
+    pdf_image_dpi: Optional[int] = None,
    metadata_last_modified: Optional[str] = None,
    pdf_text_extractable: bool = False,
    extract_images_in_pdf: bool = False,
    extract_element_types: Optional[List[str]] = None,
    image_output_dir_path: Optional[str] = None,
-    pdf_image_dpi: Optional[int] = None,
-    hi_res_model_name: Optional[str] = None,
+    extract_to_payload: bool = False,
    analysis: bool = False,
    analyzed_image_output_dir_path: Optional[str] = None,
    **kwargs,
@ -402,7 +415,9 @@ def _partition_pdf_or_image_local(
            element_category_to_save=ElementType.IMAGE,
            filename=filename,
            file=file,
+            is_image=is_image,
            pdf_image_dpi=pdf_image_dpi,
+            extract_to_payload=extract_to_payload,
            output_dir_path=image_output_dir_path,
        )

@ -415,7 +430,9 @@ def _partition_pdf_or_image_local(
            element_category_to_save=el_type,
            filename=filename,
            file=file,
+            is_image=is_image,
            pdf_image_dpi=pdf_image_dpi,
+            extract_to_payload=extract_to_payload,
            output_dir_path=image_output_dir_path,
        )

@ -425,10 +442,12 @@ def _partition_pdf_or_image_local(
            continue

        if isinstance(el, Image):
-            # NOTE(crag): small chunks of text from Image elements tend to be garbage
-            if not el.metadata.image_path and (
-                el.text is None or len(el.text) < 24 or el.text.find(" ") == -1
+            if (
+                not extract_images_in_pdf
+                and ElementType.IMAGE not in extract_element_types
+                and (el.text is None or len(el.text) < 24 or el.text.find(" ") == -1)
            ):
+                # NOTE(crag): small chunks of text from Image elements tend to be garbage
                continue
            else:
                out_elements.append(cast(Element, el))
@ -457,10 +476,11 @@ def partition_pdf_or_image(
    ocr_languages: Optional[str] = None,
    languages: Optional[List[str]] = None,
    metadata_last_modified: Optional[str] = None,
+    hi_res_model_name: Optional[str] = None,
    extract_images_in_pdf: bool = False,
    extract_element_types: Optional[List[str]] = None,
    image_output_dir_path: Optional[str] = None,
-    hi_res_model_name: Optional[str] = None,
+    extract_to_payload: bool = False,
    **kwargs,
 ) -> List[Element]:
    """Parses a pdf or image document into a list of interpreted elements."""
@ -518,11 +538,12 @@ def partition_pdf_or_image(
                include_page_breaks=include_page_breaks,
                languages=languages,
                metadata_last_modified=metadata_last_modified or last_modification_date,
+                hi_res_model_name=hi_res_model_name,
                pdf_text_extractable=pdf_text_extractable,
                extract_images_in_pdf=extract_images_in_pdf,
                extract_element_types=extract_element_types,
                image_output_dir_path=image_output_dir_path,
-                hi_res_model_name=hi_res_model_name,
+                extract_to_payload=extract_to_payload,
                **kwargs,
            )
            out_elements = _process_uncategorized_text_elements(elements)
--- a/unstructured/partition/pdf_image/pdf_image_utils.py
+++ b/unstructured/partition/pdf_image/pdf_image_utils.py
@ -1,5 +1,7 @@
+import base64
 import os
 import tempfile
+from io import BytesIO
 from pathlib import PurePath
 from typing import TYPE_CHECKING, BinaryIO, List, Optional, Union, cast

@ -79,11 +81,17 @@ def save_elements(
    pdf_image_dpi: int,
    filename: str = "",
    file: Optional[Union[bytes, BinaryIO]] = None,
+    is_image: bool = False,
+    extract_to_payload: bool = False,
    output_dir_path: Optional[str] = None,
 ):
    """
-    Extract and save images from the page. This method iterates through the layout elements
-    of the page, identifies image regions, and extracts and saves them as separate image files.
+    Saves specific elements from a PDF as images either to a directory or embeds them in the
+    element's payload.
+
+    This function processes a list of elements partitioned from a PDF file. For each element of
+    a specified category, it extracts and saves the image. The images can either be saved to
+    a specified directory or embedded into the element's payload as a base64-encoded string.
    """

    if not output_dir_path:
@ -91,14 +99,25 @@ def save_elements(
    os.makedirs(output_dir_path, exist_ok=True)

    with tempfile.TemporaryDirectory() as temp_dir:
-        _image_paths = convert_pdf_to_image(
-            filename,
-            file,
-            pdf_image_dpi,
-            output_folder=temp_dir,
-            path_only=True,
-        )
-        image_paths = cast(List[str], _image_paths)
+        if is_image:
+            if file is None:
+                image_paths = [filename]
+            else:
+                if hasattr(file, "seek"):
+                    file.seek(0)
+                temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
+                temp_file.write(file.read() if hasattr(file, "read") else file)
+                temp_file.flush()
+                image_paths = [temp_file.name]
+        else:
+            _image_paths = convert_pdf_to_image(
+                filename,
+                file,
+                pdf_image_dpi,
+                output_folder=temp_dir,
+                path_only=True,
+            )
+            image_paths = cast(List[str], _image_paths)

        figure_number = 0
        for el in elements:
@ -124,9 +143,17 @@ def save_elements(
                image_path = image_paths[page_number - 1]
                image = Image.open(image_path)
                cropped_image = image.crop((x1, y1, x2, y2))
-                write_image(cropped_image, output_f_path)
-                # add image path to element metadata
-                el.metadata.image_path = output_f_path
+                if extract_to_payload:
+                    buffered = BytesIO()
+                    cropped_image.save(buffered, format="JPEG")
+                    img_base64 = base64.b64encode(buffered.getvalue())
+                    img_base64_str = img_base64.decode()
+                    el.metadata.image_base64 = img_base64_str
+                    el.metadata.image_mime_type = "image/jpeg"
+                else:
+                    write_image(cropped_image, output_f_path)
+                    # add image path to element metadata
+                    el.metadata.image_path = output_f_path
            except (ValueError, IOError):
                logger.warning("Image Extraction Error: Skipping the failed image", exc_info=True)