From 5b0ae3fd8b638f573d3a966ebeb0e3eef5d4bad2 Mon Sep 17 00:00:00 2001
From: Christine Straub <christinemstraub@gmail.com>
Date: Thu, 4 Jan 2024 09:52:00 -0800
Subject: [PATCH] Refactor: rename image extraction kwargs (#2303)

Currently, partition() and partition_pdf() use different kwarg names for
image extraction. This matters for the API, because API requests go
through partition() and therefore see names that differ from
partition_pdf().

### Summary
- rename `extract_element_types` -> `extract_image_block_types`
- rename `image_output_dir_path` -> `extract_image_block_output_dir`
- rename `extract_to_payload` -> `extract_image_block_to_payload`
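- rename `pdf_extract_images` -> `extract_images_in_pdf` in
`partition.auto`, and replace its other `pdf_`-prefixed image extraction
kwargs (`pdf_extract_element_types`, `pdf_image_output_dir_path`,
`pdf_extract_to_payload`) with the new names above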
- add unit tests to test element extraction for `pdf/image` via
`partition.auto`

### Testing
CI should pass.
---
 CHANGELOG.md                                  | 10 +++
 .../partition/pdf_image/test_image.py         | 22 +++---
 .../partition/pdf_image/test_pdf.py           | 30 ++++----
 .../pdf_image/test_pdf_image_utils.py         |  8 +--
 test_unstructured/partition/test_auto.py      | 48 ++++++++++++-
 .../partition/test_strategies.py              |  6 +-
 unstructured/__version__.py                   |  2 +-
 unstructured/partition/auto.py                | 47 ++++++------
 unstructured/partition/image.py               | 37 +++++-----
 unstructured/partition/pdf.py                 | 71 ++++++++++---------
 .../partition/pdf_image/pdf_image_utils.py    | 20 +++---
 unstructured/partition/strategies.py          |  4 +-
 12 files changed, 185 insertions(+), 120 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c670045fd..32113b493 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## 0.11.9-dev0
+
+### Enhancements
+
+* **Rename kwargs related to extracting image blocks.** Rename the kwargs related to extracting image blocks for consistency and API usage.
+
+### Features
+
+### Fixes
+
 ## 0.11.8
 
 ### Enhancements
diff --git a/test_unstructured/partition/pdf_image/test_image.py b/test_unstructured/partition/pdf_image/test_image.py
index f3071f52a..d0dface90 100644
--- a/test_unstructured/partition/pdf_image/test_image.py
+++ b/test_unstructured/partition/pdf_image/test_image.py
@@ -637,29 +637,31 @@ def test_partition_image_has_filename(inference_results):
 
 
 @pytest.mark.parametrize("file_mode", ["filename", "rb"])
-@pytest.mark.parametrize("extract_to_payload", [False, True])
+@pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
 def test_partition_image_element_extraction(
     file_mode,
-    extract_to_payload,
+    extract_image_block_to_payload,
     filename=example_doc_path("embedded-images-tables.jpg"),
 ):
-    extract_element_types = ["Image", "Table"]
+    extract_image_block_types = ["Image", "Table"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         if file_mode == "filename":
             elements = image.partition_image(
                 filename=filename,
-                extract_element_types=extract_element_types,
-                extract_to_payload=extract_to_payload,
-                image_output_dir_path=tmpdir,
+                extract_image_block_types=extract_image_block_types,
+                extract_image_block_to_payload=extract_image_block_to_payload,
+                extract_image_block_output_dir=tmpdir,
             )
         else:
             with open(filename, "rb") as f:
                 elements = image.partition_image(
                     file=f,
-                    extract_element_types=extract_element_types,
-                    extract_to_payload=extract_to_payload,
-                    image_output_dir_path=tmpdir,
+                    extract_image_block_types=extract_image_block_types,
+                    extract_image_block_to_payload=extract_image_block_to_payload,
+                    extract_image_block_output_dir=tmpdir,
                 )
 
-        assert_element_extraction(elements, extract_element_types, extract_to_payload, tmpdir)
+        assert_element_extraction(
+            elements, extract_image_block_types, extract_image_block_to_payload, tmpdir
+        )
diff --git a/test_unstructured/partition/pdf_image/test_pdf.py b/test_unstructured/partition/pdf_image/test_pdf.py
index 6fa8194a6..2e8d606c9 100644
--- a/test_unstructured/partition/pdf_image/test_pdf.py
+++ b/test_unstructured/partition/pdf_image/test_pdf.py
@@ -1128,9 +1128,11 @@ def test_extractable_elements_repair_invalid_pdf_structure(filename, expected_lo
     assert expected_log in caplog.text
 
 
-def assert_element_extraction(elements, extract_element_types, extract_to_payload, tmpdir):
+def assert_element_extraction(
+    elements, extract_image_block_types, extract_image_block_to_payload, tmpdir
+):
     extracted_elements = []
-    for el_type in extract_element_types:
+    for el_type in extract_image_block_types:
         extracted_elements_by_type = []
         for el in elements:
             if el.category == el_type:
@@ -1139,7 +1141,7 @@ def assert_element_extraction(elements, extract_element_types, extract_to_payloa
 
     for extracted_elements_by_type in extracted_elements:
         for i, el in enumerate(extracted_elements_by_type):
-            if extract_to_payload:
+            if extract_image_block_to_payload:
                 assert el.metadata.image_base64 is not None
                 assert el.metadata.image_mime_type == "image/jpeg"
                 image_data = base64.b64decode(el.metadata.image_base64)
@@ -1157,29 +1159,31 @@ def assert_element_extraction(elements, extract_element_types, extract_to_payloa
 
 
 @pytest.mark.parametrize("file_mode", ["filename", "rb"])
-@pytest.mark.parametrize("extract_to_payload", [False, True])
+@pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
 def test_partition_pdf_element_extraction(
     file_mode,
-    extract_to_payload,
+    extract_image_block_to_payload,
     filename=example_doc_path("embedded-images-tables.pdf"),
 ):
-    extract_element_types = ["Image", "Table"]
+    extract_image_block_types = ["Image", "Table"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         if file_mode == "filename":
             elements = pdf.partition_pdf(
                 filename=filename,
-                extract_element_types=extract_element_types,
-                extract_to_payload=extract_to_payload,
-                image_output_dir_path=tmpdir,
+                extract_image_block_types=extract_image_block_types,
+                extract_image_block_to_payload=extract_image_block_to_payload,
+                extract_image_block_output_dir=tmpdir,
             )
         else:
             with open(filename, "rb") as f:
                 elements = pdf.partition_pdf(
                     file=f,
-                    extract_element_types=extract_element_types,
-                    extract_to_payload=extract_to_payload,
-                    image_output_dir_path=tmpdir,
+                    extract_image_block_types=extract_image_block_types,
+                    extract_image_block_to_payload=extract_image_block_to_payload,
+                    extract_image_block_output_dir=tmpdir,
                 )
 
-        assert_element_extraction(elements, extract_element_types, extract_to_payload, tmpdir)
+        assert_element_extraction(
+            elements, extract_image_block_types, extract_image_block_to_payload, tmpdir
+        )
diff --git a/test_unstructured/partition/pdf_image/test_pdf_image_utils.py b/test_unstructured/partition/pdf_image/test_pdf_image_utils.py
index 13bc2b50b..7cf823e09 100644
--- a/test_unstructured/partition/pdf_image/test_pdf_image_utils.py
+++ b/test_unstructured/partition/pdf_image/test_pdf_image_utils.py
@@ -61,10 +61,10 @@ def test_convert_pdf_to_image(
 
 
 @pytest.mark.parametrize("element_category_to_save", [ElementType.IMAGE, ElementType.TABLE])
-@pytest.mark.parametrize("extract_to_payload", [False, True])
+@pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
 def test_save_elements(
     element_category_to_save,
-    extract_to_payload,
+    extract_image_block_to_payload,
     filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -101,7 +101,7 @@ def test_save_elements(
             pdf_image_dpi=200,
             filename=filename,
             output_dir_path=str(tmpdir),
-            extract_to_payload=extract_to_payload,
+            extract_image_block_to_payload=extract_image_block_to_payload,
         )
 
         saved_elements = [el for el in elements if el.category == element_category_to_save]
@@ -110,7 +110,7 @@ def test_save_elements(
             expected_image_path = os.path.join(
                 str(tmpdir), f"{basename}-{el.metadata.page_number}-{i + 1}.jpg"
             )
-            if extract_to_payload:
+            if extract_image_block_to_payload:
                 assert isinstance(el.metadata.image_base64, str)
                 assert isinstance(el.metadata.image_mime_type, str)
                 assert not el.metadata.image_path
diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
index 4df28143d..bd2b3596f 100644
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@@ -1,6 +1,7 @@
 import json
 import os
 import pathlib
+import tempfile
 import warnings
 from importlib import import_module
 from unittest.mock import Mock, patch
@@ -8,6 +9,7 @@ from unittest.mock import Mock, patch
 import docx
 import pytest
 
+from test_unstructured.partition.pdf_image.test_pdf import assert_element_extraction
 from test_unstructured.partition.test_constants import (
     EXPECTED_TABLE,
     EXPECTED_TABLE_XLSX,
@@ -355,9 +357,9 @@ def test_auto_partition_pdf_with_fast_strategy(monkeypatch):
         include_page_breaks=False,
         infer_table_structure=False,
         extract_images_in_pdf=False,
-        extract_element_types=None,
-        image_output_dir_path=None,
-        extract_to_payload=False,
+        extract_image_block_types=None,
+        extract_image_block_output_dir=None,
+        extract_image_block_to_payload=False,
         hi_res_model_name=None,
     )
 
@@ -460,6 +462,26 @@ def test_auto_partition_image_default_strategy_hi_res(pass_metadata_filename, co
     assert elements[idx].metadata.coordinates is not None
 
 
+@pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
+def test_auto_partition_image_element_extraction(
+    extract_image_block_to_payload,
+    filename=os.path.join(EXAMPLE_DOCS_DIRECTORY, "embedded-images-tables.jpg"),
+):
+    extract_image_block_types = ["Image", "Table"]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        elements = partition(
+            filename=filename,
+            extract_image_block_types=extract_image_block_types,
+            extract_image_block_to_payload=extract_image_block_to_payload,
+            extract_image_block_output_dir=tmpdir,
+        )
+
+        assert_element_extraction(
+            elements, extract_image_block_types, extract_image_block_to_payload, tmpdir
+        )
+
+
 @pytest.mark.parametrize(
     ("pass_metadata_filename", "content_type"),
     [(False, None), (False, "image/jpeg"), (True, "image/jpeg"), (True, None)],
@@ -666,6 +688,26 @@ def test_auto_filetype_overrides_file_specific(content_type, expected, monkeypat
     assert all(el.metadata.filetype == expected for el in elements)
 
 
+@pytest.mark.parametrize("extract_image_block_to_payload", [False, True])
+def test_auto_partition_pdf_element_extraction(
+    extract_image_block_to_payload,
+    filename=os.path.join(EXAMPLE_DOCS_DIRECTORY, "embedded-images-tables.pdf"),
+):
+    extract_image_block_types = ["Image", "Table"]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        elements = partition(
+            filename=filename,
+            extract_image_block_types=extract_image_block_types,
+            extract_image_block_to_payload=extract_image_block_to_payload,
+            extract_image_block_output_dir=tmpdir,
+        )
+
+        assert_element_extraction(
+            elements, extract_image_block_types, extract_image_block_to_payload, tmpdir
+        )
+
+
 supported_filetypes = [
     _
     for _ in FileType
diff --git a/test_unstructured/partition/test_strategies.py b/test_unstructured/partition/test_strategies.py
index b51c0fbe9..fbc580bda 100644
--- a/test_unstructured/partition/test_strategies.py
+++ b/test_unstructured/partition/test_strategies.py
@@ -76,7 +76,7 @@ def test_determine_pdf_or_image_fast_strategy(pdf_text_extractable, infer_table_
         "pdf_text_extractable",
         "infer_table_structure",
         "extract_images_in_pdf",
-        "extract_element_types",
+        "extract_image_block_types",
         "expected",
     ),
     [
@@ -102,7 +102,7 @@ def test_determine_pdf_auto_strategy(
     pdf_text_extractable,
     infer_table_structure,
     extract_images_in_pdf,
-    extract_element_types,
+    extract_image_block_types,
     expected,
 ):
     strategy = strategies.determine_pdf_or_image_strategy(
@@ -111,7 +111,7 @@ def test_determine_pdf_auto_strategy(
         pdf_text_extractable=pdf_text_extractable,
         infer_table_structure=infer_table_structure,
         extract_images_in_pdf=extract_images_in_pdf,
-        extract_element_types=extract_element_types,
+        extract_image_block_types=extract_image_block_types,
     )
     assert strategy == expected
 
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 320de9b63..0d1529cc6 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.11.8"  # pragma: no cover
+__version__ = "0.11.9-dev0"  # pragma: no cover
diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py
index 23d7d9ec3..df7e8bf8a 100644
--- a/unstructured/partition/auto.py
+++ b/unstructured/partition/auto.py
@@ -136,10 +136,10 @@ def partition(
     languages: Optional[List[str]] = None,
     detect_language_per_element: bool = False,
     pdf_infer_table_structure: bool = False,
-    pdf_extract_images: bool = False,
-    pdf_extract_element_types: Optional[List[str]] = None,
-    pdf_image_output_dir_path: Optional[str] = None,
-    pdf_extract_to_payload: bool = False,
+    extract_images_in_pdf: bool = False,
+    extract_image_block_types: Optional[List[str]] = None,
+    extract_image_block_output_dir: Optional[str] = None,
+    extract_image_block_to_payload: bool = False,
     xml_keep_tags: bool = False,
     data_source_metadata: Optional[DataSourceMetadata] = None,
     metadata_filename: Optional[str] = None,
@@ -194,27 +194,28 @@ def partition(
         additional metadata field, "text_as_html," where the value (string) is a just a
         transformation of the data into an HTML <table>.
         The "text" field for a partitioned Table Element is always present, whether True or False.
-    pdf_extract_images
+    extract_images_in_pdf
         Only applicable if `strategy=hi_res`.
-        If True, any detected images will be saved in the path specified by 'image_output_dir_path'
-        or stored as base64 encoded data within metadata fields.
+        If True, any detected images will be saved in the path specified by
+        'extract_image_block_output_dir' or stored as base64 encoded data within metadata fields.
         Deprecation Note: This parameter is marked for deprecation. Future versions will use
-        'extract_element_types' for broader extraction capabilities.
-    pdf_extract_element_types
+        'extract_image_block_types' for broader extraction capabilities.
+    extract_image_block_types
         Only applicable if `strategy=hi_res`.
         Images of the element type(s) specified in this list (e.g., ["Image", "Table"]) will be
-        saved in the path specified by 'image_output_dir_path' or stored as base64 encoded data
-        within metadata fields.
-    pdf_extract_to_payload
+        saved in the path specified by 'extract_image_block_output_dir' or stored as base64
+        encoded data within metadata fields.
+    extract_image_block_to_payload
         Only applicable if `strategy=hi_res`.
-        If True, images of the element type(s) defined in 'extract_element_types' will be encoded
-        as base64 data and stored in two metadata fields: 'image_base64' and 'image_mime_type'.
+        If True, images of the element type(s) defined in 'extract_image_block_types' will be
+        encoded as base64 data and stored in two metadata fields: 'image_base64' and
+        'image_mime_type'.
         This parameter facilitates the inclusion of element data directly within the payload,
         especially for web-based applications or APIs.
-    pdf_image_output_dir_path
-        Only applicable if `strategy=hi_res` and `pdf_extract_to_payload=False`.
+    extract_image_block_output_dir
+        Only applicable if `strategy=hi_res` and `extract_image_block_to_payload=False`.
         The filesystem path for saving images of the element type(s)
-        specified in 'extract_element_types'.
+        specified in 'extract_image_block_types'.
     xml_keep_tags
         If True, will retain the XML tags in the output. Otherwise it will simply extract
         the text from within the tags. Only applies to partition_xml.
@@ -413,11 +414,11 @@ def partition(
             infer_table_structure=infer_table_structure,
             strategy=strategy,
             languages=languages,
-            extract_images_in_pdf=pdf_extract_images,
-            extract_element_types=pdf_extract_element_types,
-            image_output_dir_path=pdf_image_output_dir_path,
-            extract_to_payload=pdf_extract_to_payload,
             hi_res_model_name=hi_res_model_name or model_name,
+            extract_images_in_pdf=extract_images_in_pdf,
+            extract_image_block_types=extract_image_block_types,
+            extract_image_block_output_dir=extract_image_block_output_dir,
+            extract_image_block_to_payload=extract_image_block_to_payload,
             **kwargs,
         )
     elif (filetype == FileType.PNG) or (filetype == FileType.JPG) or (filetype == FileType.TIFF):
@@ -430,6 +431,10 @@ def partition(
             strategy=strategy,
             languages=languages,
             hi_res_model_name=hi_res_model_name or model_name,
+            extract_images_in_pdf=extract_images_in_pdf,
+            extract_image_block_types=extract_image_block_types,
+            extract_image_block_output_dir=extract_image_block_output_dir,
+            extract_image_block_to_payload=extract_image_block_to_payload,
             **kwargs,
         )
     elif filetype == FileType.TXT:
diff --git a/unstructured/partition/image.py b/unstructured/partition/image.py
index 9fb890d28..27e1fb03b 100644
--- a/unstructured/partition/image.py
+++ b/unstructured/partition/image.py
@@ -27,9 +27,9 @@ def partition_image(
     chunking_strategy: Optional[str] = None,
     hi_res_model_name: Optional[str] = None,
     extract_images_in_pdf: bool = False,
-    extract_element_types: Optional[List[str]] = None,
-    image_output_dir_path: Optional[str] = None,
-    extract_to_payload: bool = False,
+    extract_image_block_types: Optional[List[str]] = None,
+    extract_image_block_output_dir: Optional[str] = None,
+    extract_image_block_to_payload: bool = False,
     **kwargs,
 ) -> List[Element]:
     """Parses an image into a list of interpreted elements.
@@ -64,25 +64,26 @@ def partition_image(
         The layout detection model used when partitioning strategy is set to `hi_res`.
     extract_images_in_pdf
         Only applicable if `strategy=hi_res`.
-        If True, any detected images will be saved in the path specified by 'image_output_dir_path'
-        or stored as base64 encoded data within metadata fields.
+        If True, any detected images will be saved in the path specified by
+        'extract_image_block_output_dir' or stored as base64 encoded data within metadata fields.
         Deprecation Note: This parameter is marked for deprecation. Future versions will use
-        'extract_element_types' for broader extraction capabilities.
-    extract_element_types
+        'extract_image_block_types' for broader extraction capabilities.
+    extract_image_block_types
         Only applicable if `strategy=hi_res`.
         Images of the element type(s) specified in this list (e.g., ["Image", "Table"]) will be
-        saved in the path specified by 'image_output_dir_path' or stored as base64 encoded data
-        within metadata fields.
-    extract_to_payload
+        saved in the path specified by 'extract_image_block_output_dir' or stored as base64 encoded
+        data within metadata fields.
+    extract_image_block_to_payload
         Only applicable if `strategy=hi_res`.
-        If True, images of the element type(s) defined in 'extract_element_types' will be encoded
-        as base64 data and stored in two metadata fields: 'image_base64' and 'image_mime_type'.
+        If True, images of the element type(s) defined in 'extract_image_block_types' will be
+        encoded as base64 data and stored in two metadata fields: 'image_base64' and
+        'image_mime_type'.
         This parameter facilitates the inclusion of element data directly within the payload,
         especially for web-based applications or APIs.
-    image_output_dir_path
-        Only applicable if `strategy=hi_res` and `extract_to_payload=False`.
+    extract_image_block_output_dir
+        Only applicable if `strategy=hi_res` and `extract_image_block_to_payload=False`.
         The filesystem path for saving images of the element type(s)
-        specified in 'extract_element_types'.
+        specified in 'extract_image_block_types'.
     """
     exactly_one(filename=filename, file=file)
 
@@ -119,8 +120,8 @@ def partition_image(
         metadata_last_modified=metadata_last_modified,
         hi_res_model_name=hi_res_model_name,
         extract_images_in_pdf=extract_images_in_pdf,
-        extract_element_types=extract_element_types,
-        image_output_dir_path=image_output_dir_path,
-        extract_to_payload=extract_to_payload,
+        extract_image_block_types=extract_image_block_types,
+        extract_image_block_output_dir=extract_image_block_output_dir,
+        extract_image_block_to_payload=extract_image_block_to_payload,
         **kwargs,
     )
diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py
index 9e61023da..09cae60cb 100644
--- a/unstructured/partition/pdf.py
+++ b/unstructured/partition/pdf.py
@@ -141,9 +141,9 @@ def partition_pdf(
     links: Sequence[Link] = [],
     hi_res_model_name: Optional[str] = None,
     extract_images_in_pdf: bool = False,
-    extract_element_types: Optional[List[str]] = None,
-    image_output_dir_path: Optional[str] = None,
-    extract_to_payload: bool = False,
+    extract_image_block_types: Optional[List[str]] = None,
+    extract_image_block_output_dir: Optional[str] = None,
+    extract_image_block_to_payload: bool = False,
     **kwargs,
 ) -> List[Element]:
     """Parses a pdf document into a list of interpreted elements.
@@ -177,25 +177,26 @@ def partition_pdf(
         The layout detection model used when partitioning strategy is set to `hi_res`.
     extract_images_in_pdf
         Only applicable if `strategy=hi_res`.
-        If True, any detected images will be saved in the path specified by 'image_output_dir_path'
-        or stored as base64 encoded data within metadata fields.
+        If True, any detected images will be saved in the path specified by
+        'extract_image_block_output_dir' or stored as base64 encoded data within metadata fields.
         Deprecation Note: This parameter is marked for deprecation. Future versions will use
-        'extract_element_types' for broader extraction capabilities.
-    extract_element_types
+        'extract_image_block_types' for broader extraction capabilities.
+    extract_image_block_types
         Only applicable if `strategy=hi_res`.
         Images of the element type(s) specified in this list (e.g., ["Image", "Table"]) will be
-        saved in the path specified by 'image_output_dir_path' or stored as base64 encoded data
-        within metadata fields.
-    extract_to_payload
+        saved in the path specified by 'extract_image_block_output_dir' or stored as base64
+        encoded data within metadata fields.
+    extract_image_block_to_payload
         Only applicable if `strategy=hi_res`.
-        If True, images of the element type(s) defined in 'extract_element_types' will be encoded
-        as base64 data and stored in two metadata fields: 'image_base64' and 'image_mime_type'.
+        If True, images of the element type(s) defined in 'extract_image_block_types' will be
+        encoded as base64 data and stored in two metadata fields: 'image_base64' and
+        'image_mime_type'.
         This parameter facilitates the inclusion of element data directly within the payload,
         especially for web-based applications or APIs.
-    image_output_dir_path
-        Only applicable if `strategy=hi_res` and `extract_to_payload=False`.
+    extract_image_block_output_dir
+        Only applicable if `strategy=hi_res` and `extract_image_block_to_payload=False`.
         The filesystem path for saving images of the element type(s)
-        specified in 'extract_element_types'.
+        specified in 'extract_image_block_types'.
     """
 
     exactly_one(filename=filename, file=file)
@@ -212,9 +213,9 @@ def partition_pdf(
         metadata_last_modified=metadata_last_modified,
         hi_res_model_name=hi_res_model_name,
         extract_images_in_pdf=extract_images_in_pdf,
-        extract_element_types=extract_element_types,
-        image_output_dir_path=image_output_dir_path,
-        extract_to_payload=extract_to_payload,
+        extract_image_block_types=extract_image_block_types,
+        extract_image_block_output_dir=extract_image_block_output_dir,
+        extract_image_block_to_payload=extract_image_block_to_payload,
         **kwargs,
     )
 
@@ -266,9 +267,9 @@ def _partition_pdf_or_image_local(
     metadata_last_modified: Optional[str] = None,
     pdf_text_extractable: bool = False,
     extract_images_in_pdf: bool = False,
-    extract_element_types: Optional[List[str]] = None,
-    image_output_dir_path: Optional[str] = None,
-    extract_to_payload: bool = False,
+    extract_image_block_types: Optional[List[str]] = None,
+    extract_image_block_output_dir: Optional[str] = None,
+    extract_image_block_to_payload: bool = False,
     analysis: bool = False,
     analyzed_image_output_dir_path: Optional[str] = None,
     **kwargs,
@@ -406,7 +407,7 @@ def _partition_pdf_or_image_local(
         **kwargs,
     )
 
-    extract_element_types = check_element_types_to_extract(extract_element_types)
+    extract_image_block_types = check_element_types_to_extract(extract_image_block_types)
     #  NOTE(christine): `extract_images_in_pdf` would deprecate
     #  (but continue to support for a while)
     if extract_images_in_pdf:
@@ -417,11 +418,11 @@ def _partition_pdf_or_image_local(
             file=file,
             is_image=is_image,
             pdf_image_dpi=pdf_image_dpi,
-            extract_to_payload=extract_to_payload,
-            output_dir_path=image_output_dir_path,
+            extract_image_block_to_payload=extract_image_block_to_payload,
+            output_dir_path=extract_image_block_output_dir,
         )
 
-    for el_type in extract_element_types:
+    for el_type in extract_image_block_types:
         if extract_images_in_pdf and el_type == ElementType.IMAGE:
             continue
 
@@ -432,8 +433,8 @@ def _partition_pdf_or_image_local(
             file=file,
             is_image=is_image,
             pdf_image_dpi=pdf_image_dpi,
-            extract_to_payload=extract_to_payload,
-            output_dir_path=image_output_dir_path,
+            extract_image_block_to_payload=extract_image_block_to_payload,
+            output_dir_path=extract_image_block_output_dir,
         )
 
     out_elements = []
@@ -444,7 +445,7 @@ def _partition_pdf_or_image_local(
         if isinstance(el, Image):
             if (
                 not extract_images_in_pdf
-                and ElementType.IMAGE not in extract_element_types
+                and ElementType.IMAGE not in extract_image_block_types
                 and (el.text is None or len(el.text) < 24 or el.text.find(" ") == -1)
             ):
                 # NOTE(crag): small chunks of text from Image elements tend to be garbage
@@ -478,9 +479,9 @@ def partition_pdf_or_image(
     metadata_last_modified: Optional[str] = None,
     hi_res_model_name: Optional[str] = None,
     extract_images_in_pdf: bool = False,
-    extract_element_types: Optional[List[str]] = None,
-    image_output_dir_path: Optional[str] = None,
-    extract_to_payload: bool = False,
+    extract_image_block_types: Optional[List[str]] = None,
+    extract_image_block_output_dir: Optional[str] = None,
+    extract_image_block_to_payload: bool = False,
     **kwargs,
 ) -> List[Element]:
     """Parses a pdf or image document into a list of interpreted elements."""
@@ -523,7 +524,7 @@ def partition_pdf_or_image(
         pdf_text_extractable=pdf_text_extractable,
         infer_table_structure=infer_table_structure,
         extract_images_in_pdf=extract_images_in_pdf,
-        extract_element_types=extract_element_types,
+        extract_image_block_types=extract_image_block_types,
     )
 
     if file is not None:
@@ -544,9 +545,9 @@ def partition_pdf_or_image(
                 hi_res_model_name=hi_res_model_name,
                 pdf_text_extractable=pdf_text_extractable,
                 extract_images_in_pdf=extract_images_in_pdf,
-                extract_element_types=extract_element_types,
-                image_output_dir_path=image_output_dir_path,
-                extract_to_payload=extract_to_payload,
+                extract_image_block_types=extract_image_block_types,
+                extract_image_block_output_dir=extract_image_block_output_dir,
+                extract_image_block_to_payload=extract_image_block_to_payload,
                 **kwargs,
             )
             out_elements = _process_uncategorized_text_elements(elements)
diff --git a/unstructured/partition/pdf_image/pdf_image_utils.py b/unstructured/partition/pdf_image/pdf_image_utils.py
index 5ffd8d07f..d551a46e5 100644
--- a/unstructured/partition/pdf_image/pdf_image_utils.py
+++ b/unstructured/partition/pdf_image/pdf_image_utils.py
@@ -82,7 +82,7 @@ def save_elements(
     filename: str = "",
     file: Optional[Union[bytes, BinaryIO]] = None,
     is_image: bool = False,
-    extract_to_payload: bool = False,
+    extract_image_block_to_payload: bool = False,
     output_dir_path: Optional[str] = None,
 ):
     """
@@ -143,7 +143,7 @@ def save_elements(
                 image_path = image_paths[page_number - 1]
                 image = Image.open(image_path)
                 cropped_image = image.crop((x1, y1, x2, y2))
-                if extract_to_payload:
+                if extract_image_block_to_payload:
                     buffered = BytesIO()
                     cropped_image.save(buffered, format="JPEG")
                     img_base64 = base64.b64encode(buffered.getvalue())
@@ -159,28 +159,28 @@ def save_elements(
 
 
 def check_element_types_to_extract(
-    extract_element_types: Optional[List[str]],
+    extract_image_block_types: Optional[List[str]],
 ) -> List[str]:
     """Check and normalize the provided list of element types to extract."""
 
-    if extract_element_types is None:
+    if extract_image_block_types is None:
         return []
 
-    if not isinstance(extract_element_types, list):
+    if not isinstance(extract_image_block_types, list):
         raise TypeError(
-            "The extract_element_types parameter must be a list of element types as strings, "
+            "The extract_image_block_types parameter must be a list of element types as strings, "
             "ex. ['Table', 'Image']",
         )
 
     available_element_types = list(ElementType.to_dict().values())
-    normalized_extract_element_types = []
-    for el_type in extract_element_types:
+    normalized_extract_image_block_types = []
+    for el_type in extract_image_block_types:
         normalized_el_type = el_type.lower().capitalize()
         if normalized_el_type not in available_element_types:
             logger.warning(f"The requested type ({el_type}) doesn't match any available type")
-        normalized_extract_element_types.append(normalized_el_type)
+        normalized_extract_image_block_types.append(normalized_el_type)
 
-    return normalized_extract_element_types
+    return normalized_extract_image_block_types
 
 
 def valid_text(text: str) -> bool:
diff --git a/unstructured/partition/strategies.py b/unstructured/partition/strategies.py
index 98fe98582..2a3bc226c 100644
--- a/unstructured/partition/strategies.py
+++ b/unstructured/partition/strategies.py
@@ -27,7 +27,7 @@ def determine_pdf_or_image_strategy(
     pdf_text_extractable: bool = False,
     infer_table_structure: bool = False,
     extract_images_in_pdf: bool = False,
-    extract_element_types: Optional[List[str]] = None,
+    extract_image_block_types: Optional[List[str]] = None,
 ):
     """Determines what strategy to use for processing PDFs or images, accounting for fallback
     logic if some dependencies are not available."""
@@ -35,7 +35,7 @@ def determine_pdf_or_image_strategy(
     unstructured_inference_installed = dependency_exists("unstructured_inference")
 
     if strategy == PartitionStrategy.AUTO:
-        extract_element = extract_images_in_pdf or bool(extract_element_types)
+        extract_element = extract_images_in_pdf or bool(extract_image_block_types)
         if is_image:
             strategy = _determine_image_auto_strategy()
         else: