diff --git a/CHANGELOG.md b/CHANGELOG.md index 122be8bf8..b4eea0e57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.15.14-dev1 +## 0.15.14-dev2 ### Enhancements @@ -6,13 +6,14 @@ ### Fixes +* **Update Python SDK usage in `partition_via_api`.** Make a minor syntax change to ensure forward compatibility with the upcoming 0.26.0 Python SDK. * **Remove "unused" `date_from_file_object` parameter.** As part of simplifying partitioning parameter set, remove `date_from_file_object` parameter. A file object does not have a last-modified date attribute so can never give a useful value. When a file-object is used as the document source (such as in Unstructured API) the last-modified date must come from the `metadata_last_modified` argument. ## 0.15.13 ### BREAKING CHANGES -* **Remove dead experimental code.** Unused code in `file_utils.experimantal` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code. +* **Remove dead experimental code.** Unused code in `file_utils.experimental` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code. ### Enhancements diff --git a/requirements/deps/constraints.txt b/requirements/deps/constraints.txt index e9befdbae..539e11907 100644 --- a/requirements/deps/constraints.txt +++ b/requirements/deps/constraints.txt @@ -16,3 +16,5 @@ urllib3<1.27 botocore<1.34.132 # TODO: Constriant due to both 8.5.0 and 8.4.0 being installed during pip-compile importlib-metadata>=8.5.0 +# (austin): Versions below this have a different interface for passing parameters +unstructured-client>=0.23.0 diff --git a/test_unstructured/partition/test_api.py b/test_unstructured/partition/test_api.py index 103c643b3..f95dd7859 100644 --- a/test_unstructured/partition/test_api.py +++ b/test_unstructured/partition/test_api.py @@ -9,6 +9,7 @@ import pytest import requests from unstructured_client.general import General from unstructured_client.models import shared +from unstructured_client.models.operations import PartitionRequest from unstructured_client.models.shared import PartitionParameters from unstructured.documents.elements import ElementType, NarrativeText @@ -55,7 +56,7 @@ def test_partition_via_api_with_file_correctly_calls_sdk( # Update the fixture content to match the format passed to partition_via_api modified_expected_call = expected_call_[:] - modified_expected_call[1].files.content = f + modified_expected_call[1].partition_parameters.files.content = f partition_mock_.assert_called_once_with(*modified_expected_call) assert isinstance(partition_mock_.call_args_list[0].args[0], General) @@ -76,7 +77,7 @@ def test_partition_via_api_warns_with_file_and_filename_and_calls_sdk( # Update the fixture content to match the format passed to partition_via_api modified_expected_call = expected_call_[:] - modified_expected_call[1].files.content = f + modified_expected_call[1].partition_parameters.files.content = f partition_mock_.assert_called_once_with(*modified_expected_call) assert "WARNING" in caplog.text @@ -487,36 +488,38 @@ def expected_call_(): file_bytes = f.read() return [ ANY, - PartitionParameters( - files=shared.Files( - content=file_bytes, - file_name=example_doc_path("eml/fake-email.eml"), - ), - chunking_strategy=None, - combine_under_n_chars=None, - coordinates=False, - encoding=None, - extract_image_block_types=None, - gz_uncompressed_content_type=None, - hi_res_model_name=None, - include_orig_elements=None, - include_page_breaks=False, - languages=None, - max_characters=None, - multipage_sections=True, - new_after_n_chars=None, - ocr_languages=None, - output_format=shared.OutputFormat.APPLICATION_JSON, - overlap=0, - overlap_all=False, - pdf_infer_table_structure=True, - similarity_threshold=None, - skip_infer_table_types=None, - split_pdf_concurrency_level=5, - split_pdf_page=True, - starting_page_number=None, - strategy=shared.Strategy.HI_RES, - unique_element_ids=False, - xml_keep_tags=False, + PartitionRequest( + partition_parameters=PartitionParameters( + files=shared.Files( + content=file_bytes, + file_name=example_doc_path("eml/fake-email.eml"), + ), + chunking_strategy=None, + combine_under_n_chars=None, + coordinates=False, + encoding=None, + extract_image_block_types=None, + gz_uncompressed_content_type=None, + hi_res_model_name=None, + include_orig_elements=None, + include_page_breaks=False, + languages=None, + max_characters=None, + multipage_sections=True, + new_after_n_chars=None, + ocr_languages=None, + output_format=shared.OutputFormat.APPLICATION_JSON, + overlap=0, + overlap_all=False, + pdf_infer_table_structure=True, + similarity_threshold=None, + skip_infer_table_types=None, + split_pdf_concurrency_level=5, + split_pdf_page=True, + starting_page_number=None, + strategy=shared.Strategy.HI_RES, + unique_element_ids=False, + xml_keep_tags=False, + ) ), ] diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 626bae57b..6e0c6fa73 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.15.14-dev1" # pragma: no cover +__version__ = "0.15.14-dev2" # pragma: no cover diff --git a/unstructured/partition/api.py b/unstructured/partition/api.py index b8a1cded2..00d2dc31b 100644 --- a/unstructured/partition/api.py +++ b/unstructured/partition/api.py @@ -5,7 +5,7 @@ from typing import IO, Any, Optional, Sequence import requests from unstructured_client import UnstructuredClient -from unstructured_client.models import shared +from unstructured_client.models import operations, shared from unstructured.documents.elements import Element from unstructured.logger import logger @@ -83,8 +83,11 @@ def partition_via_api( ) files = shared.Files(content=file, file_name=metadata_filename) - req = shared.PartitionParameters(files=files, **request_kwargs) - response = sdk.general.partition(req) + req = operations.PartitionRequest( + partition_parameters=shared.PartitionParameters(files=files, **request_kwargs) + ) + + response = sdk.general.partition(request=req) if response.status_code == 200: return elements_from_json(text=response.raw_response.text)