fix: update python SDK syntax for forward compatibility (#3656)

Wrap the `shared.PartitionParameters` usage with
`operations.PartitionRequest`. Passing `shared.PartitionParameters` directly
to `partition()` has been deprecated since v0.23.0 of the SDK, and will be
unsupported in v0.26.0.
This commit is contained in:
Austin Walker 2024-09-24 12:37:38 -04:00 committed by GitHub
parent 3bab9d93e6
commit 6428d19e5a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 48 additions and 39 deletions

View File

@ -1,4 +1,4 @@
## 0.15.14-dev1
## 0.15.14-dev2
### Enhancements
@ -6,13 +6,14 @@
### Fixes
* **Update Python SDK usage in `partition_via_api`.** Make a minor syntax change to ensure forward compatibility with the upcoming 0.26.0 Python SDK.
* **Remove "unused" `date_from_file_object` parameter.** As part of simplifying the partitioning parameter set, remove the `date_from_file_object` parameter. A file object does not have a last-modified date attribute, so it can never give a useful value. When a file object is used as the document source (such as in the Unstructured API), the last-modified date must come from the `metadata_last_modified` argument.
## 0.15.13
### BREAKING CHANGES
* **Remove dead experimental code.** Unused code in `file_utils.experimantal` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code.
* **Remove dead experimental code.** Unused code in `file_utils.experimental` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code.
### Enhancements

View File

@ -16,3 +16,5 @@ urllib3<1.27
botocore<1.34.132
# TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
importlib-metadata>=8.5.0
# (austin): Versions below this have a different interface for passing parameters
unstructured-client>=0.23.0

View File

@ -9,6 +9,7 @@ import pytest
import requests
from unstructured_client.general import General
from unstructured_client.models import shared
from unstructured_client.models.operations import PartitionRequest
from unstructured_client.models.shared import PartitionParameters
from unstructured.documents.elements import ElementType, NarrativeText
@ -55,7 +56,7 @@ def test_partition_via_api_with_file_correctly_calls_sdk(
# Update the fixture content to match the format passed to partition_via_api
modified_expected_call = expected_call_[:]
modified_expected_call[1].files.content = f
modified_expected_call[1].partition_parameters.files.content = f
partition_mock_.assert_called_once_with(*modified_expected_call)
assert isinstance(partition_mock_.call_args_list[0].args[0], General)
@ -76,7 +77,7 @@ def test_partition_via_api_warns_with_file_and_filename_and_calls_sdk(
# Update the fixture content to match the format passed to partition_via_api
modified_expected_call = expected_call_[:]
modified_expected_call[1].files.content = f
modified_expected_call[1].partition_parameters.files.content = f
partition_mock_.assert_called_once_with(*modified_expected_call)
assert "WARNING" in caplog.text
@ -487,36 +488,38 @@ def expected_call_():
file_bytes = f.read()
return [
ANY,
PartitionParameters(
files=shared.Files(
content=file_bytes,
file_name=example_doc_path("eml/fake-email.eml"),
),
chunking_strategy=None,
combine_under_n_chars=None,
coordinates=False,
encoding=None,
extract_image_block_types=None,
gz_uncompressed_content_type=None,
hi_res_model_name=None,
include_orig_elements=None,
include_page_breaks=False,
languages=None,
max_characters=None,
multipage_sections=True,
new_after_n_chars=None,
ocr_languages=None,
output_format=shared.OutputFormat.APPLICATION_JSON,
overlap=0,
overlap_all=False,
pdf_infer_table_structure=True,
similarity_threshold=None,
skip_infer_table_types=None,
split_pdf_concurrency_level=5,
split_pdf_page=True,
starting_page_number=None,
strategy=shared.Strategy.HI_RES,
unique_element_ids=False,
xml_keep_tags=False,
PartitionRequest(
partition_parameters=PartitionParameters(
files=shared.Files(
content=file_bytes,
file_name=example_doc_path("eml/fake-email.eml"),
),
chunking_strategy=None,
combine_under_n_chars=None,
coordinates=False,
encoding=None,
extract_image_block_types=None,
gz_uncompressed_content_type=None,
hi_res_model_name=None,
include_orig_elements=None,
include_page_breaks=False,
languages=None,
max_characters=None,
multipage_sections=True,
new_after_n_chars=None,
ocr_languages=None,
output_format=shared.OutputFormat.APPLICATION_JSON,
overlap=0,
overlap_all=False,
pdf_infer_table_structure=True,
similarity_threshold=None,
skip_infer_table_types=None,
split_pdf_concurrency_level=5,
split_pdf_page=True,
starting_page_number=None,
strategy=shared.Strategy.HI_RES,
unique_element_ids=False,
xml_keep_tags=False,
)
),
]

View File

@ -1 +1 @@
__version__ = "0.15.14-dev1" # pragma: no cover
__version__ = "0.15.14-dev2" # pragma: no cover

View File

@ -5,7 +5,7 @@ from typing import IO, Any, Optional, Sequence
import requests
from unstructured_client import UnstructuredClient
from unstructured_client.models import shared
from unstructured_client.models import operations, shared
from unstructured.documents.elements import Element
from unstructured.logger import logger
@ -83,8 +83,11 @@ def partition_via_api(
)
files = shared.Files(content=file, file_name=metadata_filename)
req = shared.PartitionParameters(files=files, **request_kwargs)
response = sdk.general.partition(req)
req = operations.PartitionRequest(
partition_parameters=shared.PartitionParameters(files=files, **request_kwargs)
)
response = sdk.general.partition(request=req)
if response.status_code == 200:
return elements_from_json(text=response.raw_response.text)