fix: update python SDK syntax for forward compatibility (#3656)

Wrap the `shared.PartitionParameters` usage with
`operations.PartitionRequest`. Passing `shared.PartitionParameters` directly
has been deprecated since v0.23.0 of the SDK and will be unsupported in v0.26.0.
This commit is contained in:
Austin Walker 2024-09-24 12:37:38 -04:00 committed by GitHub
parent 3bab9d93e6
commit 6428d19e5a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 48 additions and 39 deletions

View File

@ -1,4 +1,4 @@
## 0.15.14-dev1 ## 0.15.14-dev2
### Enhancements ### Enhancements
@ -6,13 +6,14 @@
### Fixes ### Fixes
* **Update Python SDK usage in `partition_via_api`.** Make a minor syntax change to ensure forward compatibility with the upcoming 0.26.0 Python SDK.
* **Remove "unused" `date_from_file_object` parameter.** As part of simplifying partitioning parameter set, remove `date_from_file_object` parameter. A file object does not have a last-modified date attribute so can never give a useful value. When a file-object is used as the document source (such as in Unstructured API) the last-modified date must come from the `metadata_last_modified` argument. * **Remove "unused" `date_from_file_object` parameter.** As part of simplifying partitioning parameter set, remove `date_from_file_object` parameter. A file object does not have a last-modified date attribute so can never give a useful value. When a file-object is used as the document source (such as in Unstructured API) the last-modified date must come from the `metadata_last_modified` argument.
## 0.15.13 ## 0.15.13
### BREAKING CHANGES ### BREAKING CHANGES
* **Remove dead experimental code.** Unused code in `file_utils.experimental` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code. * **Remove dead experimental code.** Unused code in `file_utils.experimental` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code.
### Enhancements ### Enhancements

View File

@ -16,3 +16,5 @@ urllib3<1.27
botocore<1.34.132 botocore<1.34.132
# TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile # TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
importlib-metadata>=8.5.0 importlib-metadata>=8.5.0
# (austin): Versions below this have a different interface for passing parameters
unstructured-client>=0.23.0

View File

@ -9,6 +9,7 @@ import pytest
import requests import requests
from unstructured_client.general import General from unstructured_client.general import General
from unstructured_client.models import shared from unstructured_client.models import shared
from unstructured_client.models.operations import PartitionRequest
from unstructured_client.models.shared import PartitionParameters from unstructured_client.models.shared import PartitionParameters
from unstructured.documents.elements import ElementType, NarrativeText from unstructured.documents.elements import ElementType, NarrativeText
@ -55,7 +56,7 @@ def test_partition_via_api_with_file_correctly_calls_sdk(
# Update the fixture content to match the format passed to partition_via_api # Update the fixture content to match the format passed to partition_via_api
modified_expected_call = expected_call_[:] modified_expected_call = expected_call_[:]
modified_expected_call[1].files.content = f modified_expected_call[1].partition_parameters.files.content = f
partition_mock_.assert_called_once_with(*modified_expected_call) partition_mock_.assert_called_once_with(*modified_expected_call)
assert isinstance(partition_mock_.call_args_list[0].args[0], General) assert isinstance(partition_mock_.call_args_list[0].args[0], General)
@ -76,7 +77,7 @@ def test_partition_via_api_warns_with_file_and_filename_and_calls_sdk(
# Update the fixture content to match the format passed to partition_via_api # Update the fixture content to match the format passed to partition_via_api
modified_expected_call = expected_call_[:] modified_expected_call = expected_call_[:]
modified_expected_call[1].files.content = f modified_expected_call[1].partition_parameters.files.content = f
partition_mock_.assert_called_once_with(*modified_expected_call) partition_mock_.assert_called_once_with(*modified_expected_call)
assert "WARNING" in caplog.text assert "WARNING" in caplog.text
@ -487,36 +488,38 @@ def expected_call_():
file_bytes = f.read() file_bytes = f.read()
return [ return [
ANY, ANY,
PartitionParameters( PartitionRequest(
files=shared.Files( partition_parameters=PartitionParameters(
content=file_bytes, files=shared.Files(
file_name=example_doc_path("eml/fake-email.eml"), content=file_bytes,
), file_name=example_doc_path("eml/fake-email.eml"),
chunking_strategy=None, ),
combine_under_n_chars=None, chunking_strategy=None,
coordinates=False, combine_under_n_chars=None,
encoding=None, coordinates=False,
extract_image_block_types=None, encoding=None,
gz_uncompressed_content_type=None, extract_image_block_types=None,
hi_res_model_name=None, gz_uncompressed_content_type=None,
include_orig_elements=None, hi_res_model_name=None,
include_page_breaks=False, include_orig_elements=None,
languages=None, include_page_breaks=False,
max_characters=None, languages=None,
multipage_sections=True, max_characters=None,
new_after_n_chars=None, multipage_sections=True,
ocr_languages=None, new_after_n_chars=None,
output_format=shared.OutputFormat.APPLICATION_JSON, ocr_languages=None,
overlap=0, output_format=shared.OutputFormat.APPLICATION_JSON,
overlap_all=False, overlap=0,
pdf_infer_table_structure=True, overlap_all=False,
similarity_threshold=None, pdf_infer_table_structure=True,
skip_infer_table_types=None, similarity_threshold=None,
split_pdf_concurrency_level=5, skip_infer_table_types=None,
split_pdf_page=True, split_pdf_concurrency_level=5,
starting_page_number=None, split_pdf_page=True,
strategy=shared.Strategy.HI_RES, starting_page_number=None,
unique_element_ids=False, strategy=shared.Strategy.HI_RES,
xml_keep_tags=False, unique_element_ids=False,
xml_keep_tags=False,
)
), ),
] ]

View File

@ -1 +1 @@
__version__ = "0.15.14-dev1" # pragma: no cover __version__ = "0.15.14-dev2" # pragma: no cover

View File

@ -5,7 +5,7 @@ from typing import IO, Any, Optional, Sequence
import requests import requests
from unstructured_client import UnstructuredClient from unstructured_client import UnstructuredClient
from unstructured_client.models import shared from unstructured_client.models import operations, shared
from unstructured.documents.elements import Element from unstructured.documents.elements import Element
from unstructured.logger import logger from unstructured.logger import logger
@ -83,8 +83,11 @@ def partition_via_api(
) )
files = shared.Files(content=file, file_name=metadata_filename) files = shared.Files(content=file, file_name=metadata_filename)
req = shared.PartitionParameters(files=files, **request_kwargs) req = operations.PartitionRequest(
response = sdk.general.partition(req) partition_parameters=shared.PartitionParameters(files=files, **request_kwargs)
)
response = sdk.general.partition(request=req)
if response.status_code == 200: if response.status_code == 200:
return elements_from_json(text=response.raw_response.text) return elements_from_json(text=response.raw_response.text)