mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-13 08:01:37 +00:00
fix: update python SDK syntax for forward compatibility (#3656)
Wrap the `shared.PartitionParameters` usage with `operations.PartitionRequest`. Passing `shared.PartitionParameters` directly to the SDK has been deprecated since v0.23.0 of the SDK, and will be unsupported in v0.26.0.
This commit is contained in:
parent
3bab9d93e6
commit
6428d19e5a
@ -1,4 +1,4 @@
|
||||
## 0.15.14-dev1
|
||||
## 0.15.14-dev2
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -6,13 +6,14 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Update Python SDK usage in `partition_via_api`.** Make a minor syntax change to ensure forward compatibility with the upcoming 0.26.0 Python SDK.
|
||||
* **Remove "unused" `date_from_file_object` parameter.** As part of simplifying the partitioning parameter set, remove the `date_from_file_object` parameter. A file object does not have a last-modified date attribute, so it can never give a useful value. When a file object is used as the document source (such as in the Unstructured API), the last-modified date must come from the `metadata_last_modified` argument.
|
||||
|
||||
## 0.15.13
|
||||
|
||||
### BREAKING CHANGES
|
||||
|
||||
* **Remove dead experimental code.** Unused code in `file_utils.experimantal` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code.
|
||||
* **Remove dead experimental code.** Unused code in `file_utils.experimental` and `file_utils.metadata` was removed. These functions were never published in the documentation, but if a client dug these out and used them this removal could break client code.
|
||||
|
||||
### Enhancements
|
||||
|
||||
|
||||
@ -16,3 +16,5 @@ urllib3<1.27
|
||||
botocore<1.34.132
|
||||
# TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
|
||||
importlib-metadata>=8.5.0
|
||||
# (austin): Versions below this have a different interface for passing parameters
|
||||
unstructured-client>=0.23.0
|
||||
|
||||
@ -9,6 +9,7 @@ import pytest
|
||||
import requests
|
||||
from unstructured_client.general import General
|
||||
from unstructured_client.models import shared
|
||||
from unstructured_client.models.operations import PartitionRequest
|
||||
from unstructured_client.models.shared import PartitionParameters
|
||||
|
||||
from unstructured.documents.elements import ElementType, NarrativeText
|
||||
@ -55,7 +56,7 @@ def test_partition_via_api_with_file_correctly_calls_sdk(
|
||||
|
||||
# Update the fixture content to match the format passed to partition_via_api
|
||||
modified_expected_call = expected_call_[:]
|
||||
modified_expected_call[1].files.content = f
|
||||
modified_expected_call[1].partition_parameters.files.content = f
|
||||
|
||||
partition_mock_.assert_called_once_with(*modified_expected_call)
|
||||
assert isinstance(partition_mock_.call_args_list[0].args[0], General)
|
||||
@ -76,7 +77,7 @@ def test_partition_via_api_warns_with_file_and_filename_and_calls_sdk(
|
||||
|
||||
# Update the fixture content to match the format passed to partition_via_api
|
||||
modified_expected_call = expected_call_[:]
|
||||
modified_expected_call[1].files.content = f
|
||||
modified_expected_call[1].partition_parameters.files.content = f
|
||||
|
||||
partition_mock_.assert_called_once_with(*modified_expected_call)
|
||||
assert "WARNING" in caplog.text
|
||||
@ -487,36 +488,38 @@ def expected_call_():
|
||||
file_bytes = f.read()
|
||||
return [
|
||||
ANY,
|
||||
PartitionParameters(
|
||||
files=shared.Files(
|
||||
content=file_bytes,
|
||||
file_name=example_doc_path("eml/fake-email.eml"),
|
||||
),
|
||||
chunking_strategy=None,
|
||||
combine_under_n_chars=None,
|
||||
coordinates=False,
|
||||
encoding=None,
|
||||
extract_image_block_types=None,
|
||||
gz_uncompressed_content_type=None,
|
||||
hi_res_model_name=None,
|
||||
include_orig_elements=None,
|
||||
include_page_breaks=False,
|
||||
languages=None,
|
||||
max_characters=None,
|
||||
multipage_sections=True,
|
||||
new_after_n_chars=None,
|
||||
ocr_languages=None,
|
||||
output_format=shared.OutputFormat.APPLICATION_JSON,
|
||||
overlap=0,
|
||||
overlap_all=False,
|
||||
pdf_infer_table_structure=True,
|
||||
similarity_threshold=None,
|
||||
skip_infer_table_types=None,
|
||||
split_pdf_concurrency_level=5,
|
||||
split_pdf_page=True,
|
||||
starting_page_number=None,
|
||||
strategy=shared.Strategy.HI_RES,
|
||||
unique_element_ids=False,
|
||||
xml_keep_tags=False,
|
||||
PartitionRequest(
|
||||
partition_parameters=PartitionParameters(
|
||||
files=shared.Files(
|
||||
content=file_bytes,
|
||||
file_name=example_doc_path("eml/fake-email.eml"),
|
||||
),
|
||||
chunking_strategy=None,
|
||||
combine_under_n_chars=None,
|
||||
coordinates=False,
|
||||
encoding=None,
|
||||
extract_image_block_types=None,
|
||||
gz_uncompressed_content_type=None,
|
||||
hi_res_model_name=None,
|
||||
include_orig_elements=None,
|
||||
include_page_breaks=False,
|
||||
languages=None,
|
||||
max_characters=None,
|
||||
multipage_sections=True,
|
||||
new_after_n_chars=None,
|
||||
ocr_languages=None,
|
||||
output_format=shared.OutputFormat.APPLICATION_JSON,
|
||||
overlap=0,
|
||||
overlap_all=False,
|
||||
pdf_infer_table_structure=True,
|
||||
similarity_threshold=None,
|
||||
skip_infer_table_types=None,
|
||||
split_pdf_concurrency_level=5,
|
||||
split_pdf_page=True,
|
||||
starting_page_number=None,
|
||||
strategy=shared.Strategy.HI_RES,
|
||||
unique_element_ids=False,
|
||||
xml_keep_tags=False,
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.15.14-dev1" # pragma: no cover
|
||||
__version__ = "0.15.14-dev2" # pragma: no cover
|
||||
|
||||
@ -5,7 +5,7 @@ from typing import IO, Any, Optional, Sequence
|
||||
|
||||
import requests
|
||||
from unstructured_client import UnstructuredClient
|
||||
from unstructured_client.models import shared
|
||||
from unstructured_client.models import operations, shared
|
||||
|
||||
from unstructured.documents.elements import Element
|
||||
from unstructured.logger import logger
|
||||
@ -83,8 +83,11 @@ def partition_via_api(
|
||||
)
|
||||
files = shared.Files(content=file, file_name=metadata_filename)
|
||||
|
||||
req = shared.PartitionParameters(files=files, **request_kwargs)
|
||||
response = sdk.general.partition(req)
|
||||
req = operations.PartitionRequest(
|
||||
partition_parameters=shared.PartitionParameters(files=files, **request_kwargs)
|
||||
)
|
||||
|
||||
response = sdk.general.partition(request=req)
|
||||
|
||||
if response.status_code == 200:
|
||||
return elements_from_json(text=response.raw_response.text)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user