mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-19 14:19:27 +00:00
fix API-297: List parameters incorrectly passed to API requests (#3154)
In two places — the Ingest workflow and direct calls to the `partition_via_api` function — parameters with list values are converted to strings before being passed to the Python client, e.g. ```python extract_image_block_types=["image"] -> extract_image_block_types='["image"]' ``` As of now, these parameters are parsed incorrectly when given as strings and correctly when given as lists. This PR removes the conversion from `PartitionConfig` and `partition_via_api`. --------- Co-authored-by: Filip Knefel <filip@unstructured.io>
This commit is contained in:
parent
2f0400f279
commit
c2065db716
@ -5,6 +5,7 @@
|
|||||||
### Features
|
### Features
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
* **Fix passing parameters to python-client** - Stop converting list arguments to strings when passing arguments to python-client in the Ingest workflow and `partition_via_api`.
|
||||||
|
|
||||||
**table metric bug fix** get_element_level_alignment() now will find all the matched indices in predicted table data instead of only returning the first match in the case of multiple matches for the same gt string.
|
**table metric bug fix** get_element_level_alignment() now will find all the matched indices in predicted table data instead of only returning the first match in the case of multiple matches for the same gt string.
|
||||||
|
|
||||||
|
@ -249,10 +249,12 @@ def test_partition_via_api_pass_list_type_parameters(monkeypatch):
|
|||||||
ANY,
|
ANY,
|
||||||
data=ANY,
|
data=ANY,
|
||||||
files=[
|
files=[
|
||||||
["extract_image_block_types", [None, '["image", "table"]']],
|
["extract_image_block_types[]", [None, "image"]],
|
||||||
|
["extract_image_block_types[]", [None, "table"]],
|
||||||
["files", ANY],
|
["files", ANY],
|
||||||
["languages", [None, '["eng"]']],
|
["languages[]", [None, "eng"]],
|
||||||
["skip_infer_table_types", [None, '["pdf", "docx"]']],
|
["skip_infer_table_types[]", [None, "pdf"]],
|
||||||
|
["skip_infer_table_types[]", [None, "docx"]],
|
||||||
["strategy", [None, "hi_res"]],
|
["strategy", [None, "hi_res"]],
|
||||||
],
|
],
|
||||||
headers=ANY,
|
headers=ANY,
|
||||||
|
@ -574,14 +574,11 @@ class BaseSingleIngestDoc(BaseIngestDoc, IngestDocJsonMixin, ABC):
|
|||||||
|
|
||||||
logger.debug(f"Using remote partition ({endpoint})")
|
logger.debug(f"Using remote partition ({endpoint})")
|
||||||
|
|
||||||
passthrough_partition_kwargs = {
|
|
||||||
k: str(v) for k, v in partition_kwargs.items() if v is not None
|
|
||||||
}
|
|
||||||
elements = partition_via_api(
|
elements = partition_via_api(
|
||||||
filename=str(self.filename),
|
filename=str(self.filename),
|
||||||
api_key=partition_config.api_key,
|
api_key=partition_config.api_key,
|
||||||
api_url=endpoint,
|
api_url=endpoint,
|
||||||
**passthrough_partition_kwargs,
|
**partition_kwargs,
|
||||||
)
|
)
|
||||||
# TODO: add m_data_source_metadata to unstructured-api pipeline_api and then
|
# TODO: add m_data_source_metadata to unstructured-api pipeline_api and then
|
||||||
# pass the stringified json here
|
# pass the stringified json here
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import json
|
|
||||||
from typing import IO, Optional
|
from typing import IO, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@ -84,13 +83,6 @@ def partition_via_api(
|
|||||||
)
|
)
|
||||||
files = shared.Files(content=file, file_name=metadata_filename)
|
files = shared.Files(content=file, file_name=metadata_filename)
|
||||||
|
|
||||||
# NOTE(christine): Converts all list type parameters to JSON formatted strings
|
|
||||||
# (e.g. ["image", "table"] -> '["image", "table"]')
|
|
||||||
# This can be removed if "speakeasy" supports passing list type parameters to FastAPI.
|
|
||||||
for k, v in request_kwargs.items():
|
|
||||||
if isinstance(v, list):
|
|
||||||
request_kwargs[k] = json.dumps(v)
|
|
||||||
|
|
||||||
req = shared.PartitionParameters(files=files, **request_kwargs)
|
req = shared.PartitionParameters(files=files, **request_kwargs)
|
||||||
response = sdk.general.partition(req)
|
response = sdk.general.partition(req)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user