mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-10-11 08:03:10 +00:00
chore: remove unused _partition_via_api
function (#999)
* don't push * fix: clean up code * fix: remove unused _partition_via_api * feat: update changelog * clean up * changelog and version * remove print * remove print * revert test file --------- Co-authored-by: Matt Robinson <mrobinson@unstructured.io> Co-authored-by: Matt Robinson <mrobinson@unstructuredai.io>
This commit is contained in:
parent
bef93aef6e
commit
2888c20a46
@ -1,4 +1,4 @@
|
||||
## 0.9.1-dev6
|
||||
## 0.9.1-dev7
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -13,12 +13,12 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* Remove unused `_partition_via_api` function
|
||||
* Fixed emoji bug in `partition_xlsx`.
|
||||
* Pass file_filename metadata when partitioning file object
|
||||
* Pass `file_filename` metadata when partitioning file object
|
||||
* Skip ingest test on missing Slack token
|
||||
* Add Dropbox variables to CI environments
|
||||
* Adds new element type `EmailAddress` for recognising email addresses in text
|
||||
|
||||
* Simplifies `min_partition` logic; makes partitions falling below the `min_partition`
|
||||
less likely.
|
||||
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.9.1-dev6" # pragma: no cover
|
||||
__version__ = "0.9.1-dev7" # pragma: no cover
|
||||
|
@ -1,56 +0,0 @@
|
||||
from typing import BinaryIO, List, Mapping, Optional, Tuple, Union
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
from unstructured.documents.elements import Element
|
||||
|
||||
|
||||
def _partition_via_api(
    filename: str = "",
    file: Optional[Union[BinaryIO, bytes]] = None,
    url: str = "https://ml.unstructured.io/layout/pdf",
    token: Optional[str] = None,
    data: Optional[dict] = None,  # NOTE(alan): Remove after different models are handled by routing
    include_page_breaks: bool = False,
) -> List[Element]:
    """Partition a document by uploading it to a remote partitioning API.

    Args:
        filename: Path of the document. It is always sent as the multipart
            file name, and is opened in binary mode when ``file`` is falsy.
        file: Already-open binary stream or raw bytes to upload instead of
            reading ``filename`` from disk.
        url: Endpoint the document is POSTed to.
        token: Bearer token for the ``Authorization`` header. When it is not
            given, ``<scheme>://<host>/healthcheck`` is requested first.
        data: Extra form fields forwarded with the upload.
        include_page_breaks: When true, a ``{"type": "PageBreak"}`` entry is
            inserted between consecutive pages (never after the last one).

    Returns:
        The entries of every page's ``"elements"`` list, in page order, as
        decoded from the JSON response.

    Raises:
        FileNotFoundError: Neither ``filename`` nor ``file`` was provided.
        ValueError: The healthcheck or the partition request did not return
            HTTP 200.
    """
    if not filename and not file:
        raise FileNotFoundError("No filename nor file were specified")

    if not token:
        # Only unauthenticated calls are probed. The status test lives inside
        # this branch so that a call carrying a token is never judged against
        # a response that was not actually requested.
        split_url = urlsplit(url)
        healthcheck_url = f"{split_url.scheme}://{split_url.netloc}/healthcheck"
        healthcheck_response = requests.get(url=healthcheck_url)
        if healthcheck_response.status_code != 200:
            raise ValueError("endpoint api healthcheck has failed!")

    # Track a handle we open ourselves so it can be closed once the upload is
    # done; a caller-supplied ``file`` stays under the caller's control.
    opened_file: Optional[BinaryIO] = None
    payload: Union[BinaryIO, bytes]
    if file:
        payload = file
    else:
        opened_file = open(filename, "rb")  # noqa: SIM115
        payload = opened_file

    try:
        file_: Mapping[str, Tuple[str, Union[BinaryIO, bytes]]] = {
            "file": (filename, payload),
        }
        response = requests.post(
            url=url,
            headers={"Authorization": f"Bearer {token}" if token else ""},
            files=file_,
            data=data,  # NOTE(alan): Remove after unstructured API is using routing
        )
    finally:
        if opened_file is not None:
            opened_file.close()

    if response.status_code != 200:
        raise ValueError(f"response status code = {response.status_code}")

    pages = response.json()["pages"]
    last_page_index = len(pages) - 1
    elements = []
    for i, page in enumerate(pages):
        elements.extend(page["elements"])
        if include_page_breaks and i < last_page_index:
            elements.append({"type": "PageBreak"})

    return elements
|
Loading…
x
Reference in New Issue
Block a user