mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-10-12 00:23:35 +00:00
chore: remove unused _partition_via_api
function (#999)
* don't push * fix: clean up code * fix: remove unused _partition_via_api * feat: update changelog * clean up * changelog and version * remove print * remove print * revert test file --------- Co-authored-by: Matt Robinson <mrobinson@unstructured.io> Co-authored-by: Matt Robinson <mrobinson@unstructuredai.io>
This commit is contained in:
parent
bef93aef6e
commit
2888c20a46
@ -1,4 +1,4 @@
|
|||||||
## 0.9.1-dev6
|
## 0.9.1-dev7
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
@ -13,12 +13,12 @@
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
* Remove unused `_partition_via_api` function
|
||||||
* Fixed emoji bug in `partition_xlsx`.
|
* Fixed emoji bug in `partition_xlsx`.
|
||||||
* Pass file_filename metadata when partitioning file object
|
* Pass `file_filename` metadata when partitioning file object
|
||||||
* Skip ingest test on missing Slack token
|
* Skip ingest test on missing Slack token
|
||||||
* Add Dropbox variables to CI environments
|
* Add Dropbox variables to CI environments
|
||||||
* Adds new element type `EmailAddress` for recognising email address in the text
|
* Adds new element type `EmailAddress` for recognising email address in the text
|
||||||
|
|
||||||
* Simplifies `min_partition` logic; makes partitions falling below the `min_partition`
|
* Simplifies `min_partition` logic; makes partitions falling below the `min_partition`
|
||||||
less likely.
|
less likely.
|
||||||
|
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.9.1-dev6" # pragma: no cover
|
__version__ = "0.9.1-dev7" # pragma: no cover
|
||||||
|
@ -1,56 +0,0 @@
|
|||||||
from typing import BinaryIO, List, Mapping, Optional, Tuple, Union
|
|
||||||
from urllib.parse import urlsplit
|
|
||||||
|
|
||||||
import requests # type: ignore
|
|
||||||
|
|
||||||
from unstructured.documents.elements import Element
|
|
||||||
|
|
||||||
|
|
||||||
def _partition_via_api(
    filename: str = "",
    file: Optional[Union[BinaryIO, bytes]] = None,
    url: str = "https://ml.unstructured.io/layout/pdf",
    token: Optional[str] = None,
    data: Optional[dict] = None,  # NOTE(alan): Remove after different models are handled by routing
    include_page_breaks: bool = False,
) -> List[Element]:
    """Partition a document by uploading it to a remote partitioning API.

    Args:
        filename: Path of the document. Opened in binary mode when ``file`` is
            not given; also sent as the name of the uploaded file.
        file: Already-open binary file object or raw bytes to upload. Takes
            precedence over opening ``filename`` and is never closed here.
        url: Endpoint the document is POSTed to. Its scheme and host are also
            used to build the ``/healthcheck`` URL.
        token: Bearer token for the ``Authorization`` header. When it is
            missing, the endpoint's healthcheck is queried first.
        data: Extra form fields forwarded unchanged with the upload.
        include_page_breaks: When true, insert a ``{"type": "PageBreak"}``
            entry between consecutive pages (never after the last page).

    Returns:
        The entries of every page's ``"elements"`` list, in page order, exactly
        as decoded from the JSON response (no conversion is applied here).

    Raises:
        FileNotFoundError: Neither ``filename`` nor ``file`` was supplied.
        ValueError: The healthcheck or the partition request did not answer
            with HTTP 200.
    """
    if not filename and not file:
        raise FileNotFoundError("No filename nor file were specified")

    split_url = urlsplit(url)
    healthcheck_url = f"{split_url.scheme}://{split_url.netloc}/healthcheck"
    if not token:
        # The healthcheck is only requested for unauthenticated calls, so its
        # status is only meaningful (and only inspected) on this path. Testing a
        # placeholder response that never received a status would reject every
        # call that supplies a token.
        healthcheck_response = requests.get(url=healthcheck_url)
        if healthcheck_response.status_code != 200:
            raise ValueError("endpoint api healthcheck has failed!")

    # Track a handle opened here separately so that it can be closed afterwards;
    # a caller-supplied ``file`` stays under the caller's control.
    owned_handle: Optional[BinaryIO] = None
    payload: Union[BinaryIO, bytes]
    if file:
        payload = file
    else:
        owned_handle = open(filename, "rb")  # noqa: SIM115
        payload = owned_handle

    try:
        file_: Mapping[str, Tuple[str, Union[BinaryIO, bytes]]] = {
            "file": (filename, payload),
        }
        response = requests.post(
            url=url,
            headers={"Authorization": f"Bearer {token}" if token else ""},
            files=file_,
            data=data,  # NOTE(alan): Remove after unstructured API is using routing
        )
    finally:
        if owned_handle is not None:
            owned_handle.close()

    if response.status_code != 200:
        raise ValueError(f"response status code = {response.status_code}")

    pages = response.json()["pages"]
    last_page_index = len(pages) - 1
    elements = []
    for i, page in enumerate(pages):
        elements.extend(page["elements"])
        if include_page_breaks and i < last_page_index:
            elements.append({"type": "PageBreak"})

    return elements
|
|
Loading…
x
Reference in New Issue
Block a user