diff --git a/CHANGELOG.md b/CHANGELOG.md index 1263dd3fe..f8504fcf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,11 @@ -## 0.7.5-dev1 +## 0.7.5-dev2 ### Enhancements * Adds functionality to sort elements in `partition_pdf` for `fast` strategy * Adds ingest tests with `--fast` strategy on PDF documents +* Adds --api-key to unstructured-ingest + ### Features diff --git a/unstructured/__version__.py b/unstructured/__version__.py index c9bc2f642..05d356e5d 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.7.5-dev1" # pragma: no cover +__version__ = "0.7.5-dev2" # pragma: no cover diff --git a/unstructured/ingest/interfaces.py b/unstructured/ingest/interfaces.py index 9552d3b98..973a7ee8f 100644 --- a/unstructured/ingest/interfaces.py +++ b/unstructured/ingest/interfaces.py @@ -29,6 +29,7 @@ class StandardConnectorConfig: metadata_include: Optional[str] = None partition_by_api: bool = False partition_endpoint: str = "https://api.unstructured.io/general/v0/general" + api_key: str = "" preserve_downloads: bool = False re_download: bool = False @@ -179,9 +180,13 @@ class BaseIngestDoc(ABC): logger.debug(f"Using remote partition ({endpoint})") with open(self.filename, "rb") as f: + headers_dict = {} + if len(self.standard_config.api_key) > 0: + headers_dict["UNSTRUCTURED-API-KEY"] = self.standard_config.api_key response = requests.post( f"{endpoint}", files={"files": (str(self.filename), f)}, + headers=headers_dict, # TODO: add m_data_source_metadata to unstructured-api pipeline_api and then # pass the stringified json here ) diff --git a/unstructured/ingest/main.py b/unstructured/ingest/main.py index 2d4e0dc9e..d795bca40 100755 --- a/unstructured/ingest/main.py +++ b/unstructured/ingest/main.py @@ -151,6 +151,11 @@ class MainProcess: help="The method that will be used to process the documents. " "Default: auto. Other strategies include `fast` and `hi_res`.", ) +@click.option( + "--api-key", + default="", + help="API Key for partition endpoint.", +) @click.option( "--local-input-path", default=None, @@ -443,6 +448,7 @@ def main( partition_by_api, partition_endpoint, partition_strategy, + api_key, local_input_path, local_recursive, local_file_glob, @@ -545,6 +551,7 @@ def main( partition_endpoint=partition_endpoint, preserve_downloads=preserve_downloads, re_download=re_download, + api_key=api_key, ) if remote_url: protocol = urlparse(remote_url).scheme