feat: add --api-key parameter to unstructured-ingest (#644)

This commit is contained in:
kravetsmic 2023-06-14 08:05:18 +03:00 committed by GitHub
parent 9443bd40e2
commit 8258dbb25f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 2 deletions

View File

@ -1,9 +1,11 @@
## 0.7.5-dev1
## 0.7.5-dev2
### Enhancements
* Adds functionality to sort elements in `partition_pdf` for `fast` strategy
* Adds ingest tests with `--fast` strategy on PDF documents
* Adds `--api-key` option to `unstructured-ingest` for supplying an API key to the partition endpoint
### Features

View File

@ -1 +1 @@
__version__ = "0.7.5-dev1" # pragma: no cover
__version__ = "0.7.5-dev2" # pragma: no cover

View File

@ -29,6 +29,7 @@ class StandardConnectorConfig:
metadata_include: Optional[str] = None
partition_by_api: bool = False
partition_endpoint: str = "https://api.unstructured.io/general/v0/general"
api_key: str = ""
preserve_downloads: bool = False
re_download: bool = False
@ -179,9 +180,13 @@ class BaseIngestDoc(ABC):
logger.debug(f"Using remote partition ({endpoint})")
with open(self.filename, "rb") as f:
headers_dict = {}
if len(self.standard_config.api_key) > 0:
headers_dict["UNSTRUCTURED-API-KEY"] = self.standard_config.api_key
response = requests.post(
f"{endpoint}",
files={"files": (str(self.filename), f)},
headers=headers_dict,
# TODO: add m_data_source_metadata to unstructured-api pipeline_api and then
# pass the stringified json here
)

View File

@ -151,6 +151,11 @@ class MainProcess:
help="The method that will be used to process the documents. "
"Default: auto. Other strategies include `fast` and `hi_res`.",
)
@click.option(
"--api-key",
default="",
help="API Key for partition endpoint.",
)
@click.option(
"--local-input-path",
default=None,
@ -443,6 +448,7 @@ def main(
partition_by_api,
partition_endpoint,
partition_strategy,
api_key,
local_input_path,
local_recursive,
local_file_glob,
@ -545,6 +551,7 @@ def main(
partition_endpoint=partition_endpoint,
preserve_downloads=preserve_downloads,
re_download=re_download,
api_key=api_key,
)
if remote_url:
protocol = urlparse(remote_url).scheme