diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fb975686..ffe31b3a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.12.1-dev5 +## 0.12.1-dev6 ### Enhancements @@ -6,6 +6,7 @@ * **Add overlap option for chunking.** Add option to overlap chunks. Intra-chunk and inter-chunk overlap are requested separately. Intra-chunk overlap is applied only to the second and later chunks formed by text-splitting an oversized chunk. Inter-chunk overlap may also be specified; this applies overlap between "normal" (not-oversized) chunks. * **Salesforce connector accepts private key path or value.** Salesforce parameter `private-key-file` has been renamed to `private-key`. Private key can be provided as path to file or file contents. * **Add "basic" chunking to ingest CLI.** Add options to ingest CLI allowing access to the new "basic" chunking strategy and overlap options. +* **Make Elasticsearch Destination connector arguments optional.** The Elasticsearch Destination connector write settings (`batch-size-bytes`, `num-processes`) are now optional and fall back to default values when not specified; the `--num-processes` CLI default changes from 2 to 1. ### Features diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 291e14b02..4598bca38 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.12.1-dev5" # pragma: no cover +__version__ = "0.12.1-dev6" # pragma: no cover diff --git a/unstructured/ingest/cli/cmds/elasticsearch.py b/unstructured/ingest/cli/cmds/elasticsearch.py index d10d33062..58e3ec4d6 100644 --- a/unstructured/ingest/cli/cmds/elasticsearch.py +++ b/unstructured/ingest/cli/cmds/elasticsearch.py @@ -92,7 +92,7 @@ class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig): options = [ click.Option( ["--batch-size-bytes"], - required=True, + required=False, default=15_000_000, type=int, help="Size limit (in bytes) for each batch of items to be uploaded. 
Check" @@ -101,8 +101,8 @@ class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig): ), click.Option( ["--num-processes"], - required=True, - default=2, + required=False, + default=1, type=int, help="Number of processes to be used while uploading content", ), diff --git a/unstructured/ingest/connector/elasticsearch.py b/unstructured/ingest/connector/elasticsearch.py index b9ff3e237..4e0b77073 100644 --- a/unstructured/ingest/connector/elasticsearch.py +++ b/unstructured/ingest/connector/elasticsearch.py @@ -308,8 +308,8 @@ class ElasticsearchSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnec @dataclass class ElasticsearchWriteConfig(WriteConfig): - batch_size_bytes: int - num_processes: int + batch_size_bytes: int = 15_000_000 + num_processes: int = 1 @dataclass