mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

Adds OpenSearch as a source and destination. Since OpenSearch is a fork of Elasticsearch, these connectors inherit from the Elasticsearch connectors wherever possible. Changes: (1) adds an OpenSearch source connector to ingest documents from OpenSearch; (2) adds an OpenSearch destination connector to ingest documents from any supported source, embed them, and write the embeddings / documents into OpenSearch; (3) defines an example unstructured elements schema so users can easily set up their unstructured OpenSearch indexes. --------- Co-authored-by: potter-potter <david.potter@gmail.com>
26 lines
915 B
Python
26 lines
915 B
Python
# Example script: configures an OpenSearchRunner against a local OpenSearch
# host and runs it when the file is executed directly.
from unstructured.ingest.connector.opensearch import (
    OpenSearchAccessConfig,
    SimpleOpenSearchConfig,
)
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import OpenSearchRunner

if __name__ == "__main__":
    # Processor settings: verbose mode, output directory, and two processes.
    processor_settings = ProcessorConfig(
        verbose=True,
        output_dir="opensearch-ingest-output",
        num_processes=2,
    )

    # Read settings are left at their defaults.
    read_settings = ReadConfig()

    # Metadata entries listed here are passed as exclusions to the partitioner.
    excluded_metadata = ["filename", "file_directory", "metadata.data_source.date_processed"]
    partition_settings = PartitionConfig(metadata_exclude=excluded_metadata)

    # Connector settings: a single local host, the "movies" index, and the
    # list of fields handed to the connector.
    opensearch_access = OpenSearchAccessConfig(hosts=["http://localhost:9200"])
    opensearch_settings = SimpleOpenSearchConfig(
        access_config=opensearch_access,
        index_name="movies",
        fields=["ethnicity", "director", "plot"],
    )

    runner = OpenSearchRunner(
        processor_config=processor_settings,
        read_config=read_settings,
        partition_config=partition_settings,
        connector_config=opensearch_settings,
    )
    runner.run()
|