26 lines
939 B
Python
Raw Normal View History

from unstructured.ingest.connector.elasticsearch import (
ElasticsearchAccessConfig,
SimpleElasticsearchConfig,
)
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import ElasticSearchRunner
if __name__ == "__main__":
    # Example entry point: configure an ElasticSearchRunner against the
    # "movies" index on http://localhost:9200 and run it.
    # The config objects are built in the same order the original nested call
    # evaluated them, so construction side effects (if any) are unchanged.

    # Processor-level settings, including the output directory name.
    processor_settings = ProcessorConfig(
        verbose=True,
        output_dir="elasticsearch-ingest-output",
        num_processes=2,
    )

    # Read settings are left at the library defaults.
    read_settings = ReadConfig()

    # Metadata keys passed as `metadata_exclude`
    # (presumably dropped from the partitioned output; confirm in library docs).
    excluded_metadata = [
        "filename",
        "file_directory",
        "metadata.data_source.date_processed",
    ]
    partition_settings = PartitionConfig(metadata_exclude=excluded_metadata)

    # Connection details plus the index and document fields handed to the connector.
    es_access = ElasticsearchAccessConfig(hosts=["http://localhost:9200"])
    es_connector = SimpleElasticsearchConfig(
        access_config=es_access,
        index_name="movies",
        fields=["ethnicity", "director", "plot"],
    )

    runner = ElasticSearchRunner(
        processor_config=processor_settings,
        read_config=read_settings,
        partition_config=partition_settings,
        connector_config=es_connector,
    )
    runner.run()