#!/usr/bin/env bash

# Processes a file from the local filesystem, chunks and embeds it, and writes
# the results to a MongoDB collection.
#
# Structured outputs are stored in local-to-mongodb/
#
# The option values below that used to be hard-coded empty strings can be
# supplied through the environment. Each one still defaults to the empty
# string, so running the script with none of them set behaves as before:
#   INGEST_EMBEDDING_PROVIDER  value passed to --embedding-provider
#   INGEST_WORK_DIR            value passed to --work-dir
#   MONGODB_URI                value passed to --uri
#   MONGODB_COLLECTION         value passed to --collection
#   MONGODB_HOST               value passed to --host
#   MONGODB_PORT               value passed to --port

# Resolve the directory containing this script. Bail out if that fails:
# an empty SCRIPT_DIR would make the cd below silently land in "/".
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) || exit 1

# Run from three levels above this script (presumably the repository root)
# so that the relative paths used below resolve.
cd "$SCRIPT_DIR"/../../.. || exit 1

# As an example we're using the local connector,
# however ingesting from any supported source connector is possible.
# shellcheck disable=2094
PYTHONPATH=. ./unstructured/ingest/main.py \
  local \
  --input-path example-docs/book-war-and-peace-1225p.txt \
  --output-dir local-to-mongodb \
  --strategy fast \
  --chunking-strategy by_title \
  --embedding-provider "${INGEST_EMBEDDING_PROVIDER:-}" \
  --num-processes 2 \
  --verbose \
  --work-dir "${INGEST_WORK_DIR:-}" \
  mongodb \
  --uri "${MONGODB_URI:-}" \
  --collection "${MONGODB_COLLECTION:-}" \
  --host "${MONGODB_HOST:-}" \
  --port "${MONGODB_PORT:-}"