#!/usr/bin/env bash

# Processes example-docs/book-war-and-peace-1p.txt,
# embeds the processed document, and writes the results to a Chroma collection.

# Structured outputs are stored in local-to-chroma/

# Resolve the directory that contains this script, then move three levels up
# (presumably the repository root -- confirm) so that the relative paths used
# below (example-docs/, ./unstructured/ingest/main.py) resolve.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
cd "$SCRIPT_DIR"/../../.. || exit 1

# As an example we're using the local source connector,
# however ingesting from any supported source connector is possible.
# NOTE(review): every "" value below looks like an unfilled placeholder --
# supply real values (or drop the option) before running.
# shellcheck disable=2094
PYTHONPATH=. ./unstructured/ingest/main.py \
  local \
  --input-path example-docs/book-war-and-peace-1p.txt \
  --output-dir local-to-chroma \
  --strategy fast \
  --chunking-strategy by_title \
  --embedding-provider "" \
  --num-processes 2 \
  --verbose \
  --work-dir "" \
  chroma \
  --path "" \
  --settings "" \
  --tenant "" \
  --database "" \
  --host "" \
  --port "" \
  --ssl "" \
  --headers "" \
  --collection-name "" \
  --batch-size ""