#!/usr/bin/env bash # Processes all the files from abfs://container1/ in azureunstructured1 account, # using the `unstructured` library. # Structured outputs are stored in azure-ingest-output/ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR"/../../.. || exit 1 PYTHONPATH=. ./unstructured/ingest/main.py \ s3 \ --remote-url s3://utic-dev-tech-fixtures/small-pdf-set/ \ --anonymous \ --output-dir s3-small-batch-output-to-azure \ --num-processes 2 \ --verbose \ --strategy fast \ azure-cognitive-search \ --key "$AZURE_SEARCH_API_KEY" \ --endpoint "$AZURE_SEARCH_ENDPOINT" \ --index utic-test-ingest-fixtures-output