David Potter 9177aa20a8
feature CORE-3985: add Clarifai destination connector (#2633)
Thanks to @mogith-pn from Clarifai we have a new destination connector!

This PR intends to add Clarifai as an ingest destination connector.

Access via the CLI and programmatically.
Documentation and Examples.
Integration test script.
2024-03-21 16:36:21 +00:00

115 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Integration test for the Clarifai ingest destination.
#
# Environment:
#   CLARIFAI_API_KEY  required; the test is skipped (exit 0) when it is empty.
#   MAX_PROCESSES     optional; defaults to the CPU count reported by python3.

set -e

# Resolve paths relative to this script: DEST_PATH is the directory holding
# the script, SCRIPT_DIR is its parent, and the working directory becomes
# the parent of SCRIPT_DIR.
DEST_PATH="$(dirname "$(realpath "$0")")"
SCRIPT_DIR="$(dirname "$DEST_PATH")"
if ! cd "${SCRIPT_DIR}/.."; then
  exit 1
fi

# Locations for the structured output and the intermediate work files.
OUTPUT_FOLDER_NAME="clarifai-dest"
OUTPUT_DIR="${SCRIPT_DIR}/structured-output/${OUTPUT_FOLDER_NAME}"
WORK_DIR="${SCRIPT_DIR}/workdir/${OUTPUT_FOLDER_NAME}"

# `:=` also assigns MAX_PROCESSES itself when it was unset or empty.
max_processes="${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}"

# Without credentials there is nothing to test; skip rather than fail.
[[ -n "$CLARIFAI_API_KEY" ]] || {
  echo "Skipping Clarifai ingest test because CLARIFAI_API_KEY env var is not set."
  exit 0
}

# A random suffix gives every run its own app id.
RANDOM_SUFFIX=$((1 + RANDOM % 100000))

# Set the variables with default values
USER_ID="unstructured"
APP_ID="test-app-unstructured-${RANDOM_SUFFIX}"

# Presumably provides cleanup_dir, which the cleanup handler calls -- confirm
# against cleanup.sh.
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/cleanup.sh"
#######################################
# EXIT-trap handler: deletes the temporary Clarifai app when it exists, then
# removes the local work and output directories.
# Globals (read): USER_ID, APP_ID, CLARIFAI_API_KEY, WORK_DIR, OUTPUT_DIR
# Requires: cleanup_dir to be defined by the time this runs (it is not
#           defined in this function).
# Outputs:  progress messages (and the DELETE response body) on stdout.
#######################################
function cleanup {
  local response_code

  # Get response code to check if app really exists.
  # The `||` fallback keeps a failed curl (e.g. no network) from ending this
  # handler early when the script runs with `set -e`; local cleanup below
  # must happen regardless of what the remote API does.
  response_code=$(curl \
    -s -o /dev/null \
    -w "%{http_code}" \
    --request GET "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID" \
    --header "Authorization: Key $CLARIFAI_API_KEY") || response_code="000"

  # Cleanup (delete) the app if it exists
  if [ "$response_code" == "200" ]; then
    echo ""
    echo "deleting clarifai app $APP_ID"
    curl --request DELETE "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID" \
      -H "Authorization: Key $CLARIFAI_API_KEY" ||
      echo "Failed to delete clarifai app $APP_ID (curl exit status $?)"
  else
    # No deletion was attempted here: the existence check did not return 200.
    echo "Skipping deletion of clarifai app $APP_ID: lookup returned response code $response_code. App might not exist in your account."
  fi

  # Local file cleanup
  cleanup_dir "$WORK_DIR"
  cleanup_dir "$OUTPUT_DIR"
}
# Register the handler before creating anything remote, so it runs on every
# exit path from here on.
trap cleanup EXIT

echo "Creating Clarifai app $APP_ID"

# Only the HTTP status code is captured (-o /dev/null with -w "%{http_code}").
# Because -s silences curl's own error output, a transport failure is
# reported here explicitly instead of letting `set -e` end the script with
# no explanation.
response_code=$(
  curl \
    -s -o /dev/null \
    -w "%{http_code}" \
    --location --request POST "https://api.clarifai.com/v2/users/$USER_ID/apps/" \
    --header "Content-Type: application/json" \
    --header "Authorization: Key $CLARIFAI_API_KEY" \
    --data-raw "{\"apps\": [{\"id\": \"$APP_ID\", \"default_workflow_id\": \"Universal\"}]}"
) || {
  echo "Failed to create app $APP_ID: curl exited with status $? (response code: ${response_code:-none})"
  exit 1
}

# Any status below 400 counts as success; a non-numeric value is a failure
# rather than a `[` syntax error.
if [[ "$response_code" =~ ^[0-9]+$ ]] && [ "$response_code" -lt 400 ]; then
  echo "App created successfully: $APP_ID"
else
  echo "Failed to create app $APP_ID: $response_code"
  exit 1
fi
# Run the ingest CLI once: the `local` subcommand and its options come first,
# followed by the `clarifai` subcommand and its options.
ingest_source_args=(
  local
  --input-path example-docs/book-war-and-peace-1p.txt
  --output-dir "$OUTPUT_DIR"
  --strategy fast
  --chunk-elements
  --num-processes "$max_processes"
  --work-dir "$WORK_DIR"
  --verbose
)

ingest_destination_args=(
  clarifai
  --app-id "$APP_ID"
  --user-id "$USER_ID"
  --api-key "$CLARIFAI_API_KEY"
  --batch-size 100
)

# PYTHONPATH=. lets the entry point import the package from the current
# directory.
PYTHONPATH=. ./unstructured/ingest/main.py \
  "${ingest_source_args[@]}" \
  "${ingest_destination_args[@]}"
# Verify the upload: poll the app's input-status endpoint until the processed
# count reaches EXPECTED or the retry budget is used up, then assert on the
# final count.
EXPECTED=8
no_of_inputs=0
sleep_time=5
max_retries=10
retry_count=0

# Keep polling while fewer than EXPECTED inputs are processed; stopping at the
# first non-zero count would fail spuriously on a partially processed upload.
while [ "$no_of_inputs" -lt "$EXPECTED" ]; do
  # Check the budget before sleeping so no time is wasted after the last poll.
  if [ "$retry_count" -ge "$max_retries" ]; then
    echo "Reached maximum retries limit. Exiting..."
    break
  fi

  echo "checking for no of inputs in clarifai app"
  sleep "$sleep_time"

  # The method is passed via --request; a bare `GET` argument is treated by
  # curl as an additional URL. A failed request yields an empty response and
  # is retried instead of aborting the script under `set -e`.
  resp=$(curl \
    -s --request GET "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID/inputs/status" \
    -H "Authorization: Key $CLARIFAI_API_KEY") || resp=""

  # Extract the processed count and strip any ANSI colour sequences.
  no_of_inputs=$(printf '%s\n' "$resp" | jq -r '.counts.processed' | sed 's/\x1b\[[0-9;]*m//g')

  # jq prints "null" when the field is missing and nothing on a parse error.
  # Normalise anything that is not a plain non-negative integer to 0 so the
  # numeric tests here and below cannot error out and skip the final check.
  case "$no_of_inputs" in
    '' | *[!0-9]*) no_of_inputs=0 ;;
  esac

  echo "Processed count: $no_of_inputs"
  retry_count=$((retry_count + 1))
done

if [ "$no_of_inputs" -ne "$EXPECTED" ]; then
  echo "Number of inputs in the clarifai app $APP_ID is not equal to expected. Test failed."
  echo "Expected $EXPECTED processed inputs, got $no_of_inputs."
  exit 1
fi