mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-10-01 11:21:13 +00:00
feat: add logging to ingest CLI for tests being skipped at the end (#2174)
### Description Tests are often skipped, either because required env vars are missing or because they are explicitly listed in the base script, but these skips get lost in the logs. This PR updates the test scripts to exit with a custom error code (8) when they are skipped due to missing env vars. The base script catches this error code and logs every skipped test file to a file. At the end of the script, the contents of this file are printed in the CI output.
This commit is contained in:
parent
0aae1faa54
commit
b951d73a9b
1
.gitignore
vendored
1
.gitignore
vendored
@ -139,6 +139,7 @@ dmypy.json
|
|||||||
/structured-output
|
/structured-output
|
||||||
test_unstructured_ingest/workdir/
|
test_unstructured_ingest/workdir/
|
||||||
test_unstructured_ingest/delta-table-dest/
|
test_unstructured_ingest/delta-table-dest/
|
||||||
|
test_unstructured_ingest/skipped-files.txt
|
||||||
|
|
||||||
# suggested ingest mirror directory
|
# suggested ingest mirror directory
|
||||||
/mirror
|
/mirror
|
||||||
|
@ -18,7 +18,7 @@ API_VERSION=2023-07-01-Preview
|
|||||||
|
|
||||||
if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then
|
if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then
|
||||||
echo "Skipping Azure Cognitive Search ingest test because neither AZURE_SEARCH_ENDPOINT nor AZURE_SEARCH_API_KEY env vars are set."
|
echo "Skipping Azure Cognitive Search ingest test because neither AZURE_SEARCH_ENDPOINT nor AZURE_SEARCH_API_KEY env vars are set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "$SCRIPT_DIR"/cleanup.sh
|
source "$SCRIPT_DIR"/cleanup.sh
|
||||||
|
@ -13,7 +13,7 @@ max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
|
|||||||
|
|
||||||
if [ -z "$AZURE_DEST_CONNECTION_STR" ]; then
|
if [ -z "$AZURE_DEST_CONNECTION_STR" ]; then
|
||||||
echo "Skipping Azure destination ingest test because the AZURE_DEST_CONNECTION_STR env var is not set."
|
echo "Skipping Azure destination ingest test because the AZURE_DEST_CONNECTION_STR env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CONTAINER=utic-ingest-test-fixtures-output
|
CONTAINER=utic-ingest-test-fixtures-output
|
||||||
|
@ -16,7 +16,7 @@ CI=${CI:-"false"}
|
|||||||
if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then
|
if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then
|
||||||
echo "Skipping Dropbox ingest test because one or more of these env vars is not set:"
|
echo "Skipping Dropbox ingest test because one or more of these env vars is not set:"
|
||||||
echo "DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_REFRESH_TOKEN"
|
echo "DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_REFRESH_TOKEN"
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Get a new access token from Dropbox
|
# Get a new access token from Dropbox
|
||||||
|
@ -17,7 +17,7 @@ CI=${CI:-"false"}
|
|||||||
|
|
||||||
if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then
|
if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then
|
||||||
echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set."
|
echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Create temporary service key file
|
# Create temporary service key file
|
||||||
|
@ -15,7 +15,7 @@ CI=${CI:-"false"}
|
|||||||
|
|
||||||
if [ -z "$MONGODB_URI" ] && [ -z "$MONGODB_DATABASE_NAME" ]; then
|
if [ -z "$MONGODB_URI" ] && [ -z "$MONGODB_DATABASE_NAME" ]; then
|
||||||
echo "Skipping MongoDB destination ingest test because the MONGODB_URI and MONGODB_DATABASE_NAME env var are not set."
|
echo "Skipping MongoDB destination ingest test because the MONGODB_URI and MONGODB_DATABASE_NAME env var are not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,22 +18,22 @@ CI=${CI:-"false"}
|
|||||||
|
|
||||||
if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ] ; then
|
if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ] ; then
|
||||||
echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set."
|
echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$SHAREPOINT_PERMISSIONS_APP_ID" ] || [ -z "$SHAREPOINT_PERMISSIONS_APP_CRED" ] || [ -z "$SHAREPOINT_PERMISSIONS_TENANT" ] ; then
|
if [ -z "$SHAREPOINT_PERMISSIONS_APP_ID" ] || [ -z "$SHAREPOINT_PERMISSIONS_APP_CRED" ] || [ -z "$SHAREPOINT_PERMISSIONS_TENANT" ] ; then
|
||||||
echo "Skipping Sharepoint ingest test because the SHAREPOINT_PERMISSIONS_APP_ID, SHAREPOINT_PERMISSIONS_APP_CRED, or SHAREPOINT_PERMISSIONS_TENANT env var is not set."
|
echo "Skipping Sharepoint ingest test because the SHAREPOINT_PERMISSIONS_APP_ID, SHAREPOINT_PERMISSIONS_APP_CRED, or SHAREPOINT_PERMISSIONS_TENANT env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$OPENAI_API_KEY" ]; then
|
if [ -z "$OPENAI_API_KEY" ]; then
|
||||||
echo "Skipping Sharepoint embedding ingest test because the OPENAI_API_KEY env var is not set."
|
echo "Skipping Sharepoint embedding ingest test because the OPENAI_API_KEY env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then
|
if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then
|
||||||
echo "Skipping Sharepoint Azure Cognitive Search ingest test because neither AZURE_SEARCH_ENDPOINT nor AZURE_SEARCH_API_KEY env vars are set."
|
echo "Skipping Sharepoint Azure Cognitive Search ingest test because neither AZURE_SEARCH_ENDPOINT nor AZURE_SEARCH_API_KEY env vars are set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
|
@ -4,7 +4,7 @@ set -e
|
|||||||
|
|
||||||
if [ -z "$UNS_API_KEY" ]; then
|
if [ -z "$UNS_API_KEY" ]; then
|
||||||
echo "Skipping ingest test against api because the UNS_API_KEY env var is not set."
|
echo "Skipping ingest test against api because the UNS_API_KEY env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
SRC_PATH=$(dirname "$(realpath "$0")")
|
SRC_PATH=$(dirname "$(realpath "$0")")
|
||||||
SCRIPT_DIR=$(dirname "$SRC_PATH")
|
SCRIPT_DIR=$(dirname "$SRC_PATH")
|
||||||
|
@ -32,7 +32,7 @@ VARIED_DATA_BASE_ID_2="appJ43QmP8I17zu88"
|
|||||||
|
|
||||||
if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then
|
if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then
|
||||||
echo "Skipping Airtable ingest test because the AIRTABLE_PERSONAL_ACCESS_TOKEN is not set."
|
echo "Skipping Airtable ingest test because the AIRTABLE_PERSONAL_ACCESS_TOKEN is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -30,7 +30,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then
|
if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then
|
||||||
echo "Skipping Airtable ingest test because the AIRTABLE_PERSONAL_ACCESS_TOKEN is not set."
|
echo "Skipping Airtable ingest test because the AIRTABLE_PERSONAL_ACCESS_TOKEN is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Provides component IDs such as LARGE_TEST_LIST_OF_PATHS,
|
# Provides component IDs such as LARGE_TEST_LIST_OF_PATHS,
|
||||||
|
@ -29,7 +29,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$BOX_APP_CONFIG" ] && [ -z "$BOX_APP_CONFIG_PATH" ]; then
|
if [ -z "$BOX_APP_CONFIG" ] && [ -z "$BOX_APP_CONFIG_PATH" ]; then
|
||||||
echo "Skipping Box ingest test because neither BOX_APP_CONFIG nor BOX_APP_CONFIG_PATH env vars are set."
|
echo "Skipping Box ingest test because neither BOX_APP_CONFIG nor BOX_APP_CONFIG_PATH env vars are set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$BOX_APP_CONFIG_PATH" ]; then
|
if [ -z "$BOX_APP_CONFIG_PATH" ]; then
|
||||||
|
@ -28,7 +28,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then
|
if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then
|
||||||
echo "Skipping Confluence ingest test because the CONFLUENCE_USER_EMAIL or CONFLUENCE_API_TOKEN env var is not set."
|
echo "Skipping Confluence ingest test because the CONFLUENCE_USER_EMAIL or CONFLUENCE_API_TOKEN env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -30,7 +30,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then
|
if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then
|
||||||
echo "Skipping Confluence ingest test because the CONFLUENCE_USER_EMAIL or CONFLUENCE_API_TOKEN env var is not set."
|
echo "Skipping Confluence ingest test because the CONFLUENCE_USER_EMAIL or CONFLUENCE_API_TOKEN env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# The test checks the scenario where --confluence-list-of-spaces and --confluence-num-of-spaces
|
# The test checks the scenario where --confluence-list-of-spaces and --confluence-num-of-spaces
|
||||||
|
@ -15,7 +15,7 @@ CI=${CI:-"false"}
|
|||||||
|
|
||||||
if [ -z "$AWS_ACCESS_KEY_ID" ] && [ -z "$AWS_SECRET_ACCESS_KEY" ]; then
|
if [ -z "$AWS_ACCESS_KEY_ID" ] && [ -z "$AWS_SECRET_ACCESS_KEY" ]; then
|
||||||
echo "Skipping Delta Table ingest test because either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY env var was not set."
|
echo "Skipping Delta Table ingest test because either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY env var was not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
|
@ -26,7 +26,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$DISCORD_TOKEN" ]; then
|
if [ -z "$DISCORD_TOKEN" ]; then
|
||||||
echo "Skipping Discord ingest test because the DISCORD_TOKEN env var is not set."
|
echo "Skipping Discord ingest test because the DISCORD_TOKEN env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -27,7 +27,7 @@ trap cleanup EXIT
|
|||||||
if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then
|
if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then
|
||||||
echo "Skipping Dropbox ingest test because one or more of these env vars is not set:"
|
echo "Skipping Dropbox ingest test because one or more of these env vars is not set:"
|
||||||
echo "DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_REFRESH_TOKEN"
|
echo "DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_REFRESH_TOKEN"
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Get a new access token from Dropbox
|
# Get a new access token from Dropbox
|
||||||
|
@ -27,7 +27,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then
|
if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then
|
||||||
echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set."
|
echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Create temporary service key file
|
# Create temporary service key file
|
||||||
|
@ -28,7 +28,7 @@ trap cleanup EXIT
|
|||||||
if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then
|
if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then
|
||||||
echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set."
|
echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set."
|
||||||
echo "The Google Drive test content can be found at https://drive.google.com/drive/folders/1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr"
|
echo "The Google Drive test content can be found at https://drive.google.com/drive/folders/1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr"
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Create temporary service key file
|
# Create temporary service key file
|
||||||
|
@ -26,7 +26,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$HUBSPOT_API_TOKEN" ]; then
|
if [ -z "$HUBSPOT_API_TOKEN" ]; then
|
||||||
echo "Skipping HubSpot ingest test because the HUBSPOT_API_TOKEN env var is not set."
|
echo "Skipping HubSpot ingest test because the HUBSPOT_API_TOKEN env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Required arguments:
|
# Required arguments:
|
||||||
|
@ -27,7 +27,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$JIRA_INGEST_USER_EMAIL" ] || [ -z "$JIRA_INGEST_API_TOKEN" ]; then
|
if [ -z "$JIRA_INGEST_USER_EMAIL" ] || [ -z "$JIRA_INGEST_API_TOKEN" ]; then
|
||||||
echo "Skipping Jira ingest test because the JIRA_INGEST_USER_EMAIL or JIRA_INGEST_API_TOKEN env var is not set."
|
echo "Skipping Jira ingest test because the JIRA_INGEST_USER_EMAIL or JIRA_INGEST_API_TOKEN env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Required arguments:
|
# Required arguments:
|
||||||
|
@ -26,7 +26,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$NOTION_API_KEY" ]; then
|
if [ -z "$NOTION_API_KEY" ]; then
|
||||||
echo "Skipping Notion ingest test because the NOTION_API_KEY env var is not set."
|
echo "Skipping Notion ingest test because the NOTION_API_KEY env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -26,7 +26,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_USER_PNAME" ]; then
|
if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_USER_PNAME" ]; then
|
||||||
echo "Skipping OneDrive ingest test because the MS_CLIENT_ID, MS_CLIENT_CRED, MS_USER_PNAME env var is not set."
|
echo "Skipping OneDrive ingest test because the MS_CLIENT_ID, MS_CLIENT_CRED, MS_USER_PNAME env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -26,7 +26,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_TENANT_ID" ] || [ -z "$MS_USER_EMAIL" ]; then
|
if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_TENANT_ID" ] || [ -z "$MS_USER_EMAIL" ]; then
|
||||||
echo "Skipping Outlook ingest test because the MS_CLIENT_ID or MS_CLIENT_CRED or MS_TENANT_ID or MS_USER_EMAIL env var is not set."
|
echo "Skipping Outlook ingest test because the MS_CLIENT_ID or MS_CLIENT_CRED or MS_TENANT_ID or MS_USER_EMAIL env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -29,12 +29,12 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$SALESFORCE_USERNAME" ] || [ -z "$SALESFORCE_CONSUMER_KEY" ]; then
|
if [ -z "$SALESFORCE_USERNAME" ] || [ -z "$SALESFORCE_CONSUMER_KEY" ]; then
|
||||||
echo "Skipping Salesforce ingest test because SALESFORCE_USERNAME and SALESFORCE_CONSUMER_KEY env vars not set"
|
echo "Skipping Salesforce ingest test because SALESFORCE_USERNAME and SALESFORCE_CONSUMER_KEY env vars not set"
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$SALESFORCE_PRIVATE_KEY" ] && [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then
|
if [ -z "$SALESFORCE_PRIVATE_KEY" ] && [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then
|
||||||
echo "Skipping Salesforce ingest test because neither SALESFORCE_PRIVATE_KEY nor SALESFORCE_PRIVATE_KEY_PATH env vars are set."
|
echo "Skipping Salesforce ingest test because neither SALESFORCE_PRIVATE_KEY nor SALESFORCE_PRIVATE_KEY_PATH env vars are set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then
|
if [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then
|
||||||
|
@ -26,12 +26,12 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then
|
if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then
|
||||||
echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set."
|
echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$SHAREPOINT_PERMISSIONS_APP_ID" ] || [ -z "$SHAREPOINT_PERMISSIONS_APP_CRED" ] || [ -z "$SHAREPOINT_PERMISSIONS_TENANT" ] ; then
|
if [ -z "$SHAREPOINT_PERMISSIONS_APP_ID" ] || [ -z "$SHAREPOINT_PERMISSIONS_APP_CRED" ] || [ -z "$SHAREPOINT_PERMISSIONS_TENANT" ] ; then
|
||||||
echo "Skipping Sharepoint ingest test because the SHAREPOINT_PERMISSIONS_APP_ID, SHAREPOINT_PERMISSIONS_APP_CRED, or SHAREPOINT_PERMISSIONS_TENANT env var is not set."
|
echo "Skipping Sharepoint ingest test because the SHAREPOINT_PERMISSIONS_APP_ID, SHAREPOINT_PERMISSIONS_APP_CRED, or SHAREPOINT_PERMISSIONS_TENANT env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly
|
# excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly
|
||||||
|
@ -27,7 +27,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then
|
if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then
|
||||||
echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set."
|
echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly
|
# excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly
|
||||||
|
@ -26,7 +26,7 @@ trap cleanup EXIT
|
|||||||
|
|
||||||
if [ -z "$SLACK_TOKEN" ]; then
|
if [ -z "$SLACK_TOKEN" ]; then
|
||||||
echo "Skipping Slack ingest test because the SLACK_TOKEN env var is not set."
|
echo "Skipping Slack ingest test because the SLACK_TOKEN env var is not set."
|
||||||
exit 0
|
exit 8
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py}
|
||||||
|
@ -1,8 +1,14 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
set -eu -o pipefail
|
set -u -o pipefail
|
||||||
|
|
||||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt
|
||||||
|
# If the file already exists, reset it
|
||||||
|
if [ -f "$SKIPPED_FILES_LOG" ]; then
|
||||||
|
rm "$SKIPPED_FILES_LOG"
|
||||||
|
touch "$SKIPPED_FILES_LOG"
|
||||||
|
fi
|
||||||
cd "$SCRIPT_DIR"/.. || exit 1
|
cd "$SCRIPT_DIR"/.. || exit 1
|
||||||
|
|
||||||
# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
|
# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
|
||||||
@ -52,17 +58,17 @@ for test in "${all_tests[@]}"; do
|
|||||||
echo "--------- SKIPPING SCRIPT $test ---------"
|
echo "--------- SKIPPING SCRIPT $test ---------"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
if [[ "${tests_to_ignore[*]}" =~ $test ]]; then
|
|
||||||
echo "--------- RUNNING SCRIPT $test --- IGNORING FAILURES"
|
|
||||||
set +e
|
|
||||||
echo "Running ./test_unstructured_ingest/$test"
|
|
||||||
./test_unstructured_ingest/dest/"$test"
|
|
||||||
set -e
|
|
||||||
echo "--------- FINISHED SCRIPT $test ---------"
|
|
||||||
else
|
|
||||||
echo "--------- RUNNING SCRIPT $test ---------"
|
echo "--------- RUNNING SCRIPT $test ---------"
|
||||||
echo "Running ./test_unstructured_ingest/$test"
|
echo "Running ./test_unstructured_ingest/$test"
|
||||||
./test_unstructured_ingest/dest/"$test"
|
./test_unstructured_ingest/dest/"$test"
|
||||||
echo "--------- FINISHED SCRIPT $test ---------"
|
rc=$?
|
||||||
|
if [[ $rc -eq 8 ]]; then
|
||||||
|
echo "$test (skipped due to missing env var)" | tee -a "$SKIPPED_FILES_LOG"
|
||||||
|
elif [[ "${tests_to_ignore[*]}" =~ $test ]]; then
|
||||||
|
echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG"
|
||||||
|
continue
|
||||||
|
elif [[ $rc -ne 0 ]]; then
|
||||||
|
exit $rc
|
||||||
fi
|
fi
|
||||||
|
echo "--------- FINISHED SCRIPT $test ---------"
|
||||||
done
|
done
|
||||||
|
@ -1,51 +1,57 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
set -eu -o pipefail
|
set -u -o pipefail
|
||||||
|
|
||||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt
|
||||||
|
# If the file already exists, reset it
|
||||||
|
if [ -f "$SKIPPED_FILES_LOG" ]; then
|
||||||
|
rm "$SKIPPED_FILES_LOG"
|
||||||
|
touch "$SKIPPED_FILES_LOG"
|
||||||
|
fi
|
||||||
cd "$SCRIPT_DIR"/.. || exit 1
|
cd "$SCRIPT_DIR"/.. || exit 1
|
||||||
|
|
||||||
# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
|
# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
|
||||||
export OMP_THREAD_LIMIT=1
|
export OMP_THREAD_LIMIT=1
|
||||||
|
|
||||||
all_tests=(
|
all_tests=(
|
||||||
's3.sh'
|
's3.sh'
|
||||||
's3-minio.sh'
|
's3-minio.sh'
|
||||||
'azure.sh'
|
'azure.sh'
|
||||||
'biomed-api.sh'
|
'biomed-api.sh'
|
||||||
'biomed-path.sh'
|
'biomed-path.sh'
|
||||||
# NOTE(yuming): The pdf-fast-reprocess test should be put after any tests that save downloaded files
|
# NOTE(yuming): The pdf-fast-reprocess test should be put after any tests that save downloaded files
|
||||||
'pdf-fast-reprocess.sh'
|
'pdf-fast-reprocess.sh'
|
||||||
'salesforce.sh'
|
'salesforce.sh'
|
||||||
'box.sh'
|
'box.sh'
|
||||||
'discord.sh'
|
'discord.sh'
|
||||||
'dropbox.sh'
|
'dropbox.sh'
|
||||||
'github.sh'
|
'github.sh'
|
||||||
'gitlab.sh'
|
'gitlab.sh'
|
||||||
'google-drive.sh'
|
'google-drive.sh'
|
||||||
'wikipedia.sh'
|
'wikipedia.sh'
|
||||||
'local.sh'
|
'local.sh'
|
||||||
'slack.sh'
|
'slack.sh'
|
||||||
'against-api.sh'
|
'against-api.sh'
|
||||||
'gcs.sh'
|
'gcs.sh'
|
||||||
'onedrive.sh'
|
'onedrive.sh'
|
||||||
'outlook.sh'
|
'outlook.sh'
|
||||||
'elasticsearch.sh'
|
'elasticsearch.sh'
|
||||||
'confluence-diff.sh'
|
'confluence-diff.sh'
|
||||||
'confluence-large.sh'
|
'confluence-large.sh'
|
||||||
'airtable-diff.sh'
|
'airtable-diff.sh'
|
||||||
# NOTE(ryan): This test is disabled because it is triggering too many requests to the API
|
# NOTE(ryan): This test is disabled because it is triggering too many requests to the API
|
||||||
# 'airtable-large.sh'
|
# 'airtable-large.sh'
|
||||||
'local-single-file.sh'
|
'local-single-file.sh'
|
||||||
'local-single-file-with-encoding.sh'
|
'local-single-file-with-encoding.sh'
|
||||||
'local-single-file-with-pdf-infer-table-structure.sh'
|
'local-single-file-with-pdf-infer-table-structure.sh'
|
||||||
'notion.sh'
|
'notion.sh'
|
||||||
'delta-table.sh'
|
'delta-table.sh'
|
||||||
'jira.sh'
|
'jira.sh'
|
||||||
'sharepoint.sh'
|
'sharepoint.sh'
|
||||||
'sharepoint-with-permissions.sh'
|
'sharepoint-with-permissions.sh'
|
||||||
'hubspot.sh'
|
'hubspot.sh'
|
||||||
'local-embed.sh'
|
'local-embed.sh'
|
||||||
)
|
)
|
||||||
|
|
||||||
full_python_matrix_tests=(
|
full_python_matrix_tests=(
|
||||||
@ -66,6 +72,8 @@ function print_last_run() {
|
|||||||
if [ "$CURRENT_TEST" != "none" ]; then
|
if [ "$CURRENT_TEST" != "none" ]; then
|
||||||
echo "Last ran script: $CURRENT_TEST"
|
echo "Last ran script: $CURRENT_TEST"
|
||||||
fi
|
fi
|
||||||
|
echo "######## SKIPPED TESTS: ########"
|
||||||
|
cat "$SKIPPED_FILES_LOG"
|
||||||
}
|
}
|
||||||
|
|
||||||
trap print_last_run EXIT
|
trap print_last_run EXIT
|
||||||
@ -85,21 +93,23 @@ for test in "${all_tests[@]}"; do
|
|||||||
echo "--------- SKIPPING SCRIPT $test ---------"
|
echo "--------- SKIPPING SCRIPT $test ---------"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
if [[ "${tests_to_ignore[*]}" =~ $test ]]; then
|
|
||||||
echo "--------- RUNNING SCRIPT $test --- IGNORING FAILURES"
|
|
||||||
set +e
|
|
||||||
echo "Running ./test_unstructured_ingest/$test"
|
|
||||||
./test_unstructured_ingest/src/"$test"
|
|
||||||
set -e
|
|
||||||
echo "--------- FINISHED SCRIPT $test ---------"
|
|
||||||
else
|
|
||||||
echo "--------- RUNNING SCRIPT $test ---------"
|
echo "--------- RUNNING SCRIPT $test ---------"
|
||||||
echo "Running ./test_unstructured_ingest/$test"
|
echo "Running ./test_unstructured_ingest/$test"
|
||||||
./test_unstructured_ingest/src/"$test"
|
./test_unstructured_ingest/src/"$test"
|
||||||
echo "--------- FINISHED SCRIPT $test ---------"
|
rc=$?
|
||||||
|
if [[ $rc -eq 8 ]]; then
|
||||||
|
echo "$test (skipped due to missing env var)" | tee -a "$SKIPPED_FILES_LOG"
|
||||||
|
elif [[ "${tests_to_ignore[*]}" =~ $test ]]; then
|
||||||
|
echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG"
|
||||||
|
continue
|
||||||
|
elif [[ $rc -ne 0 ]]; then
|
||||||
|
exit $rc
|
||||||
fi
|
fi
|
||||||
|
echo "--------- FINISHED SCRIPT $test ---------"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
set +e
|
||||||
|
|
||||||
all_eval=(
|
all_eval=(
|
||||||
'text-extraction'
|
'text-extraction'
|
||||||
'element-type'
|
'element-type'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user