mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-30 08:37:20 +00:00
* refactor: remove azure-core, pydoc and hf-hub pins * fix: remove extra-comma * fix: force minimum version of azure forms recognizer * refactor: allow newer ocr libs * refactor: update more dependencies and container versions * refactor: remove extra comment * docs: pre-commit manual run * refactor: remove unnecessary dependency * tests: update weaviate container image version
140 lines
5.2 KiB
Bash
Executable File
140 lines
5.2 KiB
Bash
Executable File
#!/bin/bash
#
# Runs the tutorials touched by a PR and reports which of them failed.
#
# Positional arguments:
#   $1  python_path                Python installation prefix; its bin/ipython and lib/ are used
#   $2  files_changed              whitespace-separated list of changed files
#   $3  exclusion_list             whitespace-separated substrings; tutorials containing one are skipped
#   $4  make_python_path_editable  "EDITABLE" makes $python_path/lib writable before Tutorial 17
#   $5  containers_policy          containers policy, reported in the log

export LAUNCH_GRAPHDB=0      # See tut 10 - GraphDB is already running in CI
export TIKA_LOG_PATH="$PWD"  # Avoid permission denied errors while importing tika

# Missing arguments default to the empty string.
python_path=${1:-}
files_changed=${2:-}
exclusion_list=${3:-}
make_python_path_editable=${4:-}
containers_policy=${5:-}

# Tutorials whose path contains one of these fragments do NOT get the reduced GoT dataset.
no_got_tutorials='4_FAQ_style_QA 5_Evaluation 7_RAG_Generator 8_Preprocessing 10_Knowledge_Graph 15_TableQA 16_Document_Classifier_at_Index_Time'

# Log the effective configuration.
echo "Files changed in this PR: $files_changed"
echo "Excluding: $exclusion_list"
echo "Python path is editable: $make_python_path_editable"
echo "Containers policy: $containers_policy"

# Collect the tutorials to run

#######################################
# Filters a list of changed files down to the tutorials that must be executed.
# Globals:   scripts_to_run (written) - space-separated list of selected tutorials
# Arguments: $1 - whitespace-separated list of changed files
#            $2 - whitespace-separated list of substrings; a tutorial whose
#                 path contains any of them is skipped
# Outputs:   one "- not a tutorial" / "- excluded" line per rejected file
#######################################
collect_tutorials() {
  local changed=$1 exclusions=$2
  local script excluded skip_to_next
  scripts_to_run=""

  # Word splitting is intentional: both arguments are whitespace-separated lists.
  # shellcheck disable=SC2086
  for script in $changed; do

    # Only tutorials/Tutorial*.py and tutorials/Tutorial*.ipynb qualify. The extension
    # must be the suffix: the run loop treats anything not ending in ".py" as a notebook,
    # so a substring match would send e.g. a ".pyc" file to IPython.
    if [[ "$script" != *"tutorials/Tutorial"* || ( "$script" != *".py" && "$script" != *".ipynb" ) ]]; then
      echo "- not a tutorial: $script"
      continue
    fi

    skip_to_next=0
    for excluded in $exclusions; do
      if [[ "$script" == *"$excluded"* ]]; then
        skip_to_next=1
        break
      fi
    done
    if [[ $skip_to_next == 1 ]]; then
      echo "- excluded: $script"
      continue
    fi

    scripts_to_run="$scripts_to_run $script"
  done
}

collect_tutorials "$files_changed" "$exclusion_list"

# Run the containers

# Single-node Elasticsearch with a small JVM heap, published on port 9200.
docker run -d -p 9200:9200 --name elasticsearch \
  -e "discovery.type=single-node" \
  -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" \
  elasticsearch:7.9.2

# Tika server, published on port 9998.
# Both JVM flags are passed in a single TIKA_CHILD_JAVA_OPTS value: a variable can hold
# only one value, so giving it twice with -e lost one of the two settings.
# NOTE(review): confirm the image actually consumes TIKA_CHILD_JAVA_OPTS.
docker run -d -p 9998:9998 --name tika \
  -e "TIKA_CHILD_JAVA_OPTS=-JXms128m -JXmx128m" \
  apache/tika:1.28.4

#######################################
# Tells whether a tutorial should run against the reduced GoT dataset.
# Globals:   no_got_tutorials (read) - whitespace-separated name fragments
# Arguments: $1 - path of the tutorial
# Returns:   1 if the path contains an entry of no_got_tutorials, 0 otherwise
#######################################
uses_reduced_dataset() {
  local script=$1 no_got_tut
  # shellcheck disable=SC2086 - intentional word splitting of the list
  for no_got_tut in $no_got_tutorials; do
    if [[ "$script" == *"$no_got_tut"* ]]; then
      return 1
    fi
  done
  return 0
}

#######################################
# Prints the part of a tutorial path between the "tutorials/Tutorial" prefix
# and the first underscore (e.g. "17" for tutorials/Tutorial17_Audio.py).
# Arguments: $1 - path of the tutorial
#######################################
tutorial_number() {
  local no_prefix=${1#"tutorials/Tutorial"}
  printf '%s\n' "${no_prefix%%_*}"
}

#######################################
# Executes one tutorial: ".py" files with python, anything else through IPython.
# Globals:   python_path (read), output (written) - captured stdout of the run
# Arguments: $1 - path of the tutorial
# Returns:   the exit status of the interpreter
#######################################
run_tutorial() {
  local script=$1
  # The status of an assignment is the status of its command substitution, and that
  # is the last command of this function, so the interpreter's status is returned.
  if [[ "$script" == *".py" ]]; then
    output=$(time python "$script")
  else
    output=$(sudo "$python_path/bin/ipython" -c "%run $script")
  fi
}

failed=""
# shellcheck disable=SC2086 - scripts_to_run is a whitespace-separated list
for script in $scripts_to_run; do

  echo ""
  echo "##################################################################################"
  echo "##################################################################################"
  echo "## Running $script ..."
  echo "##################################################################################"
  echo "##################################################################################"

  # Do not cache GoT data
  if uses_reduced_dataset "$script"; then
    # Copy the reduced GoT data into a folder named after the tutorial
    # to trigger the caching mechanism of `fetch_archive_from_http`
    echo "Using reduced GoT dataset"
    cp -r data/tutorials "data/tutorial$(tutorial_number "$script")"
  else
    echo "NOT using reduced GoT dataset!"
  fi

  # FIXME Make the Python path editable
  # espnet needs to edit files on the PYTHONPATH during execution. However, by default GH runners don't allow
  # workflows to edit files into that directory, so in case of tutorials using espnet, we need to make PYTHONPATH
  # editable first. For now it's only Tutorial 17.
  # Still unclear why it's needed to repeat this operation, but if Tutorial 17 is run twice (once for the .py
  # and once for .ipynb version) the error re-appears.
  if [[ "$make_python_path_editable" == "EDITABLE" && "$script" == *"Tutorial17_"* ]]; then
    # ${python_path:?} aborts instead of chmod-ing /lib when no Python path was given.
    sudo find "${python_path:?}/lib" -type f -exec chmod 777 {} +
  fi

  # Capture the status right away: any later command (such as the redirect below)
  # would overwrite $? and hide a failed tutorial.
  run_tutorial "$script"
  exit_code=$?

  # Quoted so the saved log keeps its line breaks.
  printf '%s\n' "$output" > "${script}-output.txt"

  if [[ "$exit_code" -eq 0 ]]; then
    echo "Execution completed successfully."
  else
    echo "===================================================="
    echo "| $script FAILED!"
    echo "===================================================="
    echo "Output of the execution: "
    printf '%s\n' "$output"
    failed="$failed $script"
  fi

  # Restart the necessary containers
  # Note: Tika does not store data and therefore can be left running
  # The policy is read from containers_policy; "RESTART" in make_python_path_editable
  # is still honoured for callers that relied on the previous check.
  if [[ "$containers_policy" == "RESTART" || "$make_python_path_editable" == "RESTART" ]]; then
    docker stop elasticsearch
    docker rm elasticsearch
    docker run -d -p 9200:9200 --name elasticsearch -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
  fi

  # Clean up datasets and SQLite DBs to avoid crashing the next tutorial
  git clean -f

done

# Remove leftovers that otherwise cause permission errors on Post Cache
for leftover in data/ /home/runner/work/haystack/haystack/elasticsearch-7.9.2/; do
  sudo rm -rf "$leftover"
done

# Final report: list the failed tutorials and exit 1, or announce success and exit 0.
if [[ -n "$failed" ]]; then
  printf '%s\n' \
    "" \
    "##################################################################################" \
    "## ##" \
    "## Some tutorials have failed! ##" \
    "## ##" \
    "##################################################################################"
  # shellcheck disable=SC2086 - failed is a whitespace-separated list
  for script in $failed; do
    printf '%s\n' "## - $script"
  done
  printf '%s\n' "##################################################################################"
  exit 1
fi

printf '%s\n' \
  "" \
  "" \
  "------------------------------------------" \
  " All tutorials were executed successfully " \
  "------------------------------------------"
exit 0