haystack/.github/utils/tutorials.sh
Sara Zan 091711b8c4
Fix Tutorials and Tutorials (nightly) (#2737)
* Remove caching and install audio deps

* Fix `Tutorials` as well

* Run all tutorials even though some fail

* Forgot fi

* fix failure condition

* proper bash string equality

* Enable debug logs

* remove audio files

* Update Documentation & Code Style

* Use the setup action in the Tutorial CI as well

* Try with a file that exists

* Update Documentation & Code Style

* Fix the comments in the tutorials

* Update Documentation & Code Style

* Fix tutorials.sh

* Remove debug logging

* import pprint and try editable install

* Update Documentation & Code Style

* extract no run list

* Add tutorial18 to no run list nightly

* import pprint correctly

* Update Documentation & Code Style

* try making site-packages editable

* Make pythonpath editable every time Tut17 is run on CI

* typo

* fix imports in tut5

* add git clean

* Update Documentation & Code Style

* add comments and remove `-e`

* accidentally deleted a line

* Update .github/utils/tutorials.sh

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
2022-07-12 11:22:17 +02:00

115 lines
4.1 KiB
Bash
Executable File

#!/bin/bash
#
# Runs the tutorials touched by a change set and reports the ones that failed.
#
# Positional arguments:
#   $1  Python installation prefix; its lib/ and bin/ipython are used below
#   $2  whitespace-separated list of changed files
#   $3  whitespace-separated list of name fragments; matching tutorials are skipped
#   $4  "EDITABLE" to make the Python path writable before running Tutorial 17

# See tut 10 - GraphDB is already running in CI
LAUNCH_GRAPHDB=0
# Avoid permission denied errors while importing tika
TIKA_LOG_PATH="$PWD"
export LAUNCH_GRAPHDB TIKA_LOG_PATH

python_path="${1}"
files_changed="${2}"
exclusion_list="${3}"
make_python_path_editable="${4}"

# Tutorials whose path contains one of these fragments do not get the
# reduced GoT dataset copied into place before they run.
no_got_tutorials='4_FAQ_style_QA 5_Evaluation 7_RAG_Generator 8_Preprocessing 10_Knowledge_Graph 15_TableQA 16_Document_Classifier_at_Index_Time'

printf 'Files changed in this PR: %s\n' "$files_changed"
printf 'Excluding: %s\n' "$exclusion_list"
# Collect the tutorials to run

#######################################
# Filters a list of changed files down to the tutorials that must be run.
# Globals:   scripts_to_run (written) - space-separated result, each entry
#            preceded by a single space
# Arguments: $1 - whitespace-separated list of changed paths
#            $2 - whitespace-separated list of fragments; a tutorial whose
#                 path contains any of them is skipped
# Outputs:   one "- not a tutorial: ..." or "- excluded: ..." line on stdout
#            for every path that is dropped
#######################################
select_tutorials() {
  local changed=$1
  local excluded_fragments=$2
  local script excluded skip_to_next

  scripts_to_run=""
  # Word splitting is intentional: both lists are whitespace-separated.
  for script in $changed; do
    # The extension must be a real suffix; a substring match would also let
    # through files such as *.pyc or *.py.orig, which would then be run.
    if [[ "$script" != *"tutorials/Tutorial"* || ( "$script" != *".py" && "$script" != *".ipynb" ) ]]; then
      echo "- not a tutorial: $script"
      continue
    fi

    skip_to_next=0
    for excluded in $excluded_fragments; do
      if [[ "$script" == *"$excluded"* ]]; then
        skip_to_next=1
        break
      fi
    done
    if [[ $skip_to_next == 1 ]]; then
      echo "- excluded: $script"
      continue
    fi

    scripts_to_run="$scripts_to_run $script"
  done
}

select_tutorials "$files_changed" "$exclusion_list"
#######################################
# Prints the part of a tutorial path that sits between "tutorials/Tutorial"
# and the first underscore, e.g. "17" for tutorials/Tutorial17_Audio.py.
# Any leading directories in front of "tutorials/" are ignored.
# Arguments: $1 - path of the tutorial script or notebook
# Outputs:   the extracted number on stdout
#######################################
tutorial_number() {
  local name=${1##*"tutorials/Tutorial"}
  printf '%s\n' "${name%%_*}"
}

# Run every selected tutorial, remembering the ones that exit non-zero so
# that a single failure does not prevent the remaining tutorials from running.
failed=""
# Word splitting is intentional: scripts_to_run is a space-separated list.
for script in $scripts_to_run; do
  echo ""
  echo "##################################################################################"
  echo "##################################################################################"
  echo "## Running $script ..."
  echo "##################################################################################"
  echo "##################################################################################"

  # Tutorials listed in no_got_tutorials are left alone; every other
  # tutorial gets the reduced GoT dataset.
  reduce_dataset=1
  for no_got_tut in $no_got_tutorials; do
    if [[ "$script" == *"$no_got_tut"* ]]; then
      reduce_dataset=0
      break
    fi
  done

  if [[ $reduce_dataset == 1 ]]; then
    # Copy the reduced GoT data into a folder named after the tutorial
    # to trigger the caching mechanism of `fetch_archive_from_http`
    echo "Using reduced GoT dataset"
    cp -r data/tutorials "data/tutorial$(tutorial_number "$script")"
  else
    echo "NOT using reduced GoT dataset!"
  fi

  # FIXME Make the Python path editable
  # espnet needs to edit files on the PYTHONPATH during execution. However, by default GH runners don't allow
  # workflows to edit files into that directory, so in case of tutorials using espnet, we need to make PYTHONPATH
  # editable first. For now it's only Tutorial 17.
  # Still unclear why it's needed to repeat this operation, but if Tutorial 17 is run twice (once for the .py
  # and once for .ipynb version) the error re-appears.
  if [[ $make_python_path_editable == "EDITABLE" && "$script" == *"Tutorial17_"* ]]; then
    sudo find "$python_path/lib" -type f -exec chmod 777 {} +
  fi

  # Plain scripts run with python; anything else (notebooks) goes through ipython.
  # The exit status is captured right after the command so nothing can clobber it.
  if [[ "$script" == *".py" ]]; then
    time python "$script"
    status=$?
  else
    sudo "$python_path/bin/ipython" -c "%run $script"
    status=$?
  fi
  if (( status != 0 )); then
    failed="$failed $script"
  fi

  # Clean up datasets and SQLite DBs to avoid crashing the next tutorial
  git clean -f
done
# Remove leftovers that would otherwise cause permission errors on Post Cache
for leftover in data/ /home/runner/work/haystack/haystack/elasticsearch-7.9.2/; do
  sudo rm -rf "$leftover"
done

# Report the outcome: exit 1 and list the failed tutorials if there are any,
# otherwise print the success banner and exit 0.
if [[ -n "$failed" ]]; then
  printf '%s\n' \
    "" \
    "##################################################################################" \
    "## ##" \
    "## Some tutorials have failed! ##" \
    "## ##" \
    "##################################################################################"
  # Word splitting is intentional: failed is a space-separated list.
  for broken in $failed; do
    printf '## - %s\n' "$broken"
  done
  printf '%s\n' "##################################################################################"
  exit 1
fi

printf '%s\n' \
  "" \
  "" \
  "------------------------------------------" \
  " All tutorials were executed successfully " \
  "------------------------------------------"
exit 0