mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-07 20:46:31 +00:00
* Remove caching and install audio deps * Fix `Tutorials` as well * Run all tutorials even though some fail * Forgot fi * fix failure condition * proper bash string equality * Enable debug logs * remove audio files * Update Documentation & Code Style * Use the setup action in the Tutorial CI as well * Try with a file that exists * Update Documentation & Code Style * Fix the comments in the tutorials * Update Documentation & Code Style * Fix tutorials.sh * Remove debug logging * import pprint and try editable install * Update Documentation & Code Style * extract no run list * Add tutorial18 to no run list nightly * import pprint correctly * Update Documentation & Code Style * try making site-packages editable * Make pythonpath editable every time Tut17 is run on CI * typo * fix imports in tut5 * add git clean * Update Documentation & Code Style * add comments and remove` -e` * accidentally deleted a line * Update .github/utils/tutorials.sh Co-authored-by: Massimiliano Pippi <mpippi@gmail.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
115 lines
4.1 KiB
Bash
Executable File
115 lines
4.1 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
export LAUNCH_GRAPHDB=0 # See tut 10 - GraphDB is already running in CI
|
|
export TIKA_LOG_PATH=$PWD # Avoid permission denied errors while importing tika
|
|
|
|
python_path=$1
|
|
files_changed=$2
|
|
exclusion_list=$3
|
|
make_python_path_editable=$4
|
|
no_got_tutorials='4_FAQ_style_QA 5_Evaluation 7_RAG_Generator 8_Preprocessing 10_Knowledge_Graph 15_TableQA 16_Document_Classifier_at_Index_Time'
|
|
|
|
echo "Files changed in this PR: $files_changed"
|
|
echo "Excluding: $exclusion_list"
|
|
|
|
# Collect the tutorials to run
|
|
scripts_to_run=""
|
|
for script in $files_changed; do
|
|
|
|
if [[ "$script" != *"tutorials/Tutorial"* ]] || ([[ "$script" != *".py"* ]] && [[ "$script" != *".ipynb"* ]]); then
|
|
echo "- not a tutorial: $script"
|
|
continue
|
|
fi
|
|
|
|
skip_to_next=0
|
|
for excluded in $exclusion_list; do
|
|
if [[ "$script" == *"$excluded"* ]]; then skip_to_next=1; fi
|
|
done
|
|
if [[ $skip_to_next == 1 ]]; then
|
|
echo "- excluded: $script"
|
|
continue
|
|
fi
|
|
|
|
scripts_to_run="$scripts_to_run $script"
|
|
done
|
|
|
|
failed=""
|
|
for script in $scripts_to_run; do
|
|
|
|
echo ""
|
|
echo "##################################################################################"
|
|
echo "##################################################################################"
|
|
echo "## Running $script ..."
|
|
echo "##################################################################################"
|
|
echo "##################################################################################"
|
|
|
|
# Do not cache GoT data
|
|
reduce_dataset=1
|
|
for no_got_tut in $no_got_tutorials; do
|
|
if [[ "$script" == *"$no_got_tut"* ]]; then
|
|
reduce_dataset=0
|
|
fi
|
|
done
|
|
|
|
if [[ $reduce_dataset == 1 ]]; then
|
|
# Copy the reduced GoT data into a folder named after the tutorial
|
|
# to trigger the caching mechanism of `fetch_archive_from_http`
|
|
echo "Using reduced GoT dataset"
|
|
no_prefix=${script#"tutorials/Tutorial"}
|
|
split_on_underscore=(${no_prefix//_/ })
|
|
cp -r data/tutorials data/tutorial${split_on_underscore[0]}
|
|
else
|
|
echo "NOT using reduced GoT dataset!"
|
|
fi
|
|
|
|
# FIXME Make the Python path editable
|
|
# espnet needs to edit files on the PYTHONPATH during execution. However, by default GH runners don't allow
|
|
# workflows to edit files into that directory, so in case of tutorials using espnet, we need to make PYTHONPATH
|
|
# editable first. For now it's only Tutorial 17.
|
|
# Still unclear why it's needed to repeat this operation, but if Tutorial 17 is run twice (once for the .py
|
|
# and once for .ipynb version) the error re-appears.
|
|
if [[ $make_python_path_editable == "EDITABLE" ]] && [[ "$script" == *"Tutorial17_"* ]]; then
|
|
sudo find $python_path/lib -type f -exec chmod 777 {} \;
|
|
fi
|
|
|
|
if [[ "$script" == *".py" ]]; then
|
|
time python $script
|
|
else
|
|
sudo $python_path/bin/ipython -c "%run $script"
|
|
fi
|
|
|
|
if [ ! $? -eq 0 ]; then
|
|
failed=$failed" "$script
|
|
fi
|
|
|
|
# Clean up datasets and SQLite DBs to avoid crashing the next tutorial
|
|
git clean -f
|
|
|
|
done
|
|
|
|
# causes permission errors on Post Cache
|
|
sudo rm -rf data/
|
|
sudo rm -rf /home/runner/work/haystack/haystack/elasticsearch-7.9.2/
|
|
|
|
|
|
if [[ $failed == "" ]]; then
|
|
echo ""
|
|
echo ""
|
|
echo "------------------------------------------"
|
|
echo " All tutorials were executed successfully "
|
|
echo "------------------------------------------"
|
|
exit 0
|
|
|
|
else
|
|
echo ""
|
|
echo "##################################################################################"
|
|
echo "## ##"
|
|
echo "## Some tutorials have failed! ##"
|
|
echo "## ##"
|
|
echo "##################################################################################"
|
|
for script in $failed; do
|
|
echo "## - $script"
|
|
done
|
|
echo "##################################################################################"
|
|
exit 1
|
|
fi |