haystack/.github/workflows/tutorials.yml
Vladimir Blagojevic ffb7e4e4bd
GPL tutorial - add GPU header and open in colab button (#2736)
* GPL tutorial - add GPU header and open in colab button

* Add GPL tutorial to run exclusion list
2022-07-04 05:23:39 -04:00

75 lines
2.6 KiB
YAML

name: Tutorials

# Triggers: a manual dispatch, or a pull request that changes a file
# matching the tutorials path filter below.
on:
  # Activate this workflow manually
  workflow_dispatch:
  pull_request:
    paths:
      - 'tutorials/*.*'
jobs:
  # Single job: starts the backing services as Docker containers, installs
  # Haystack from the checked-out commit, then hands the changed files to
  # .github/utils/tutorials.sh.
  run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # Export today's date as env.date; it is one component of the cache key below.
      - run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is always read as a string, never as a float.
          python-version: '3.7'

      # The key combines the date with hashes of the packaging files, so the
      # cached Python location is refreshed when the day or any of them changes.
      - name: Cache Python
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Run Elasticsearch
        run: >-
          docker run -d -p 9200:9200
          -e "discovery.type=single-node"
          -e "ES_JAVA_OPTS=-Xms128m -Xmx128m"
          elasticsearch:7.9.2

      # Both JVM options go into ONE variable: when the same variable is passed
      # to `docker run -e` twice, only the last value survives, which silently
      # dropped -JXms128m before.
      # NOTE(review): assumes the image splits TIKA_CHILD_JAVA_OPTS on spaces - confirm.
      - name: Run Apache Tika
        run: >-
          docker run -d -p 9998:9998
          -e "TIKA_CHILD_JAVA_OPTS=-JXms128m -JXmx128m"
          apache/tika:1.24.1

      - name: Run GraphDB
        run: >-
          docker run -d -p 7200:7200
          --name graphdb-instance-tutorial
          docker-registry.ontotext.com/graphdb-free:9.4.1-adoptopenjdk11

      # NOTE(review): --no-check-certificate disables TLS verification for a
      # binary that is then copied into /usr/local/bin with sudo. Kept as-is
      # because removing it may break the download; consider pinning a checksum.
      - name: Install pdftotext
        run: |
          wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz
          tar -xvf xpdf-tools-linux-4.04.tar.gz
          sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

      # apt-get with -y: the runner is non-interactive, so never wait for a prompt.
      - name: Install graphviz
        run: sudo apt-get install -y libgraphviz-dev graphviz

      # Haystack needs to be reinstalled at this stage to make sure the current
      # commit's version is the one getting tested. The cache can outlive a
      # single run, so an older Haystack version could otherwise be carried over.
      - name: Reinstall Haystack
        run: |
          pip install --upgrade pip
          # Quoted so the shell cannot treat [all] as a glob character class.
          pip install '.[all]'
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.0+cpu.html
          pip install pygraphviz
          pip install ipython nbformat

      # Download and unpack the mini dataset into data/tutorials ahead of the run.
      - name: Cache mini GoT dataset
        run: |
          mkdir -p data/tutorials
          cd data/tutorials
          wget https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1_mini.zip -q &> /dev/null
          unzip wiki_gameofthrones_txt1_mini.zip
          rm wiki_gameofthrones_txt1_mini.zip

      # Collect the files touched by the pull request. A failure here does not
      # stop the job; the output is then empty.
      - uses: jitterbit/get-changed-files@v1
        id: diff
        continue-on-error: true
        with:
          format: space-delimited
          token: ${{ secrets.GITHUB_TOKEN }}

      # The changed-file list comes from the pull request, so it is passed
      # through an environment variable instead of being expanded into the
      # script text, where a crafted file name could inject shell commands.
      # NOTE(review): the last argument is presumably the list of tutorial
      # prefixes to skip - confirm against tutorials.sh.
      - name: Run tutorials
        env:
          CHANGED_FILES: ${{ steps.diff.outputs.added_modified }}
        run: >-
          ./.github/utils/tutorials.sh
          "${{ env.pythonLocation }}"
          "$CHANGED_FILES"
          "Tutorial2_ Tutorial9_ Tutorial13_ Tutorial18_"