haystack/.github/workflows/tutorials_nightly.yml
Daniel Bichuetti e1f399284f
refactor: update dependencies and remove pins (#3147)
* refactor: remove azure-core, pydoc and hf-hub pins

* fix: remove extra-comma

* fix: force minimum version of azure forms recognizer

* refactor: allow newer ocr libs

* refactor: update more dependencies and container versions

* refactor: remove extra comment

* docs: pre-commit manual run

* refactor: remove unnecessary dependency

* tests: update weaviate container image version
2022-09-05 14:30:35 +02:00

111 lines
3.8 KiB
YAML

# Nightly run of the Haystack tutorials, both as notebooks (*.ipynb) and as
# plain Python scripts (*.py). See the two jobs defined under `jobs:`.
name: Tutorials (nightly)

on:
  workflow_dispatch:  # Activate this workflow manually
  schedule:
    # Once a day at midnight (GitHub evaluates cron schedules in UTC)
    - cron: '0 0 * * *'

env:
  # Tutorials that require a GPU to run, so can't be run on CI without self-hosted runners.
  # The value is forwarded verbatim to .github/utils/tutorials.sh by the "Run tutorials" steps.
  DONT_RUN: Tutorial2_ Tutorial9_ Tutorial13_ Tutorial18_
jobs:
  # Runs the notebook (*.ipynb) version of the tutorials.
  notebooks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML keeps the version a string instead of parsing a float
          python-version: '3.7'

      # Service containers are started detached (-d) with their ports published on the runner.
      - name: Run Elasticsearch
        run: >-
          docker run -d -p 9200:9200
          -e "discovery.type=single-node"
          -e "ES_JAVA_OPTS=-Xms128m -Xmx256m"
          elasticsearch:7.9.2

      # Both heap flags live in ONE variable: passing `-e` twice with the same
      # name makes docker keep only the last value, which dropped -JXms128m.
      # NOTE(review): confirm the image honours a space-separated value here.
      - name: Run Apache Tika
        run: >-
          docker run -d -p 9998:9998
          -e "TIKA_CHILD_JAVA_OPTS=-JXms128m -JXmx128m"
          apache/tika:1.28.4

      - name: Run GraphDB
        run: >-
          docker run -d -p 7200:7200
          --name graphdb-instance-tutorial
          docker-registry.ontotext.com/graphdb-free:9.4.1-adoptopenjdk11

      # NOTE(review): --no-check-certificate disables TLS verification for a
      # binary that is then copied into /usr/local/bin. Drop the flag once the
      # download host's certificate chain validates on the runner.
      - name: Install pdftotext
        run: |
          wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz
          tar -xvf xpdf-tools-linux-4.04.tar.gz
          sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

      # Some tutorials require these libraries to run
      # - Tutorial 17 for the audio libs
      # - Tutorial 11, 14, 15, 16 for pygraphviz
      - name: Install graphviz and audio libs
        # Refresh the package index first and never wait for a confirmation prompt
        run: |
          sudo apt-get update
          sudo apt-get install -y libgraphviz-dev graphviz libsndfile1 ffmpeg

      # Some tutorials require these libraries to run
      # - Tutorial 15
      - name: Install torch-scatter
        run: pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.0+cpu.html

      - name: Install Haystack
        # '.[all]' is quoted so the shell cannot treat the brackets as a glob pattern
        run: |
          pip install --upgrade pip
          pip install '.[all]'
          pip install pygraphviz
          pip install ipython nbformat

      # Downloads and unpacks the dataset into data/tutorials before the tutorials run.
      - name: Cache mini GoT dataset
        run: |
          mkdir -p data/tutorials
          cd data/tutorials
          wget https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1_mini.zip -q &> /dev/null
          unzip wiki_gameofthrones_txt1_mini.zip
          rm wiki_gameofthrones_txt1_mini.zip

      - name: Run tutorials
        run: ./.github/utils/tutorials.sh ${{ env.pythonLocation }} "tutorials/*.ipynb" "${{ env.DONT_RUN }}" "EDITABLE" "RESTART"

  # Runs the plain Python script (*.py) version of the tutorials.
  # NOTE(review): unlike `notebooks`, this job starts no Elasticsearch, Tika or
  # GraphDB containers - confirm the scripts do not depend on them.
  scripts:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML keeps the version a string instead of parsing a float
          python-version: '3.7'

      # NOTE(review): --no-check-certificate disables TLS verification for a
      # binary that is then copied into /usr/local/bin. Drop the flag once the
      # download host's certificate chain validates on the runner.
      - name: Install pdftotext
        run: |
          wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz
          tar -xvf xpdf-tools-linux-4.04.tar.gz
          sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

      # Some tutorials require these libraries to run
      # - Tutorial 17 for the audio libs
      # - Tutorial 11, 14, 15, 16 for pygraphviz
      - name: Install graphviz and audio libs
        # Refresh the package index first and never wait for a confirmation prompt
        run: |
          sudo apt-get update
          sudo apt-get install -y libgraphviz-dev graphviz libsndfile1 ffmpeg

      # Some tutorials require these libraries to run
      # - Tutorial 15
      - name: Install torch-scatter
        run: pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.0+cpu.html

      - name: Install Haystack
        # '.[all]' is quoted so the shell cannot treat the brackets as a glob pattern
        run: |
          pip install --upgrade pip
          pip install '.[all]'
          pip install pygraphviz
          pip install ipython nbformat

      # Downloads and unpacks the dataset into data/tutorials before the tutorials run.
      - name: Cache mini GoT dataset
        run: |
          mkdir -p data/tutorials
          cd data/tutorials
          wget https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1_mini.zip -q &> /dev/null
          unzip wiki_gameofthrones_txt1_mini.zip
          rm wiki_gameofthrones_txt1_mini.zip

      - name: Run tutorials
        run: ./.github/utils/tutorials.sh ${{ env.pythonLocation }} "tutorials/*.py" "${{ env.DONT_RUN }}" "EDITABLE" "RESTART"