Vladimir Blagojevic 86d56b4dfe
Add HF model caching for integration tests (#2909)
* Add HF model caching for integration tests

* Remove windows mode caching - not worth it
2022-07-29 18:17:05 +02:00

639 lines
20 KiB
YAML

# Workflow header: display name, triggers and environment shared by all jobs.
name: Tests

on:
  # Allows the workflow to be started manually
  workflow_dispatch:
  push:
    branches: [master]
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]

env:
  # Options passed to every pytest invocation in this workflow
  PYTEST_PARAMS: --maxfail=5 --durations=10 --suppress-no-test-exit-code
  # Test modules ignored by the Windows jobs. The folded scalar joins the
  # lines with single spaces, producing one command-line fragment.
  SUITES_EXCLUDED_FROM_WINDOWS: >-
    --ignore=test/pipelines/test_ray.py
    --ignore=test/document_stores/test_knowledge_graph.py
    --ignore=test/nodes/test_audio.py
    --ignore=test/nodes/test_connector.py
    --ignore=test/nodes/test_summarizer_translation.py
    --ignore=test/nodes/test_summarizer.py
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

jobs:
# Static type checking of haystack/, rest_api/ and ui/ with mypy.
mypy:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: actions/setup-python@v2
      with:
        # Mypy can't run properly on 3.7 as it misses support for Literal types.
        # FIXME once we drop support for 3.7, use the cache.
        # Quoted so YAML keeps the version a string: a bare 3.10 would be
        # loaded as the float 3.1 if this is ever bumped.
        python-version: "3.8"

    - name: Install dependencies
      run: |
        # FIXME installing the packages before running mypy raises
        # a lot of errors which were never detected before!
        # pip install .
        # pip install rest_api/
        # pip install ui/
        # FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
        # Hotfixing by installing type packages explicitly.
        # Run mypy --install-types haystack locally to ensure the list is still up to date
        # mypy --install-types --non-interactive .
        pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf

    # Each package is checked separately; build/ and test/ folders of
    # rest_api and ui are excluded from the check.
    - name: Mypy
      run: |
        echo "=== haystack/ ==="
        mypy haystack
        echo "=== rest_api/ ==="
        mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/
        echo "=== ui/ ==="
        mypy ui --exclude=ui/build/ --exclude=ui/test/
# Lints haystack/, rest_api/ and ui/ with pylint.
pylint:
  runs-on: ubuntu-latest
  steps:
    # A single checkout is enough; the repository was previously checked
    # out twice in a row, which only added runtime.
    - name: Checkout
      uses: actions/checkout@v2

    # Repository-local action; needs the checkout above to be resolvable.
    - name: Setup Python
      uses: ./.github/actions/python_cache/

    - name: Pylint
      run: |
        pylint -ry -j 0 haystack/ rest_api/ ui/
# Unit tests on Linux, one matrix entry per test folder, run against the
# in-memory document store only.
unit-tests-linux:
  needs:
    - mypy
    - pylint
  strategy:
    fail-fast: false  # Avoid cancelling the others if one of these fails
    matrix:
      folder:
        - "nodes"
        - "pipelines"
        - "modeling"
        - "others"
        - "document_stores/test_opensearch.py"
  runs-on: ubuntu-latest
  timeout-minutes: 30
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    - name: Install torch-scatter
      run: pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.0+cpu.html

    # TODO Let's try to remove this one from the unit tests
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    # TODO evaluate if tests that need these libraries are really unit tests
    - name: Install audio libraries
      run: |
        sudo apt-get update
        # -y answers the confirmation prompt so the step does not depend on
        # the runner image pre-configuring apt to assume "yes".
        sudo apt-get install -y libsndfile1 ffmpeg

    - name: Install Haystack
      run: pip install .[audio]

    # The marker expression deselects every external document store and all
    # integration tests.
    - name: Run tests
      env:
        TOKENIZERS_PARALLELISM: 'false'
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not milvus1 and not weaviate and not pinecone and not integration" test/${{ matrix.folder }} --document_store_type=memory
# Unit tests on Windows against the in-memory document store.
unit-tests-windows:
  runs-on: windows-latest
  needs: [mypy, pylint]
  # Runs when the PR carries the 'topic:windows' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
  timeout-minutes: 30
  strategy:
    # Keep the remaining matrix entries running when one of them fails
    fail-fast: false
    matrix:
      folder:
        - 'nodes'
        - 'pipelines'
        - 'modeling'
        # - 'others'
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/
      with:
        # presumably separates the Windows cache from the Linux one; confirm in the action
        prefix: windows

    - name: Install pdftotext
      run: |
        choco install xpdf-utils
        choco install openjdk11
        refreshenv

    # - name: Install sndfile (audio support)  # https://github.com/libsndfile/libsndfile/releases/download/1.1.0/libsndfile-1.1.0-win64.zip

    - name: Install Haystack
      run: pip install .

    # Same marker expression as the Linux unit tests, plus the
    # Windows-specific --ignore list from the workflow-level env.
    - name: Run tests
      env:
        TOKENIZERS_PARALLELISM: "false"
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not milvus1 and not weaviate and not pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory
# Document-store tests on Linux against an Elasticsearch 7.9.2 container.
elasticsearch-tests-linux:
  runs-on: ubuntu-latest
  needs: [mypy, pylint]
  steps:
    - uses: actions/checkout@v2

    # Single-node Elasticsearch, detached, published on port 9200
    - name: Setup Elasticsearch
      run: |
        docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2

    # TODO: try to drop this dependency from the unit tests
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    - name: Install Haystack
      run: pip install .

    # Only tests marked 'elasticsearch' that are not integration tests
    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ --document_store_type=elasticsearch
      env:
        TOKENIZERS_PARALLELISM: "false"
# Document-store tests on Windows against a locally installed
# Elasticsearch 7.9.2 service.
elasticsearch-tests-windows:
  needs:
    - mypy
    - pylint
  runs-on: windows-latest
  # Runs when the PR carries the 'topic:windows' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Set up Elasticsearch and pdftotext
      run: |
        choco install xpdf-utils
        choco install openjdk11
        refreshenv
        choco install elasticsearch --version=7.9.2
        refreshenv
        Get-Service elasticsearch-service-x64 | Start-Service

    # Python must be set up before Haystack is installed, as in every other
    # job of this workflow; otherwise `pip install .` targets whatever
    # interpreter happens to be on the runner.
    - name: Setup Python
      uses: ./.github/actions/python_cache/
      with:
        prefix: windows

    - name: Install Haystack
      run: pip install .

    - name: Run tests
      env:
        TOKENIZERS_PARALLELISM: 'false'
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} --document_store_type=elasticsearch
# Document-store tests on Linux using the FAISS document store.
faiss-tests-linux:
  runs-on: ubuntu-latest
  needs: [mypy, pylint]
  # Runs when the PR carries the 'topic:faiss' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:faiss') || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # TODO: try to drop this dependency from the unit tests
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    # Installs Haystack together with its 'faiss' extra
    - name: Install Haystack
      run: pip install .[faiss]

    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=faiss
      env:
        TOKENIZERS_PARALLELISM: "false"
# Document-store tests on Windows using the FAISS document store.
faiss-tests-windows:
  needs:
    - mypy
    - pylint
  runs-on: windows-latest
  # Runs when the PR carries both the 'topic:faiss' and 'topic:windows'
  # labels, or when it is not a draft. The draft check used to be repeated;
  # the duplicate is dropped and the grouping made explicit. The expression
  # evaluates exactly as before because && already bound tighter than ||.
  if: (contains(github.event.pull_request.labels.*.name, 'topic:faiss') && contains(github.event.pull_request.labels.*.name, 'topic:windows')) || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/
      with:
        prefix: windows

    - name: Install pdftotext
      run: |
        choco install xpdf-utils
        choco install openjdk11
        refreshenv

    - name: Install Haystack
      run: pip install .[faiss]

    - name: Run tests
      env:
        TOKENIZERS_PARALLELISM: 'false'
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
# Document-store tests on Linux against a Milvus v2.0.0 standalone
# deployment started with docker-compose.
milvus-tests-linux:
  runs-on: ubuntu-latest
  needs: [mypy, pylint]
  # Runs when the PR carries the 'topic:milvus' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:milvus') || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # The compose file is downloaded and started two directories above the
    # working directory (see the inline shell comment for the reason).
    - name: Setup Milvus
      run: |
        cd ../../ # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
        wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
        sudo docker-compose up -d
        sudo docker-compose ps

    # TODO: try to drop this dependency from the unit tests
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    # Installs Haystack together with its 'milvus' extra
    - name: Install Haystack
      run: pip install .[milvus]

    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=milvus
      env:
        TOKENIZERS_PARALLELISM: "false"
# FIXME: seems like we can't run containers on Windows
# milvus-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:milvus') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft || !github.event.pull_request.draft
# steps:
# - uses: actions/checkout@v2
# - name: Setup Python
# uses: ./.github/actions/python_cache/
# with:
# prefix: windows
# - name: Setup Milvus
# run: |
# cd ../../ # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
# wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
# sudo docker-compose up -d
# sudo docker-compose ps
# - name: Install pdftotext
# run: |
# choco install xpdf-utils
# choco install openjdk11
# refreshenv
# - name: Install Haystack
# run: pip install .[milvus]
# - name: Run tests
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
# Document-store tests on Linux against a Weaviate 1.14.1 container.
weaviate-tests-linux:
  runs-on: ubuntu-latest
  needs: [mypy, pylint]
  # Runs when the PR carries the 'topic:weaviate' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:weaviate') || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # Detached container named haystack_test_weaviate, published on port 8080
    - name: Setup Weaviate
      run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --env ENABLE_EXPERIMENTAL_BM25='true' semitechnologies/weaviate:1.14.1

    # TODO: try to drop this dependency from the unit tests
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    # Installs Haystack together with its 'weaviate' extra
    - name: Install Haystack
      run: pip install .[weaviate]

    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=weaviate
      env:
        TOKENIZERS_PARALLELISM: "false"
# FIXME: seems like we can't run containers on Windows
# weaviate-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:weaviate') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
# steps:
# - uses: actions/checkout@v2
# - name: Setup Python
# uses: ./.github/actions/python_cache/
# with:
# prefix: windows
# - name: Setup Weaviate
# run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --env ENABLE_EXPERIMENTAL_BM25='true' semitechnologies/weaviate:1.14.1
# - name: Install pdftotext
# run: |
# choco install xpdf-utils
# choco install openjdk11
# refreshenv
# - name: Install Haystack
# run: pip install .[weaviate]
# - name: Run tests
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
# Document-store tests on Linux using the Pinecone document store.
pinecone-tests-linux:
  runs-on: ubuntu-latest
  needs: [mypy, pylint]
  # Runs when the PR carries the 'topic:pinecone' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # TODO: try to drop this dependency from the unit tests
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    # Installs Haystack together with its 'pinecone' extra
    - name: Install Haystack
      run: pip install .[pinecone]

    # The Pinecone API key is read from the repository secrets
    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
      env:
        TOKENIZERS_PARALLELISM: "false"
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
# Document-store tests on Windows using the Pinecone document store.
pinecone-tests-windows:
  runs-on: windows-latest
  needs: [mypy, pylint]
  # Runs when the PR carries both the 'topic:pinecone' and 'topic:windows'
  # labels, or when it is not a draft (&& binds tighter than ||).
  if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/
      with:
        # presumably separates the Windows cache from the Linux one; confirm in the action
        prefix: windows

    - name: Install pdftotext
      run: |
        choco install xpdf-utils
        choco install openjdk11
        refreshenv

    # Installs Haystack together with its 'pinecone' extra
    - name: Install Haystack
      run: pip install .[pinecone]

    # The Pinecone API key is read from the repository secrets
    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
      env:
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        TOKENIZERS_PARALLELISM: "false"
# Tests for the REST API and UI packages, run on both Windows and Linux.
rest-and-ui:
  needs: [mypy, pylint]
  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os:
        - windows-latest
        - ubuntu-latest
  steps:
    - uses: actions/checkout@v2

    # NOTE(review): unlike the other Windows jobs, no `prefix: windows` is
    # passed here for the Windows matrix entry; confirm this is intended.
    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # -U upgrades both packages if they are already installed
    - name: Install REST API and UI
      run: |
        pip install -U rest_api/
        pip install -U ui/

    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/
# Integration tests on Linux. Starts every external service the tests talk to
# (Elasticsearch, OpenSearch, Milvus, Weaviate, GraphDB, Tika, Parsr) and runs
# the tests marked 'integration', one matrix entry per test folder.
integration-tests-linux:
  needs:
    - unit-tests-linux
    - elasticsearch-tests-linux
  timeout-minutes: 60
  strategy:
    fail-fast: false  # Avoid cancelling the others if one of these fails
    matrix:
      folder:
        - "nodes"
        - "pipelines"
        - "modeling"
        - "others"
        - "document_stores/test_opensearch.py"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # NOTE(review): the key is a fixed string, so once a cache exists the
    # download step below is skipped even if the model list changes; the key
    # has to be changed by hand to refresh the cache.
    - name: Cache HF models
      id: cache-hf-models
      uses: actions/cache@v3
      with:
        path: ~/.cache/huggingface/transformers/
        key: hf-models

    # Pre-populates the cache directory; only runs on a cache miss
    - name: Download models
      if: steps.cache-hf-models.outputs.cache-hit != 'true'
      run: |
        python -c "from transformers import AutoModel;[AutoModel.from_pretrained(model_name) for model_name in ['vblagoje/bart_lfqa','yjernite/bart_eli5', 'google/pegasus-xsum', 'vblagoje/dpr-ctx_encoder-single-lfqa-wiki', 'vblagoje/dpr-question_encoder-single-lfqa-wiki', 'facebook/dpr-question_encoder-single-nq-base', 'facebook/dpr-ctx_encoder-single-nq-base', 'yjernite/retribert-base-uncased']]"

    - name: Run Elasticsearch
      run: |
        docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2

    # Host port 9201 is used because 9200 is taken by Elasticsearch above
    - name: Run Opensearch
      run: |
        docker run -d -p 9201:9200 -p 9600:9600 -e "discovery.type=single-node" opensearchproject/opensearch:1.2.4

    - name: Run Milvus
      run: |
        cd ../../ # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
        wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
        sudo docker-compose up -d
        sudo docker-compose ps

    - name: Run Weaviate
      run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --env ENABLE_EXPERIMENTAL_BM25='true' semitechnologies/weaviate:1.14.1

    - name: Run GraphDB
      run: docker run -d -p 7200:7200 --name haystack_test_graphdb deepset/graphdb-free:9.4.1-adoptopenjdk11

    # NOTE(review): TIKA_CHILD_JAVA_OPTS is passed twice; presumably only the
    # last value (-JXmx128m) reaches the container. Confirm, and merge both
    # options into a single -e if both are wanted.
    - name: Run Apache Tika
      run: docker run -d -p 9998:9998 -e "TIKA_CHILD_JAVA_OPTS=-JXms128m" -e "TIKA_CHILD_JAVA_OPTS=-JXmx128m" apache/tika:1.24.1

    - name: Run Parsr
      run: docker run -d -p 3001:3001 axarev/parsr:v1.2.2

    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    # apt-get is used throughout (instead of mixing in `apt`), and -y answers
    # the confirmation prompt so the steps do not depend on the runner image
    # pre-configuring apt to assume "yes".
    - name: Install tesseract
      run: |
        sudo apt-get update
        sudo apt-get install -y tesseract-ocr libtesseract-dev poppler-utils

    - name: Install audio libraries
      run: |
        sudo apt-get update
        sudo apt-get install -y libsndfile1 ffmpeg

    - name: Install Haystack
      run: pip install .

    - name: Run tests
      env:
        TOKENIZERS_PARALLELISM: 'false'  # Avoid logspam by tokenizers
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "integration" test/${{ matrix.folder }}
# Integration tests on Windows. Only Elasticsearch is installed locally, so
# tests marked 'tika' or 'graphdb' are deselected.
integration-tests-windows:
  runs-on: windows-latest
  needs: [unit-tests-windows, elasticsearch-tests-windows]
  # Runs when the PR carries the 'topic:windows' label or is not a draft
  if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
  timeout-minutes: 30
  strategy:
    # Keep the remaining matrix entries running when one of them fails
    fail-fast: false
    matrix:
      folder: ['nodes', 'pipelines', 'modeling', 'others']
  steps:
    - uses: actions/checkout@v2

    # Installs xpdf-utils, OpenJDK 11, tesseract (pre-release) and
    # Elasticsearch 7.9.2, then starts the Elasticsearch Windows service.
    - name: Set up Elasticsearch and pdftotext
      run: |
        choco install xpdf-utils
        choco install openjdk11
        refreshenv
        choco install tesseract --pre
        choco install elasticsearch --version=7.9.2
        refreshenv
        Get-Service elasticsearch-service-x64 | Start-Service

    - name: Setup Python
      uses: ./.github/actions/python_cache/
      with:
        # presumably separates the Windows cache from the Linux one; confirm in the action
        prefix: windows

    - name: Install Haystack
      run: pip install .

    # FIXME: many tests are disabled here!
    - name: Run tests
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "integration and not tika and not graphdb" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory,faiss,elasticsearch
      env:
        # Silences the log spam emitted by tokenizers
        TOKENIZERS_PARALLELISM: "false"