name: Linux CI

# Continuous integration for Linux runners.
#
# Job overview:
#   build-cache          builds the shared Python environment cache
#   type-check           runs mypy on haystack/, rest_api/ and ui/
#   linter               runs pylint on haystack/, rest_api/ and ui/
#   code-and-docs-check  fails if .github/utils/code_and_docs.sh leaves uncommitted changes
#   prepare-matrix       lists the test files and exposes them as a JSON matrix
#   tests                runs pytest once per test file against the service containers
#   test-milvus1         runs the document store and pipeline tests against Milvus 1
#   test-pinecone        runs the Pinecone tests when Pinecone-related files changed
#
# Every job that restores the Python environment uses the same cache key:
# the current date plus the hashes of setup.py, setup.cfg and pyproject.toml.

on:
  # Activate this workflow manually
  workflow_dispatch:

  # Activate this workflow when the PR is opened and code is added to it
  # Note: using pull_request instead of push to keep the CI workflows
  # running on our repo, not the contributor's. See autoformat.yml
  pull_request:
    types:
      - opened
      - synchronize
    paths-ignore:
      - '**/*.md'
      - '**/*.txt'
      - '**/*.png'
      - '**/*.gif'

  # Activate this workflow on every push of code changes on master
  push:
    branches:
      - master
    paths-ignore:
      - '**/*.md'
      - '**/*.txt'
      - '**/*.png'
      - '**/*.gif'

jobs:

  # Populates the Python environment cache that the jobs declaring
  # `needs: build-cache` restore afterwards.
  build-cache:
    runs-on: ubuntu-20.04
    timeout-minutes: 45
    steps:
      # Export today's date so it can be used as part of the cache key
      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - uses: actions/checkout@v2

      - uses: actions/setup-python@v2
        with:
          # Quoted so YAML keeps the version a string instead of a float
          python-version: '3.7'

      - name: Cache
        id: cache-python-env
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          # The cache will be rebuilt every day and at every change of the dependency files
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Install dependencies
        if: steps.cache-python-env.outputs.cache-hit != 'true'
        run: |
          pip install --upgrade pip
          pip install .[test]
          pip install rest_api/
          pip install ui/
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cpu.html
          echo "=== pip freeze ==="
          pip freeze

  # Static type checking. Does not use the shared cache (see the FIXME below).
  type-check:
    runs-on: ubuntu-20.04
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v2

      - uses: actions/setup-python@v2
        with:
          # Mypy can't run properly on 3.7 as it misses support for Literal types.
          # FIXME once we drop support for 3.7, use the cache.
          python-version: '3.8'

      - name: Setup mypy
        run: |
          # FIXME installing the packages before running mypy raises
          # a lot of errors which were never detected before!
          # pip install .
          # pip install rest_api/
          # pip install ui/

          # FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
          # Hotfixing by installing type packages explicitly.
          # Run mypy --install-types haystack locally to ensure the list is still up to date
          # mypy --install-types --non-interactive .
          pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf

      - name: Test with mypy
        run: |
          echo "=== haystack/ ==="
          mypy haystack

          echo "=== rest_api/ ==="
          mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/

          echo "=== ui/ ==="
          mypy ui --exclude=ui/build/ --exclude=ui/test/

  # Pylint over the three packages.
  linter:
    runs-on: ubuntu-20.04
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v2

      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - uses: actions/setup-python@v2
        with:
          python-version: '3.7'

      - name: Cache Python
        # The id is required: the install step below reads steps.cache.outputs.cache-hit
        id: cache
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Install Dependencies (on cache miss only)
        # The cache might miss during the execution of an action: there should always be a fallback step to
        # rebuild it in case it goes missing
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          pip install --upgrade pip
          pip install .[all]
          pip install rest_api/
          pip install ui/
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cpu.html
          echo "=== pip freeze ==="
          pip freeze

      - name: Linter
        run: |
          pylint -ry haystack/
          pylint -ry rest_api/
          pylint -ry ui/

  # This CI action mirrors autoformat.yml, with the difference that it
  # runs on Haystack's end. If the contributor hasn't run autoformat.yml,
  # then this check will fail.
  code-and-docs-check:
    needs: build-cache
    # NOTE(review): every other job runs on ubuntu-20.04, which is also where the
    # restored cache is built - confirm that using a different image here is intended.
    runs-on: ubuntu-latest
    timeout-minutes: 45
    # Skip on pushes. The whole comparison must be a single expression: writing
    # `${{ github.event_name }} != "push"` only interpolates the event name and
    # yields a non-empty string, which is always truthy.
    if: github.event_name != 'push'
    steps:
      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
          repository: ${{github.event.pull_request.head.repo.full_name}}
          ref: ${{ github.head_ref }}

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: '3.7'

      - name: Cache Python
        # The id is required: the install step below reads steps.cache.outputs.cache-hit
        id: cache
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Install Dependencies (on cache miss only)
        # The cache might miss during the execution of an action: there should always be a fallback step to
        # rebuild it in case it goes missing
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          pip install --upgrade pip
          pip install .[all]
          pip install rest_api/
          pip install ui/
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cpu.html

      # Get any additional commit that might have been pushed in the meantime
      - name: Pull changes (if any)
        env:
          # The branch name is controlled by the PR author, so it is handed over
          # through the environment instead of being interpolated into the script.
          HEAD_REF: ${{ github.head_ref }}
        run: |
          # Expands to no argument at all when HEAD_REF is empty
          git pull origin ${HEAD_REF:+"$HEAD_REF"}

      - name: Code and Docs Updates
        run: ./.github/utils/code_and_docs.sh

      # If there is anything to commit, fail
      - name: Check git status
        run: |
          if [[ `git status --porcelain` ]]; then
            git status
            echo ""
            echo "This means that the 'autoformat.yml' action didn't run."
            echo "Please enable GitHub Action on your fork to pass this check!"
            echo "See https://github.com/deepset-ai/haystack/blob/master/CONTRIBUTING.md#forks for instructions"
            exit 1
          fi

  # Collects every test_*.py file located under a directory named "test"
  # (directories matching ./*env*/ are skipped) into a JSON array that feeds
  # the matrix of the "tests" job.
  prepare-matrix:
    needs: build-cache
    runs-on: ubuntu-20.04
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v2

      - id: set-matrix
        run: |
          # The file pattern is quoted so that the shell never expands it before find sees it
          matrix="$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name 'test_*.py' | jq -SR . | jq -cs .)"
          # Print the list in the job log
          echo "$matrix"
          # jq -c emits a single line, so the plain name=value form is sufficient
          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}

  # One job per test file. The service containers are started before the
  # dependencies are installed.
  tests:
    needs: prepare-matrix
    runs-on: ubuntu-20.04
    timeout-minutes: 45
    strategy:
      matrix:
        test-path: ${{fromJson(needs.prepare-matrix.outputs.matrix)}}
      fail-fast: false
    steps:
      - uses: actions/checkout@v2

      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: '3.7'

      - name: Cache Python
        # The id is required: the install step below reads steps.cache.outputs.cache-hit
        id: cache
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Run Elasticsearch
        run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2

      - name: Run Opensearch
        run: docker run -d -p 9201:9200 -p 9600:9600 -e "discovery.type=single-node" opensearchproject/opensearch:1.2.4

      - name: Run Milvus
        run: |
          cd ../../   # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
          wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
          sudo docker-compose up -d
          sudo docker-compose ps

      - name: Run Weaviate
        run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' semitechnologies/weaviate:1.11.0

      - name: Run GraphDB
        run: docker run -d -p 7200:7200 --name haystack_test_graphdb deepset/graphdb-free:9.4.1-adoptopenjdk11

      # NOTE(review): TIKA_CHILD_JAVA_OPTS is passed twice; presumably only the last
      # value (-JXmx128m) takes effect - confirm, and merge both flags into a single
      # variable if both are meant to apply.
      - name: Run Apache Tika
        run: docker run -d -p 9998:9998 -e "TIKA_CHILD_JAVA_OPTS=-JXms128m" -e "TIKA_CHILD_JAVA_OPTS=-JXmx128m" apache/tika:1.24.1

      - name: Run Parsr
        run: docker run -d -p 3001:3001 axarev/parsr:v1.2.2

      # NOTE(review): --no-check-certificate disables TLS verification for this download
      - name: Install pdftotext
        run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

      - name: Install tesseract
        run: |
          sudo apt update
          sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils

      - name: Install Dependencies (on cache miss only)
        # The cache might miss during the execution of an action: there should always be a fallback step to
        # rebuild it in case it goes missing
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          pip install --upgrade pip
          pip install .[all]
          pip install rest_api/
          pip install ui/
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cpu.html

      # Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested.
      # The cache can last way longer than a specific action's run, so older Haystack version could be carried over.
      - name: Reinstall Haystack
        run: |
          pip install .[test]
          pip install rest_api/
          pip install ui/

      - name: Run tests
        run: pytest -s ${{ matrix.test-path }}

  # Runs a fixed list of test files with --document_store_type="milvus1"
  # against a Milvus 1 container.
  test-milvus1:
    needs: build-cache
    runs-on: ubuntu-20.04
    timeout-minutes: 45
    steps:
      - uses: actions/checkout@v2

      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: '3.7'

      - name: Cache Python
        # The id is required: the install step below reads steps.cache.outputs.cache-hit
        id: cache
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Run Milvus 1
        run: docker run -d -p 19530:19530 -p 19121:19121 milvusdb/milvus:1.1.0-cpu-d050721-5e559c

      # NOTE(review): --no-check-certificate disables TLS verification for this download
      - name: Install pdftotext
        run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

      - name: Install Dependencies (on cache miss only)
        # The cache might miss during the execution of an action: there should always be a fallback step to
        # rebuild it in case it goes missing
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          pip install --upgrade pip
          pip install .[test]
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cpu.html

      # Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested.
      # The cache can last way longer than a specific action's run, so older Haystack version could be carried over.
      - name: Reinstall Haystack
        run: |
          pip install .[test]
          pip install .[milvus1]

      - name: Run Milvus1 tests
        run: |
          export MILVUS1_ENABLED=1
          pytest -s test/document_stores/test_document_store.py test/pipelines/test_eval.py test/document_stores/test_faiss_and_milvus.py test/pipelines/test_pipeline.py test/nodes/test_retriever.py test/pipelines/test_standard_pipelines.py --document_store_type="milvus1"

  # Runs the Pinecone-backed tests, but only when one of the files listed in
  # the "Files related to Pinecone" step has changed.
  test-pinecone:
    needs: build-cache
    runs-on: ubuntu-20.04
    # we have to recreate indexes multiple times: that takes a significant amount of time on pinecone
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v2

      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: '3.7'

      - name: Cache Python
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      # NOTE(review): --no-check-certificate disables TLS verification for this download
      - name: Install pdftotext
        run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

      # Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested.
      # The cache can last way longer than a specific action's run, so older Haystack version could be carried over.
      - name: Reinstall Haystack
        run: |
          pip install .[test]

      # NOTE(review): third-party action referenced by tag - consider pinning it to a full commit SHA
      - name: Files related to Pinecone
        id: pinecone-files
        uses: tj-actions/changed-files@v18.4
        with:
          files: |
            **/document_stores/pinecone.py
            **/document_stores/sql.py
            **/document_stores/base.py
            **/document_stores/filter_utils.py

      - name: Run tests
        env:
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        if: steps.pinecone-files.outputs.any_changed == 'true'
        run: |
          pytest -s test/document_stores/test_document_store.py test/pipelines/test_pipeline.py test/pipelines/test_standard_pipelines.py test/pipelines/test_pipeline_extractive_qa.py --document_store_type="pinecone"