feat: separate out preview tests (#5639)

* add preview workflows

* feedback

* feedback

* use preview extra

* remove coverage and add separate e2e

* rename workflow file for consistency

* trigger ci

* undo trigger

* torch import in testing

* add deps to unit tests

* feedback

* run container instead of service

* comment

* add if statement

* fix tika version

* separate out win integration tests

* separate out all CIs

* try installing docker on macos

* exclude tika

* remove tika docker
This commit is contained in:
ZanSara 2023-09-29 13:16:08 +02:00 committed by GitHub
parent d61df24b27
commit 81b2e83d04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 410 additions and 6 deletions

View File

@ -4,8 +4,10 @@ on:
workflow_run:
workflows:
- "end-to-end"
- "end-to-end (Preview)"
- "Linting"
- "Tests"
- "Tests (Preview)"
- "REST API Tests"
types:
- completed

View File

@ -13,6 +13,7 @@ on:
- ready_for_review
paths:
- "e2e/**/*.py"
- "!e2e/preview/**/*.py" # See e2e_preview.yml
- ".github/workflows/e2e.yml"
env:

42
.github/workflows/e2e_preview.yml vendored Normal file
View File

@ -0,0 +1,42 @@
# End-to-end tests for the code under e2e/preview.
# Runs on a nightly schedule, on manual dispatch, and on pull requests that
# touch the preview e2e tests or this workflow file.
# If you change this name also do it in ci_metrics.yml
name: end-to-end (Preview)

on:
  workflow_dispatch: # Activate this workflow manually
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
      - ready_for_review
    paths:
      - "e2e/preview/**/*.py"
      - ".github/workflows/e2e_preview.yml"

env:
  # Quoted so the version is read as a string, not a float.
  PYTHON_VERSION: "3.8"
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

jobs:
  run:
    timeout-minutes: 60
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        # apt-get is the script-stable front end; -y keeps the install
        # non-interactive without relying on runner-side apt configuration.
        run: |
          sudo apt-get update
          sudo apt-get install -y ffmpeg # for local Whisper tests

      - name: Install Haystack
        # Requirement specifiers containing [ ] or >= are quoted so the shell
        # never treats them as glob patterns or redirections.
        run: pip install '.[dev,preview]' langdetect 'transformers[torch,sentencepiece]==4.32.1' 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'

      - name: Run tests
        run: pytest e2e/preview

View File

@ -17,6 +17,8 @@ on:
paths:
- "**.py"
- "pyproject.toml"
- "!haystack/preview/**/*.py" # See tests_preview.yml
- "!test/preview/**/*.py" # See tests_preview.yml
- "!.github/**/*.py"
- "!rest_api/**/*.py"
- "!docs/**/*.py"

324
.github/workflows/tests_preview.yml vendored Normal file
View File

@ -0,0 +1,324 @@
# Test workflow for the preview code (haystack/preview and test/preview).
# If you change this name also do it in tests_preview_skipper.yml
# NOTE(review): ci_metrics.yml appears to list this workflow name as well;
# confirm and keep it in sync when renaming.
name: Tests (Preview)

on:
  workflow_dispatch: # Activate this workflow manually
  push:
    branches:
      - main
      # release branches have the form v1.9.x
      - "v[0-9].*[0-9].x"
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    # Only pull requests touching preview sources or preview tests trigger
    # this workflow.
    paths:
      - "haystack/preview/**/*.py"
      - "test/preview/**/*.py"

env:
  # Quoted so the version is read as a string, not a float.
  PYTHON_VERSION: "3.8"
  # Credentials exposed to every job in this workflow.
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
  CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }}
  CORE_AZURE_CS_API_KEY: ${{ secrets.CORE_AZURE_CS_API_KEY }}
jobs:
black:
  # Formatting gate: the job fails when `black . --check` reports issues.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install Black
      run: |
        pip install --upgrade pip
        pip install .[formatting]

    - name: Check status
      run: |
        # A clean check ends the step here; everything below is the failure path.
        if black . --check; then
          exit 0
        fi
        git status
        echo "###################################################################################################"
        echo "# "
        echo "# CHECK FAILED! Black found issues with your code formatting."
        echo "# "
        echo "# Either:"
        echo "# 1. Run Black locally before committing:"
        echo "# "
        echo "# pip install .[formatting]"
        echo "# black ."
        echo "# "
        echo "# 2. Install the pre-commit hook:"
        echo "# "
        echo "# pre-commit install"
        echo "# "
        echo "# 3. See https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help."
        echo "# "
        echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack/issues"
        echo "# "
        echo "##################################################################################################"
        exit 1

    # The two steps below run only on main, whether the job passed or failed.
    - name: Calculate alert data
      id: calculator
      if: (success() || failure()) && github.ref_name == 'main'
      shell: bash
      run: |
        # Map the job status onto the alert_type output read by the next step.
        alert_type="error"
        if [ "${{ job.status }}" = "success" ]; then
          alert_type="success"
        fi
        echo "alert_type=${alert_type}" >> "$GITHUB_OUTPUT";

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-url: https://api.datadoghq.eu
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
unit-tests:
  # Runs the tests marked "unit" under test/preview on each OS in the matrix,
  # once the black job has succeeded.
  name: Unit / ${{ matrix.os }}
  needs: black
  runs-on: ${{ matrix.os }}
  strategy:
    # Let the remaining operating systems finish even if one of them fails.
    fail-fast: false
    matrix:
      os: [ubuntu-latest, windows-latest, macos-latest]
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install Haystack
      # Folded scalar: the lines below are joined with single spaces into one command.
      run: >-
        pip install .[dev,preview] langdetect
        transformers[torch,sentencepiece]==4.32.1
        'sentence-transformers>=2.2.0'
        pypdf openai-whisper tika
        'azure-ai-formrecognizer>=3.2.0b2'

    - name: Run
      run: pytest -m "unit" test/preview

    # The two steps below run only on main, whether the job passed or failed.
    - name: Calculate alert data
      id: calculator
      if: (success() || failure()) && github.ref_name == 'main'
      shell: bash
      run: |
        # Map the job status onto the alert_type output read by the next step.
        alert_type="error"
        if [ "${{ job.status }}" = "success" ]; then
          alert_type="success"
        fi
        echo "alert_type=${alert_type}" >> "$GITHUB_OUTPUT";

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-url: https://api.datadoghq.eu
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
integration-tests-linux:
  # Runs the tests marked "integration" under test/preview on Linux, with an
  # Apache Tika service container published on port 9998.
  name: Integration / ubuntu-latest
  needs: unit-tests
  strategy:
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
  runs-on: ${{ matrix.os }}
  services:
    tika:
      image: apache/tika:2.9.0.0
      ports:
        # Quoted so the mapping is always read as a string.
        - "9998:9998"
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      # apt-get is the script-stable front end; -y keeps the install
      # non-interactive without relying on runner-side apt configuration.
      run: |
        sudo apt-get update
        sudo apt-get install -y ffmpeg # for local Whisper tests

    - name: Install Haystack
      # Requirement specifiers containing [ ] or >= are quoted so the shell
      # never treats them as glob patterns or redirections.
      run: pip install '.[dev,preview]' langdetect 'transformers[torch,sentencepiece]==4.32.1' 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'

    - name: Run
      # Stop early after five failures.
      run: pytest --maxfail=5 -m "integration" test/preview

    # The two steps below run only on main, whether the job passed or failed.
    - name: Calculate alert data
      id: calculator
      shell: bash
      if: (success() || failure()) && github.ref_name == 'main'
      run: |
        if [ "${{ job.status }}" = "success" ]; then
          echo "alert_type=success" >> "$GITHUB_OUTPUT";
        else
          echo "alert_type=error" >> "$GITHUB_OUTPUT";
        fi

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        api-url: https://api.datadoghq.eu
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
integration-tests-macos:
  # Runs the tests marked "integration" under test/preview on macOS. Service
  # containers are not used here; Tika is started by hand with `docker run`.
  name: Integration / macos-latest
  needs: unit-tests
  runs-on: macos-latest
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      # NOTE(review): `colima start` presumes colima is already present on the
      # runner image; confirm, since only the docker CLI is installed here.
      run: |
        brew install ffmpeg # for local Whisper tests
        brew install docker
        colima start

    - name: Install Haystack
      # Requirement specifiers containing [ ] or >= are quoted so the shell
      # never treats them as glob patterns or redirections.
      run: pip install '.[dev,preview]' langdetect 'transformers[torch,sentencepiece]==4.32.1' 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'

    - name: Run Tika
      run: docker run -d -p 9998:9998 apache/tika:2.9.0.0

    - name: Wait for Tika
      # The container is started detached, so the server may still be booting
      # when the tests begin. Poll the greeting endpoint for up to ~60 seconds
      # and fail with a clear message if the server never answers.
      run: |
        for attempt in $(seq 1 30); do
          if curl --silent --fail --output /dev/null http://localhost:9998/tika; then
            echo "Tika is ready (attempt ${attempt})"
            exit 0
          fi
          sleep 2
        done
        echo "Tika did not become ready in time" >&2
        exit 1

    - name: Run
      # Stop early after five failures.
      run: pytest --maxfail=5 -m "integration" test/preview

    # The two steps below run only on main, whether the job passed or failed.
    - name: Calculate alert data
      id: calculator
      shell: bash
      if: (success() || failure()) && github.ref_name == 'main'
      run: |
        if [ "${{ job.status }}" = "success" ]; then
          echo "alert_type=success" >> "$GITHUB_OUTPUT";
        else
          echo "alert_type=error" >> "$GITHUB_OUTPUT";
        fi

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        api-url: https://api.datadoghq.eu
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
integration-tests-windows:
  # Runs the tests marked "integration" under test/preview on Windows.
  # No Tika server is started in this job, and the pytest call below
  # deselects tests matching "tika".
  name: Integration / windows-latest
  runs-on: windows-latest
  needs: unit-tests
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install Haystack
      # Folded scalar: the lines below are joined with single spaces into one command.
      run: >-
        pip install .[dev,preview] langdetect
        transformers[torch,sentencepiece]==4.32.1
        'sentence-transformers>=2.2.0'
        pypdf openai-whisper tika
        'azure-ai-formrecognizer>=3.2.0b2'

    - name: Run
      # Stop early after five failures.
      run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika'

    # The two steps below run only on main, whether the job passed or failed.
    - name: Calculate alert data
      id: calculator
      if: (success() || failure()) && github.ref_name == 'main'
      shell: bash
      run: |
        # Map the job status onto the alert_type output read by the next step.
        alert_type="error"
        if [ "${{ job.status }}" = "success" ]; then
          alert_type="success"
        fi
        echo "alert_type=${alert_type}" >> "$GITHUB_OUTPUT";

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-url: https://api.datadoghq.eu
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"

View File

@ -0,0 +1,21 @@
# Companion of tests_preview.yml: it carries the same workflow name and runs
# a trivial job on pull requests that do NOT touch the preview paths.
# If you change this name also do it in tests_preview.yml
name: Tests (Preview)

on:
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    # The same globs that tests_preview.yml lists under `paths`, ignored here.
    paths-ignore:
      - "haystack/preview/**/*.py"
      - "test/preview/**/*.py"

jobs:
  catch-all:
    name: Catch-all check
    runs-on: ubuntu-latest
    steps:
      # Does no work; it only prints a message.
      - name: Skip preview tests
        run: echo "Skipped!"

View File

@ -10,6 +10,8 @@ on:
- ready_for_review
paths-ignore:
- "**.py"
- "!haystack/preview/**/*.py" # See tests_preview.yml
- "!test/preview/**/*.py" # See tests_preview.yml
- "pyproject.toml"
- "!.github/**/*.py"
- "!rest_api/**/*.py"

View File

@ -1,7 +1,10 @@
import os
import random
import logging
import numpy as np
import torch
logger = logging.getLogger(__name__)
def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None:
@ -16,9 +19,16 @@ def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None:
"""
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.cuda.manual_seed_all(seed)
if deterministic_cudnn:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
try:
import torch
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if deterministic_cudnn:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
except (ImportError, ModuleNotFoundError) as exc:
logger.info("Could not set PyTorch seed because torch is not installed. Exception: %s", exc)