Patch summary: give the Haystack preview package its own `preview` extra.

- CI: run `test/preview` unit tests in a dedicated `unit-tests-preview` job.
- Preview components: optional imports are wrapped in LazyImport and the
  error hints name the plain `pip install` command for the missing package.
- Requirement specifiers containing `>=` are quoted, both in the workflow
  commands and in the hints, so the shell does not treat `>` as a redirect.

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 99bbe037f..7539bf9c5 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -114,7 +114,6 @@ jobs:
           - nodes
           - agents
           - cli
-          - preview
           - prompt
           - pipelines
           - utils
@@ -184,6 +183,70 @@ jobs:
                 - "branch:${{ github.ref_name }}"
                 - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
 
+  unit-tests-preview:
+    name: Unit / preview / ${{ matrix.os }}
+    needs: black
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Haystack
+        run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.32.1 "sentence-transformers>=2.2.0" pypdf openai-whisper tika
+
+      - name: Run
+        run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "unit" test/preview
+
+      - name: Coveralls Parallel
+        # We upload only coverage for ubuntu as handling both os
+        # complicates the workflow too much for little to no gain
+        if: matrix.os == 'ubuntu-latest'
+        uses: coverallsapp/github-action@v2
+        with:
+          path-to-lcov: coverage.xml
+          flag-name: preview
+          parallel: true
+
+      - name: Calculate alert data
+        id: calculator
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
   upload-coverage:
     needs: unit-tests
     runs-on: ubuntu-latest
@@ -852,8 +915,7 @@ jobs:
           sudo apt install ffmpeg # for local Whisper tests
 
       - name: Install Haystack
-        # FIXME Use haystack-ai dependency list
-        run: pip install .[dev,inference,file-conversion] langdetect
+        run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 "sentence-transformers>=2.2.0" pypdf openai-whisper tika
 
       - name: Run tests
         run: |
diff --git a/haystack/preview/components/audio/whisper_local.py b/haystack/preview/components/audio/whisper_local.py
index 8a6337c9c..09420d64b 100644
--- a/haystack/preview/components/audio/whisper_local.py
+++ b/haystack/preview/components/audio/whisper_local.py
@@ -3,10 +3,12 @@ from typing import List, Optional, Dict, Any, Union, BinaryIO, Literal, get_args
 import logging
 from pathlib import Path
 
-import torch
-import whisper
-
 from haystack.preview import component, Document, default_to_dict, default_from_dict, ComponentError
+from haystack.preview.lazy_imports import LazyImport
+
+with LazyImport("Run 'pip install openai-whisper'") as whisper_import:
+    import torch
+    import whisper
 
 logger = logging.getLogger(__name__)
 
@@ -38,6 +40,7 @@ class LocalWhisperTranscriber:
             - `large-v2`
         :param device: Name of the torch device to use for inference. If None, CPU is used.
         """
+        whisper_import.check()
         if model_name_or_path not in get_args(WhisperLocalModel):
             raise ValueError(
                 f"Model name '{model_name_or_path}' not recognized. Choose one among: "
diff --git a/haystack/preview/components/file_converters/tika.py b/haystack/preview/components/file_converters/tika.py
index fe1cb31fa..ae45456dc 100644
--- a/haystack/preview/components/file_converters/tika.py
+++ b/haystack/preview/components/file_converters/tika.py
@@ -6,7 +6,7 @@ from haystack.preview.lazy_imports import LazyImport
 from haystack.preview import component, Document, default_to_dict, default_from_dict
 
 
-with LazyImport("Run 'pip install farm-haystack[file-conversion]' or 'pip install tika'") as tika_import:
+with LazyImport("Run 'pip install tika'") as tika_import:
     from tika import parser as tika_parser
 
 logger = logging.getLogger(__name__)
diff --git a/haystack/preview/components/file_converters/txt.py b/haystack/preview/components/file_converters/txt.py
index 03ff832d2..6f23d1c0d 100644
--- a/haystack/preview/components/file_converters/txt.py
+++ b/haystack/preview/components/file_converters/txt.py
@@ -8,7 +8,7 @@ from tqdm import tqdm
 from haystack.preview.lazy_imports import LazyImport
 from haystack.preview import Document, component, default_to_dict, default_from_dict
 
-with LazyImport("Run 'pip install farm-haystack[preprocessing]'") as langdetect_import:
+with LazyImport("Run 'pip install langdetect'") as langdetect_import:
     import langdetect
 
 
diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py
index 32d02a85d..081646dd2 100644
--- a/haystack/preview/components/readers/extractive.py
+++ b/haystack/preview/components/readers/extractive.py
@@ -6,7 +6,9 @@ import warnings
 from haystack.preview import component, default_from_dict, default_to_dict, ComponentError, Document, ExtractedAnswer
 from haystack.preview.lazy_imports import LazyImport
 
-with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
+with LazyImport(
+    "Run 'pip install \"transformers[torch,sentencepiece]==4.32.1\" \"sentence-transformers>=2.2.0\"'"
+) as torch_and_transformers_import:
     from transformers import AutoModelForQuestionAnswering, AutoTokenizer
     from tokenizers import Encoding
     import torch
diff --git a/haystack/preview/embedding_backends/sentence_transformers_backend.py b/haystack/preview/embedding_backends/sentence_transformers_backend.py
index c04169ead..55a1b3dfb 100644
--- a/haystack/preview/embedding_backends/sentence_transformers_backend.py
+++ b/haystack/preview/embedding_backends/sentence_transformers_backend.py
@@ -2,7 +2,7 @@ from typing import List, Optional, Union, Dict
 
 from haystack.preview.lazy_imports import LazyImport
 
-with LazyImport(message="Run 'pip install farm-haystack[inference]'") as sentence_transformers_import:
+with LazyImport(message="Run 'pip install \"sentence-transformers>=2.2.0\"'") as sentence_transformers_import:
     from sentence_transformers import SentenceTransformer
 
 
diff --git a/pyproject.toml b/pyproject.toml
index f556bce8d..df689a098 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,13 +78,6 @@ dependencies = [
   # Schema validation
   "jsonschema",
 
-  # Preview
-  "canals==0.8.0",
-  "openai",
-  "Jinja2",
-  "openai-whisper",  # FIXME https://github.com/deepset-ai/haystack/issues/5731
-  "pypdf",
-
   # Agent events
   "events",
 
@@ -92,6 +85,18 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+preview = [
+  "canals==0.8.0",
+  "requests",
+  "pandas",
+  "rank_bm25",
+  "tqdm",
+  "tenacity",
+  "lazy-imports",
+
+  "Jinja2",
+  "openai",
+]
 inference = [
   "transformers[torch,sentencepiece]==4.32.1",
   "sentence-transformers>=2.2.0",  # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
@@ -240,11 +245,11 @@ formatting = [
 ]
 
 all = [
-  "farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws]",
+  "farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws,preview]",
 ]
 all-gpu = [
   # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
-  "farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws]",
+  "farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws,preview]",
 ]
 
 [project.scripts]
diff --git a/releasenotes/notes/preview-extra-6dfdca55d17cbc7f.yaml b/releasenotes/notes/preview-extra-6dfdca55d17cbc7f.yaml
new file mode 100644
index 000000000..223e2cbef
--- /dev/null
+++ b/releasenotes/notes/preview-extra-6dfdca55d17cbc7f.yaml
@@ -0,0 +1,4 @@
+preview:
+  - |
+    Create a dedicated dependency list for the preview package, `farm-haystack[preview]`.
+    Using `haystack-ai` is still the recommended way to test Haystack 2.0.