feat: preview extra (#5869)

* copy the deps list over from haystack-ai

* fix LazyImport usage

* keep jinja and openai

* fix ci

* reno

* separate out preview unit tests

* fix import error message for tika

* tika

* add preview to all

* wrap torch

* remove comment

* unwrap openai and jinja

ZanSara 2023-09-26 12:48:15 +02:00 committed by GitHub
parent e9d34fc0e3
commit 6cb7d16e22
8 changed files with 95 additions and 19 deletions

View File

@@ -114,7 +114,6 @@ jobs:
- nodes
- agents
- cli
- preview
- prompt
- pipelines
- utils
@@ -184,6 +183,70 @@ jobs:
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
unit-tests-preview:
name: Unit / preview / ${{ matrix.os }}
needs: black
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install Haystack
run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.32.1 sentence-transformers>=2.2.0 pypdf openai-whisper tika
- name: Run
run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "unit" test/preview
- name: Coveralls Parallel
# We upload only coverage for ubuntu as handling both os
# complicates the workflow too much for little to no gain
if: matrix.os == 'ubuntu-latest'
uses: coverallsapp/github-action@v2
with:
path-to-lcov: coverage.xml
flag-name: preview
parallel: true
- name: Calculate alert data
id: calculator
shell: bash
if: (success() || failure()) && github.ref_name == 'main'
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "alert_type=success" >> "$GITHUB_OUTPUT";
else
echo "alert_type=error" >> "$GITHUB_OUTPUT";
fi
- name: Send event to Datadog
if: (success() || failure()) && github.ref_name == 'main'
uses: masci/datadog@v1
with:
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
api-url: https://api.datadoghq.eu
events: |
- title: "${{ github.workflow }} workflow"
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
alert_type: "${{ steps.calculator.outputs.alert_type }}"
source_type_name: "Github"
host: ${{ github.repository_owner }}
tags:
- "project:${{ github.repository }}"
- "job:${{ github.job }}"
- "run_id:${{ github.run_id }}"
- "workflow:${{ github.workflow }}"
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
upload-coverage:
needs: unit-tests
runs-on: ubuntu-latest
@@ -852,8 +915,7 @@ jobs:
sudo apt install ffmpeg # for local Whisper tests
- name: Install Haystack
# FIXME Use haystack-ai dependency list
run: pip install .[dev,inference,file-conversion] langdetect
run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 sentence-transformers>=2.2.0 pypdf openai-whisper tika
- name: Run tests
run: |

View File

@@ -3,10 +3,12 @@ from typing import List, Optional, Dict, Any, Union, BinaryIO, Literal, get_args
import logging
from pathlib import Path
import torch
import whisper
from haystack.preview import component, Document, default_to_dict, default_from_dict, ComponentError
from haystack.preview.lazy_imports import LazyImport
with LazyImport("Run 'pip install openai-whisper'") as whisper_import:
import torch
import whisper
logger = logging.getLogger(__name__)
@@ -38,6 +40,7 @@ class LocalWhisperTranscriber:
- `large-v2`
:param device: Name of the torch device to use for inference. If None, CPU is used.
"""
whisper_import.check()
if model_name_or_path not in get_args(WhisperLocalModel):
raise ValueError(
f"Model name '{model_name_or_path}' not recognized. Choose one among: "

View File

@@ -6,7 +6,7 @@ from haystack.preview.lazy_imports import LazyImport
from haystack.preview import component, Document, default_to_dict, default_from_dict
with LazyImport("Run 'pip install farm-haystack[file-conversion]' or 'pip install tika'") as tika_import:
with LazyImport("Run 'pip install tika'") as tika_import:
from tika import parser as tika_parser
logger = logging.getLogger(__name__)

View File

@@ -8,7 +8,7 @@ from tqdm import tqdm
from haystack.preview.lazy_imports import LazyImport
from haystack.preview import Document, component, default_to_dict, default_from_dict
with LazyImport("Run 'pip install farm-haystack[preprocessing]'") as langdetect_import:
with LazyImport("Run 'pip install langdetect'") as langdetect_import:
import langdetect

View File

@@ -6,7 +6,9 @@ import warnings
from haystack.preview import component, default_from_dict, default_to_dict, ComponentError, Document, ExtractedAnswer
from haystack.preview.lazy_imports import LazyImport
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
with LazyImport(
"Run 'pip install transformers[torch,sentencepiece]==4.32.1 sentence-transformers>=2.2.0'"
) as torch_and_transformers_import:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from tokenizers import Encoding
import torch

View File

@@ -2,7 +2,7 @@ from typing import List, Optional, Union, Dict
from haystack.preview.lazy_imports import LazyImport
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as sentence_transformers_import:
with LazyImport(message="Run 'pip install sentence-transformers>=2.2.0'") as sentence_transformers_import:
from sentence_transformers import SentenceTransformer

View File

@@ -78,13 +78,6 @@ dependencies = [
# Schema validation
"jsonschema",
# Preview
"canals==0.8.0",
"openai",
"Jinja2",
"openai-whisper", # FIXME https://github.com/deepset-ai/haystack/issues/5731
"pypdf",
# Agent events
"events",
@@ -92,6 +85,18 @@ dependencies = [
]
[project.optional-dependencies]
preview = [
"canals==0.8.0",
"requests",
"pandas",
"rank_bm25",
"tqdm",
"tenacity",
"lazy-imports",
"Jinja2",
"openai",
]
inference = [
"transformers[torch,sentencepiece]==4.32.1",
"sentence-transformers>=2.2.0", # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
@@ -240,11 +245,11 @@ formatting = [
]
all = [
"farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws]",
"farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws,preview]",
]
all-gpu = [
# beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
"farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws]",
"farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws,preview]",
]
[project.scripts]

View File

@@ -0,0 +1,4 @@
preview:
- |
Create a dedicated dependency list for the preview package, `farm-haystack[preview]`.
Using `haystack-ai` is still the recommended way to test Haystack 2.0.
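
For context, a quick way to try the new extra locally (install commands shown as comments; the imported names are taken from the diffs above, and nothing below needs the heavy optional packages thanks to the lazy imports):

```python
# pip install "farm-haystack[preview]"   # the new extra added by this PR
# pip install haystack-ai                # still the recommended way to test Haystack 2.0
from haystack.preview import component, Document, default_to_dict, default_from_dict
from haystack.preview.lazy_imports import LazyImport
```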