feat!: Add extra for inference dependencies such as torch (#5147)

* feat!: add extra for inference dependencies such as torch

* add inference extra to 'all' and 'all-gpu' extras

* install inference extra in selected integration tests

* import LazyImport

* review feedback

* add import error messages and update readme

* remove extra dot
Julian Risch 2023-06-20 09:54:10 +02:00 committed by GitHub
parent 916e8452f5
commit 30fdf2b5df
29 changed files with 49 additions and 38 deletions

View File

@@ -210,7 +210,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[elasticsearch,dev,preprocessing]
+        run: pip install .[elasticsearch,dev,preprocessing,inference]
       - name: Run tests
         run: |
@@ -608,7 +608,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[dev,preprocessing]
+        run: pip install .[dev,preprocessing,inference]
       - name: Run tests
         run: |
@@ -662,7 +662,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[dev,preprocessing]
+        run: pip install .[dev,preprocessing,inference]
       - name: Run tests
         run: |
@@ -716,7 +716,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[dev,preprocessing]
+        run: pip install .[dev,preprocessing,inference]
       - name: Run tests
         run: |

View File

@@ -76,13 +76,18 @@ This command installs everything needed for basic Pipelines that use an in-memory
 **Full Installation**
 
-To use more advanced features, like certain DocumentStores, FileConverters, OCR, or Ray,
+To use more advanced features, like certain DocumentStores, FileConverters, OCR, local inference with PyTorch, or Ray,
 you need to install further dependencies. The following command installs the [latest release](https://github.com/deepset-ai/haystack/releases) of Haystack and all its dependencies:
 
 ```sh
 pip install 'farm-haystack[all]'  ## or 'all-gpu' for the GPU-enabled dependencies
 ```
 
+If you want to install only the dependencies needed for model inference on your local hardware (not remote API endpoints), such as torch and sentence-transformers, you can use the following command:
+
+```sh
+pip install 'farm-haystack[inference]'  ## installs torch, sentence-transformers, sentencepiece, and huggingface-hub
+```
+
 If you want to try out the newest features that are not in an official release yet, you can install the unstable version from the main branch with the following command:
 
 ```sh

View File

@@ -25,7 +25,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports
 
@@ -92,6 +92,7 @@ class InMemoryDocumentStore(KeywordDocumentStore):
         You can learn more about these parameters by visiting https://github.com/dorianbrown/rank_bm25
         By default, no parameters are set.
         """
+        torch_import.check()
         if bm25_parameters is None:
             bm25_parameters = {}
         super().__init__()
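The pattern repeated across these files is worth unpacking once. The following is a minimal, self-contained sketch of how a LazyImport-style helper can work (an illustration under assumptions, not the actual haystack.lazy_imports implementation): the context manager swallows an ImportError at module load time and re-raises it, together with the installation hint, only when check() is called at object construction.

```python
# Sketch only: illustrates the deferred-import pattern used in the diffs above.
# The real haystack.lazy_imports.LazyImport may differ in details.
from typing import Optional


class LazyImport:
    """Defer an ImportError until the optional dependency is actually needed."""

    def __init__(self, message: str = "Run 'pip install <missing package>'") -> None:
        self.message = message
        self.import_error: Optional[BaseException] = None

    def __enter__(self) -> "LazyImport":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        if exc_type is not None and issubclass(exc_type, ImportError):
            self.import_error = exc_value
            return True  # suppress the error at import time
        return False

    def check(self) -> None:
        # Called in __init__ of components that need the dependency:
        # fails only if the earlier import actually went missing.
        if self.import_error is not None:
            raise ImportError(self.message) from self.import_error


# Usage, mirroring the hunks above:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
    import torch  # noqa: F401


class NeedsTorch:
    def __init__(self) -> None:
        torch_import.check()  # raises the helpful message if torch is absent
```

Importing a module that uses this pattern stays cheap and error-free; the cost of a missing optional dependency is paid only when a component that needs it is instantiated.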

View File

@@ -8,7 +8,7 @@ from typing import Any, Dict, Optional
 
 from haystack import __version__
 from haystack.lazy_imports import LazyImport
 
-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch
 
 with LazyImport() as transformers_import:

View File

@@ -32,7 +32,7 @@ logger = logging.getLogger(__name__)
 
 SPECIAL_TOKENIZER_CHARS = r"^(##|Ġ|▁)"
 
-with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]'.") as transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as transformers_import:
     import transformers
     from transformers import PreTrainedTokenizer, RobertaTokenizer, AutoConfig, AutoFeatureExtractor, AutoTokenizer

View File

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import (
         RagTokenizer,

View File

@@ -11,7 +11,7 @@ from haystack.utils.import_utils import is_whisper_available
 from haystack.lazy_imports import LazyImport
 
-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

View File

@@ -12,7 +12,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -8,7 +8,7 @@ from haystack.schema import Document
 from haystack.nodes.document_classifier.base import BaseDocumentClassifier
 from haystack.lazy_imports import LazyImport
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -33,7 +33,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from torch.utils.data import Dataset, DataLoader
     from transformers import AutoTokenizer, AutoModelForTokenClassification

View File

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from sentence_transformers import CrossEncoder
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -10,7 +10,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
    import torch
    from transformers import (
        pipeline,

View File

@@ -7,7 +7,7 @@ from haystack.nodes.prompt.invocation_layer import PromptModelInvocationLayer
 from haystack.schema import Document, MultiLabel
 from haystack.lazy_imports import LazyImport
 
-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

View File

@@ -10,7 +10,7 @@ from haystack.nodes.prompt.prompt_model import PromptModel
 from haystack.nodes.prompt.prompt_template import PromptTemplate
 from haystack.lazy_imports import LazyImport
 
-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

View File

@@ -11,7 +11,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -14,7 +14,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import AutoModelForSeq2SeqLM
     from transformers import AutoTokenizer
@@ -93,6 +93,7 @@ class QuestionGenerator(BaseComponent):
             parameter is not used and a single CPU device is used for inference.
         """
+        torch_and_transformers_import.check()
         super().__init__()
         self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
         if len(self.devices) > 1:

View File

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from torch.nn import DataParallel
     from transformers import AutoModelForSequenceClassification, AutoTokenizer

View File

@@ -24,7 +24,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from haystack.modeling.data_handler.data_silo import DataSilo, DistillationDataSilo
     from haystack.modeling.data_handler.processor import SquadProcessor, Processor

View File

@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 
 TableQuestionAnsweringPipeline = object
 TapasPreTrainedModel = object
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import (  # type: ignore
         TapasTokenizer,

View File

@@ -12,7 +12,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from transformers.data.processors.squad import SquadExample

View File

@@ -31,7 +31,7 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from sentence_transformers import InputExample, SentenceTransformer
     from torch.utils.data import DataLoader

View File

@@ -26,7 +26,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from torch.nn import DataParallel
     from torch.utils.data.sampler import SequentialSampler

View File

@@ -15,7 +15,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from haystack.utils.torch_utils import get_devices  # pylint: disable=ungrouped-imports
     from haystack.modeling.model.multimodal import get_model  # pylint: disable=ungrouped-imports

View File

@@ -15,7 +15,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch

View File

@@ -11,7 +11,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from sentence_transformers import CrossEncoder
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport
 
 logger = logging.getLogger(__name__)
 
-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

View File

@@ -48,7 +48,7 @@ classifiers = [
 dependencies = [
     "requests",
     "pydantic",
-    "transformers[torch,sentencepiece]==4.30.1",
+    "transformers==4.30.1",
     "pandas",
     "rank_bm25",
     "scikit-learn>=1.0.0",  # TF-IDF, SklearnQueryClassifier and metrics
@@ -62,16 +62,15 @@ dependencies = [
     "quantulum3",  # quantities extraction from text
     "posthog",  # telemetry
-    # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
-    "huggingface-hub>=0.5.0",
     "tenacity",  # retry decorator
     "sseclient-py",  # server side events for OpenAI streaming
     "more_itertools",  # utilities
     # Web Retriever
     "boilerpy3",
-    # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
-    "sentence-transformers>=2.2.0",
     # Multimodal Embedder haystack/nodes/retriever/multimodal/embedder.py
     "Pillow",
     # OpenAI tokenizer
     "tiktoken>=0.3.2",
@@ -89,6 +88,11 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+inference = [
+    "transformers[torch,sentencepiece]==4.30.1",
+    "sentence-transformers>=2.2.0",  # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
+    "huggingface-hub>=0.5.0",
+]
 elasticsearch = [
     "elasticsearch>=7.17,<8",
 ]
@@ -212,11 +216,11 @@ formatting = [
 ]
 
 all = [
-    "farm-haystack[docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]",
+    "farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]",
 ]
 all-gpu = [
     # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
-    "farm-haystack[docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]",
+    "farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]",
 ]
 
 [project.scripts]
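With the dependencies split this way, it can be useful to verify which of the optional inference packages are actually present in an environment. A small sketch, standard library only; the module names mirror the packages listed in the inference extra above:

```python
# Check whether the optional inference dependencies are importable.
# Module names mirror the packages in the new "inference" extra.
import importlib.util

for module in ("torch", "sentencepiece", "sentence_transformers", "huggingface_hub"):
    found = importlib.util.find_spec(module) is not None
    status = "installed" if found else "missing (run: pip install 'farm-haystack[inference]')"
    print(f"{module}: {status}")
```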