feat!: Add extra for inference dependencies such as torch (#5147)

* feat!: add extra for inference dependencies such as torch

* add inference extra to 'all' and 'all-gpu' extra

* install inference extra in selected integration tests

* import LazyImport

* review feedback

* add import error messages and update readme

* remove extra dot
Julian Risch 2023-06-20 09:54:10 +02:00 committed by GitHub
parent 916e8452f5
commit 30fdf2b5df
29 changed files with 49 additions and 38 deletions

@@ -210,7 +210,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[elasticsearch,dev,preprocessing]
+        run: pip install .[elasticsearch,dev,preprocessing,inference]
       - name: Run tests
         run: |
@@ -608,7 +608,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[dev,preprocessing]
+        run: pip install .[dev,preprocessing,inference]
       - name: Run tests
         run: |
@@ -662,7 +662,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[dev,preprocessing]
+        run: pip install .[dev,preprocessing,inference]
       - name: Run tests
         run: |
@@ -716,7 +716,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
       - name: Install Haystack
-        run: pip install .[dev,preprocessing]
+        run: pip install .[dev,preprocessing,inference]
      - name: Run tests
         run: |

@@ -76,13 +76,18 @@ This command installs everything needed for basic Pipelines that use an in-memory
 **Full Installation**

-To use more advanced features, like certain DocumentStores, FileConverters, OCR, or Ray,
+To use more advanced features, like certain DocumentStores, FileConverters, OCR, local inference with pytorch, or Ray,
 you need to install further dependencies. The following command installs the [latest release](https://github.com/deepset-ai/haystack/releases) of Haystack and all its dependencies:

 ```sh
 pip install 'farm-haystack[all]' ## or 'all-gpu' for the GPU-enabled dependencies
 ```

+If you want to install only the dependencies needed for model inference on your local hardware (not remote API endpoints), such as torch and sentence-transformers, you can use the following command:
+```sh
+pip install 'farm-haystack[inference]' ## installs torch, sentence-transformers, sentencepiece, and huggingface-hub
+```
+
 If you want to try out the newest features that are not in an official release yet, you can install the unstable version from the main branch with the following command:

 ```sh
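
Since the new extra only changes which packages pip pulls in, a quick way to confirm it took effect is an import probe. A minimal, hypothetical sketch (not part of Haystack; the package list mirrors the README comment above):

```python
# Hypothetical sanity check: verify the packages the 'inference' extra is
# documented to install are importable in the current environment.
import importlib.util

for pkg in ("torch", "sentence_transformers", "sentencepiece", "huggingface_hub"):
    if importlib.util.find_spec(pkg) is None:
        raise SystemExit(f"'{pkg}' missing -- run: pip install 'farm-haystack[inference]'")
print("inference dependencies are present")
```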

@@ -25,7 +25,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports
@@ -92,6 +92,7 @@ class InMemoryDocumentStore(KeywordDocumentStore):
         You can learn more about these parameters by visiting https://github.com/dorianbrown/rank_bm25
         By default, no parameters are set.
         """
+        torch_import.check()
         if bm25_parameters is None:
             bm25_parameters = {}
         super().__init__()
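
Every file diff below applies the same two-part pattern shown here: wrap the heavy imports in a `LazyImport` context manager carrying an actionable message, then call `.check()` right before the dependency is actually needed. Haystack's `LazyImport` lives in `haystack.lazy_imports` (backed by the lazy-imports package); the following is only a rough sketch of the deferred-error mechanics these diffs rely on, not the real implementation:

```python
# Rough stand-in for haystack.lazy_imports.LazyImport: the context manager
# swallows an ImportError at import time and re-raises it, with the install
# hint, only when check() is called.
from typing import Optional


class LazyImport:
    def __init__(self, message: str = "Optional dependency is missing.") -> None:
        self.message = message
        self.import_error: Optional[ImportError] = None

    def __enter__(self) -> "LazyImport":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        if exc_type is not None and issubclass(exc_type, ImportError):
            self.import_error = exc_value
            return True  # suppress the ImportError for now
        return False

    def check(self) -> None:
        # Called right before the guarded dependency is actually used.
        if self.import_error is not None:
            raise ImportError(f"{self.message} (original error: {self.import_error})")


with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
    import torch  # only succeeds if the 'inference' extra (or torch) is installed

torch_import.check()  # raises ImportError with the install hint if torch is absent
```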

@@ -8,7 +8,7 @@ from typing import Any, Dict, Optional
 from haystack import __version__
 from haystack.lazy_imports import LazyImport

-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

 with LazyImport() as transformers_import:

@@ -32,7 +32,7 @@ logger = logging.getLogger(__name__)

 SPECIAL_TOKENIZER_CHARS = r"^(##|Ġ|▁)"

-with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]'.") as transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as transformers_import:
     import transformers
     from transformers import PreTrainedTokenizer, RobertaTokenizer, AutoConfig, AutoFeatureExtractor, AutoTokenizer

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import (
         RagTokenizer,

@@ -11,7 +11,7 @@ from haystack.utils.import_utils import is_whisper_available
 from haystack.lazy_imports import LazyImport

-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

@@ -12,7 +12,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -8,7 +8,7 @@ from haystack.schema import Document
 from haystack.nodes.document_classifier.base import BaseDocumentClassifier
 from haystack.lazy_imports import LazyImport

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -33,7 +33,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from torch.utils.data import Dataset, DataLoader
     from transformers import AutoTokenizer, AutoModelForTokenClassification

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from sentence_transformers import CrossEncoder
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -10,7 +10,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import (
         pipeline,

@@ -7,7 +7,7 @@ from haystack.nodes.prompt.invocation_layer import PromptModelInvocationLayer
 from haystack.schema import Document, MultiLabel
 from haystack.lazy_imports import LazyImport

-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

@@ -10,7 +10,7 @@ from haystack.nodes.prompt.prompt_model import PromptModel
 from haystack.nodes.prompt.prompt_template import PromptTemplate
 from haystack.lazy_imports import LazyImport

-with LazyImport() as torch_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
     import torch

@@ -11,7 +11,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -14,7 +14,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import AutoModelForSeq2SeqLM
     from transformers import AutoTokenizer
@@ -93,6 +93,7 @@ class QuestionGenerator(BaseComponent):
         parameter is not used and a single CPU device is used for inference.
         """
+        torch_and_transformers_import.check()
         super().__init__()
         self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
         if len(self.devices) > 1:
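
Seen from the user's side, the added `check()` call defers the failure: importing the class still works in a torch-less environment, and the actionable error only surfaces at construction time. A hypothetical session, assuming the 'inference' extra is not installed:

```python
# Hypothetical session: the import itself succeeds because torch is only
# lazily imported inside the LazyImport block.
from haystack.nodes import QuestionGenerator

try:
    generator = QuestionGenerator()  # torch_and_transformers_import.check() runs here
except ImportError as error:
    # The message now points at the new extra, e.g.
    # "Run 'pip install farm-haystack[inference]'"
    print(error)
```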

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from torch.nn import DataParallel
     from transformers import AutoModelForSequenceClassification, AutoTokenizer

@@ -24,7 +24,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from haystack.modeling.data_handler.data_silo import DataSilo, DistillationDataSilo
     from haystack.modeling.data_handler.processor import SquadProcessor, Processor

@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 TableQuestionAnsweringPipeline = object
 TapasPreTrainedModel = object

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import (  # type: ignore
         TapasTokenizer,

@@ -12,7 +12,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from transformers.data.processors.squad import SquadExample

@@ -31,7 +31,7 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from sentence_transformers import InputExample, SentenceTransformer
     from torch.utils.data import DataLoader

@@ -26,7 +26,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from torch.nn import DataParallel
     from torch.utils.data.sampler import SequentialSampler

@@ -15,7 +15,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from haystack.utils.torch_utils import get_devices  # pylint: disable=ungrouped-imports
     from haystack.modeling.model.multimodal import get_model  # pylint: disable=ungrouped-imports

@@ -15,7 +15,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch

@@ -11,7 +11,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from sentence_transformers import CrossEncoder
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import pipeline
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -13,7 +13,7 @@ from haystack.lazy_imports import LazyImport

 logger = logging.getLogger(__name__)

-with LazyImport() as torch_and_transformers_import:
+with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
     import torch
     from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
     from haystack.modeling.utils import initialize_device_settings  # pylint: disable=ungrouped-imports

@@ -48,7 +48,7 @@ classifiers = [
 dependencies = [
     "requests",
     "pydantic",
-    "transformers[torch,sentencepiece]==4.30.1",
+    "transformers==4.30.1",
     "pandas",
     "rank_bm25",
     "scikit-learn>=1.0.0",  # TF-IDF, SklearnQueryClassifier and metrics
@@ -62,7 +62,6 @@ dependencies = [
     "quantulum3",  # quantities extraction from text
     "posthog",  # telemetry
     # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
-    "huggingface-hub>=0.5.0",
     "tenacity",  # retry decorator
     "sseclient-py",  # server side events for OpenAI streaming
     "more_itertools",  # utilities
@@ -70,8 +69,8 @@ dependencies = [
     # Web Retriever
     "boilerpy3",
-    # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
-    "sentence-transformers>=2.2.0",
+    # Multimodal Embedder haystack/nodes/retriever/multimodal/embedder.py
+    "Pillow",
     # OpenAI tokenizer
     "tiktoken>=0.3.2",
@@ -89,6 +88,11 @@ dependencies = [
 ]

 [project.optional-dependencies]
+inference = [
+    "transformers[torch,sentencepiece]==4.30.1",
+    "sentence-transformers>=2.2.0",  # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
+    "huggingface-hub>=0.5.0",
+]
 elasticsearch = [
     "elasticsearch>=7.17,<8",
 ]
@@ -212,11 +216,11 @@ formatting = [
 ]

 all = [
-    "farm-haystack[docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]",
+    "farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]",
 ]
 all-gpu = [
     # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
-    "farm-haystack[docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]",
+    "farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]",
 ]

 [project.scripts]
[project.scripts] [project.scripts]