diff --git a/docs/_src/api/api/ranker.md b/docs/_src/api/api/ranker.md index 6e7db33cd..e9de4491b 100644 --- a/docs/_src/api/api/ranker.md +++ b/docs/_src/api/api/ranker.md @@ -92,7 +92,7 @@ p.add_node(component=ranker, name="Ranker", inputs=["ESRetriever"]) #### \_\_init\_\_ ```python -def __init__(model_name_or_path: Union[str, Path], model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None) +def __init__(model_name_or_path: Union[str, Path], model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, devices: Optional[List[Union[str, torch.device]]] = None) ``` **Arguments**: @@ -103,7 +103,10 @@ See https://huggingface.co/cross-encoder for full list of available models - `model_version`: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. - `top_k`: The maximum number of documents to return - `use_gpu`: Whether to use all available GPUs or the CPU. Falls back on CPU if no GPU is available. -- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). +- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones +The strings will be converted into pytorch devices, so use the string notation described here: +https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device +(e.g. ["cuda:0"]). diff --git a/docs/_src/api/api/reader.md b/docs/_src/api/api/reader.md index bdc70da8d..8e280d260 100644 --- a/docs/_src/api/api/reader.md +++ b/docs/_src/api/api/reader.md @@ -398,7 +398,7 @@ Dict containing query and answers #### eval\_on\_file ```python -def eval_on_file(data_dir: str, test_filename: str, device: Optional[str] = None) +def eval_on_file(data_dir: Union[Path, str], test_filename: str, device: Optional[Union[str, torch.device]] = None) ``` Performs evaluation on a SQuAD-formatted file. @@ -410,16 +410,18 @@ Returns a dict containing the following metrics: **Arguments**: -- `data_dir` (`Path or str`): The directory in which the test set can be found -- `test_filename` (`str`): The name of the file containing the test data in SQuAD format. -- `device` (`str`): The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default. +- `data_dir`: The directory in which the test set can be found +- `test_filename`: The name of the file containing the test data in SQuAD format. +- `device`: The device on which the tensors should be processed. +Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda") +or use the Reader's device by default. #### eval ```python -def eval(document_store: BaseDocumentStore, device: Optional[str] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold-label", calibrate_conf_scores: bool = False) +def eval(document_store: BaseDocumentStore, device: Optional[Union[str, torch.device]] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold-label", calibrate_conf_scores: bool = False) ``` Performs evaluation on evaluation documents in the DocumentStore. @@ -432,7 +434,9 @@ Returns a dict containing the following metrics: **Arguments**: - `document_store`: DocumentStore containing the evaluation documents -- `device`: The device on which the tensors should be processed. 
Choose from "cpu" and "cuda" or use the Reader's device by default. +- `device`: The device on which the tensors should be processed. +Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda") +or use the Reader's device by default. - `label_index`: Index/Table name where labeled questions are stored - `doc_index`: Index/Table name where documents that are used for evaluation are stored - `label_origin`: Field name where the gold labels are stored @@ -443,7 +447,7 @@ Returns a dict containing the following metrics: #### calibrate\_confidence\_scores ```python -def calibrate_confidence_scores(document_store: BaseDocumentStore, device: Optional[str] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label") +def calibrate_confidence_scores(document_store: BaseDocumentStore, device: Optional[Union[str, torch.device]] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label") ``` Calibrates confidence scores on evaluation documents in the DocumentStore. @@ -451,7 +455,9 @@ Calibrates confidence scores on evaluation documents in the DocumentStore. **Arguments**: - `document_store`: DocumentStore containing the evaluation documents -- `device`: The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default. +- `device`: The device on which the tensors should be processed. +Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda") +or use the Reader's device by default. - `label_index`: Index/Table name where labeled questions are stored - `doc_index`: Index/Table name where documents that are used for evaluation are stored - `label_origin`: Field name where the gold labels are stored diff --git a/docs/_src/api/api/retriever.md b/docs/_src/api/api/retriever.md index 0a3e638b0..90e7db44f 100644 --- a/docs/_src/api/api/retriever.md +++ b/docs/_src/api/api/retriever.md @@ -312,7 +312,7 @@ Karpukhin, Vladimir, et al. 
(2020): "Dense Passage Retrieval for Open-Domain Que #### \_\_init\_\_ ```python -def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "facebook/dpr-question_encoder-single-nq-base", passage_embedding_model: Union[Path, str] = "facebook/dpr-ctx_encoder-single-nq-base", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_title: bool = True, use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None) +def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "facebook/dpr-question_encoder-single-nq-base", passage_embedding_model: Union[Path, str] = "facebook/dpr-ctx_encoder-single-nq-base", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_title: bool = True, use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None) ``` Init the Retriever incl. the two encoder models from a local or remote model checkpoint. @@ -362,8 +362,11 @@ Options: `dot_product` (Default) or `cosine` Increase if errors like "encoded data exceeds max_size ..." come up - `progress_bar`: Whether to show a tqdm progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. -- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). -As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. +- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones +These strings will be converted into pytorch devices, so use the string notation described here: +https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device +(e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for DPR, training +will only use the first device provided in this list. - `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, the local token will be used, which must be previously created via `transformer-cli login`. Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -520,7 +523,7 @@ Kostić, Bogdan, et al. 
(2021): "Multi-modal Retrieval of Tables and Texts Using #### \_\_init\_\_ ```python -def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-question_encoder", passage_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-passage_encoder", table_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-table_encoder", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, max_seq_len_table: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_meta_fields: List[str] = ["name", "section_title", "caption"], use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None) +def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-question_encoder", passage_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-passage_encoder", table_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-table_encoder", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, max_seq_len_table: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_meta_fields: List[str] = ["name", "section_title", "caption"], use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None) ``` Init the Retriever incl. the two encoder models from a local or remote model checkpoint. @@ -556,8 +559,11 @@ Options: `dot_product` (Default) or `cosine` Increase if errors like "encoded data exceeds max_size ..." come up - `progress_bar`: Whether to show a tqdm progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. -- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). -As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. +- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones +These strings will be converted into pytorch devices, so use the string notation described here: +https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device +(e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for TableTextRetriever, +training will only use the first device provided in this list. - `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, the local token will be used, which must be previously created via `transformer-cli login`. 
Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -695,7 +701,7 @@ class EmbeddingRetriever(BaseRetriever) #### \_\_init\_\_ ```python -def __init__(document_store: BaseDocumentStore, embedding_model: str, model_version: Optional[str] = None, use_gpu: bool = True, batch_size: int = 32, max_seq_len: int = 512, model_format: str = "farm", pooling_strategy: str = "reduce_mean", emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None) +def __init__(document_store: BaseDocumentStore, embedding_model: str, model_version: Optional[str] = None, use_gpu: bool = True, batch_size: int = 32, max_seq_len: int = 512, model_format: str = "farm", pooling_strategy: str = "reduce_mean", emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None) ``` **Arguments**: @@ -721,8 +727,11 @@ Options: Default: -1 (very last layer). - `top_k`: How many documents to return per query. - `progress_bar`: If true displays progress bar during embedding. -- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). -As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. +- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones +These strings will be converted into pytorch devices, so use the string notation described here: +https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device +(e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever, +training will only use the first device provided in this list. - `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, the local token will be used, which must be previously created via `transformer-cli login`. 
Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/haystack/modeling/conversion/transformers.py b/haystack/modeling/conversion/transformers.py index 1aa7d4645..80a7b71c6 100644 --- a/haystack/modeling/conversion/transformers.py +++ b/haystack/modeling/conversion/transformers.py @@ -1,11 +1,13 @@ import logging from typing import Union +import torch from transformers import AutoModelForQuestionAnswering from haystack.modeling.model import adaptive_model as am from haystack.modeling.model.language_model import LanguageModel from haystack.modeling.model.prediction_head import QuestionAnsweringHead +from haystack.modeling.data_handler.processor import Processor logger = logging.getLogger(__name__) @@ -46,10 +48,10 @@ class Converter: @staticmethod def convert_from_transformers( model_name_or_path, - device, - revision=None, - task_type=None, - processor=None, + device: Union[str, torch.device], + revision: str = None, + task_type: str = "question_answering", + processor: Processor = None, use_auth_token: Union[bool, str] = None, **kwargs, ): @@ -65,14 +67,10 @@ class Converter: - deepset/bert-large-uncased-whole-word-masking-squad2 See https://huggingface.co/models for full list - :param device: "cpu" or "cuda" + :param device: torch.device("cpu") or torch.device("cuda") :param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. - :type revision: str - :param task_type: One of : - - 'question_answering' - More tasks coming soon ... - :param processor: populates prediction head with information coming from tasks - :type processor: Processor + Right now accepts only 'question_answering'. + :param processor: populates prediction head with information coming from tasks. :return: AdaptiveModel """ diff --git a/haystack/modeling/data_handler/data_silo.py b/haystack/modeling/data_handler/data_silo.py index ae2a51c52..e6f8eff3d 100644 --- a/haystack/modeling/data_handler/data_silo.py +++ b/haystack/modeling/data_handler/data_silo.py @@ -785,7 +785,7 @@ class DistillationDataSilo(DataSilo): self, teacher_model: "FARMReader", teacher_batch_size: int, - device: str, + device: torch.device, processor: Processor, batch_size: int, eval_batch_size: Optional[int] = None, diff --git a/haystack/modeling/evaluation/eval.py b/haystack/modeling/evaluation/eval.py index 6d757940e..1b92f2a8f 100644 --- a/haystack/modeling/evaluation/eval.py +++ b/haystack/modeling/evaluation/eval.py @@ -20,11 +20,11 @@ class Evaluator: Handles evaluation of a given model over a specified dataset. """ - def __init__(self, data_loader: torch.utils.data.DataLoader, tasks, device: str, report: bool = True): + def __init__(self, data_loader: torch.utils.data.DataLoader, tasks, device: torch.device, report: bool = True): """ :param data_loader: The PyTorch DataLoader that will return batches of data from the evaluation dataset :param tesks: - :param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda". + :param device: The device on which the tensors should be processed. Choose from torch.device("cpu") and torch.device("cuda"). :param report: Whether an eval report should be generated (e.g. classification report per class). 
""" self.data_loader = data_loader diff --git a/haystack/modeling/infer.py b/haystack/modeling/infer.py index 7fc2bf5ea..fcd304339 100644 --- a/haystack/modeling/infer.py +++ b/haystack/modeling/infer.py @@ -128,7 +128,7 @@ class Inferencer: use_fast: bool = True, tokenizer_args: Dict = None, multithreading_rust: bool = True, - devices: Optional[List[Union[int, str, torch.device]]] = None, + devices: Optional[List[torch.device]] = None, use_auth_token: Union[bool, str] = None, **kwargs, ): diff --git a/haystack/modeling/model/adaptive_model.py b/haystack/modeling/model/adaptive_model.py index d1f4a6cbe..1bd885ce6 100644 --- a/haystack/modeling/model/adaptive_model.py +++ b/haystack/modeling/model/adaptive_model.py @@ -169,7 +169,7 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): prediction_heads: List[PredictionHead], embeds_dropout_prob: float, lm_output_types: Union[str, List[str]], - device: str, + device: torch.device, loss_aggregation_fn: Optional[Callable] = None, ): """ @@ -182,7 +182,7 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): "per_sequence", a single embedding will be extracted to represent the full input sequence. Can either be a single string, or a list of strings, one for each prediction head. - :param device: The device on which this model will operate. Either "cpu" or "cuda". + :param device: The device on which this model will operate. Either torch.device("cpu") or torch.device("cuda"). :param loss_aggregation_fn: Function to aggregate the loss of multiple prediction heads. Input: loss_per_head (list of tensors), global_step (int), batch (dict) Output: aggregated loss (tensor) @@ -258,13 +258,13 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): # Need to save config and pipeline @classmethod - def load( # type: ignore + def load( # type: ignore cls, - load_dir: Union[str, Path], # type: ignore - device: str, # type: ignore - strict: bool = True, # type: ignore - lm_name: Optional[str] = None, # type: ignore - processor: Optional[Processor] = None, # type: ignore + load_dir: Union[str, Path], + device: Union[str, torch.device], + strict: bool = True, + lm_name: Optional[str] = None, + processor: Optional[Processor] = None, ): """ Loads an AdaptiveModel from a directory. The directory must contain: @@ -277,12 +277,13 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): * vocab.txt vocab file for language model, turning text to Wordpiece Tokens :param load_dir: Location where the AdaptiveModel is stored. - :param device: To which device we want to sent the model, either cpu or cuda. + :param device: To which device we want to sent the model, either torch.device("cpu") or torch.device("cuda"). :param lm_name: The name to assign to the loaded language model. :param strict: Whether to strictly enforce that the keys loaded from saved model match the ones in the PredictionHead (see torch.nn.module.load_state_dict()). :param processor: Processor to populate prediction head with information coming from tasks. 
""" + device = torch.device(device) # Language Model if lm_name: language_model = LanguageModel.load(load_dir, haystack_lm_name=lm_name) @@ -489,9 +490,9 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): def convert_from_transformers( cls, model_name_or_path: Union[str, Path], - device: str, + device: torch.device, revision: Optional[str] = None, - task_type: Optional[str] = None, + task_type: str = "question_answering", processor: Optional[Processor] = None, use_auth_token: Optional[Union[bool, str]] = None, **kwargs, @@ -509,12 +510,9 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): See https://huggingface.co/models for full list :param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. - :param device: "cpu" or "cuda" - :param task_type: One of : - - 'question_answering' - More tasks coming soon ... + :param device: On which hardware the conversion should take place. Choose from torch.device("cpu") or torch.device("cuda") + :param task_type: 'question_answering'. More tasks coming soon ... :param processor: Processor to populate prediction head with information coming from tasks. - :type processor: Processor :return: AdaptiveModel """ return conv.Converter.convert_from_transformers( @@ -570,7 +568,7 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): tokenizer_name_or_path=model_name, task_type=task_type, max_seq_len=256, doc_stride=128, use_fast=True ) processor.save(output_path) - model = AdaptiveModel.convert_from_transformers(model_name, device="cpu", task_type=task_type) + model = AdaptiveModel.convert_from_transformers(model_name, device=torch.device("cpu"), task_type=task_type) model.save(output_path) os.remove(output_path / "language_model.bin") # remove the actual PyTorch model(only configs are required) @@ -617,14 +615,14 @@ class ONNXAdaptiveModel(BaseAdaptiveModel): language_model_class: str, language: str, prediction_heads: List[PredictionHead], - device: str, + device: torch.device, ): """ :param onnx_session: ? # TODO :param language_model_class: Class of LanguageModel :param langauge: Language the model is trained for. :param prediction_heads: A list of models that take embeddings and return logits for a given task. - :param device: The device on which this model will operate. Either "cpu" or "cuda". + :param device: The device on which this model will operate. Either torch.device("cpu") or torch.device("cuda"). """ import onnxruntime @@ -642,13 +640,14 @@ class ONNXAdaptiveModel(BaseAdaptiveModel): self.device = device @classmethod - def load(cls, load_dir: Union[str, Path], device: str, **kwargs): # type: ignore + def load(cls, load_dir: Union[str, Path], device: Union[str, torch.device], **kwargs): # type: ignore """ Loads an ONNXAdaptiveModel from a directory. :param load_dir: Location where the ONNXAdaptiveModel is stored. - :param device: The device on which this model will operate. Either "cpu" or "cuda". + :param device: The device on which this model will operate. Either torch.device("cpu") or torch.device("cuda"). """ + device = torch.device(device) load_dir = Path(load_dir) import onnxruntime @@ -657,7 +656,11 @@ class ONNXAdaptiveModel(BaseAdaptiveModel): sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED # Use OpenMP optimizations. Only useful for CPU, has little impact for GPUs. 
        sess_options.intra_op_num_threads = multiprocessing.cpu_count()
-        onnx_session = onnxruntime.InferenceSession(str(load_dir / "model.onnx"), sess_options)
+
+        providers = kwargs.get(
+            "providers", ["CPUExecutionProvider"] if device.type == "cpu" else ["CUDAExecutionProvider"]
+        )
+        onnx_session = onnxruntime.InferenceSession(str(load_dir / "model.onnx"), sess_options, providers=providers)
 
         # Prediction heads
         _, ph_config_files = cls._get_prediction_head_files(load_dir, strict=False)
diff --git a/haystack/modeling/model/biadaptive_model.py b/haystack/modeling/model/biadaptive_model.py
index 35b528bbb..fb8a2f2bf 100644
--- a/haystack/modeling/model/biadaptive_model.py
+++ b/haystack/modeling/model/biadaptive_model.py
@@ -36,7 +36,7 @@ class BiAdaptiveModel(nn.Module):
         language_model2: LanguageModel,
         prediction_heads: List[PredictionHead],
         embeds_dropout_prob: float = 0.1,
-        device: str = "cuda",
+        device: torch.device = torch.device("cuda"),
         lm1_output_types: Union[str, List[str]] = ["per_sequence"],
         lm2_output_types: Union[str, List[str]] = ["per_sequence"],
         loss_aggregation_fn: Optional[Callable] = None,
@@ -57,7 +57,7 @@ class BiAdaptiveModel(nn.Module):
                                 "per_sequence", a single embedding will be extracted to represent the full
                                 input sequence. Can either be a single string, or a list of strings,
                                 one for each prediction head.
-        :param device: The device on which this model will operate. Either "cpu" or "cuda".
+        :param device: The device on which this model will operate. Either torch.device("cpu") or torch.device("cuda").
         :param loss_aggregation_fn: Function to aggregate the loss of multiple prediction heads.
                                     Input: loss_per_head (list of tensors), global_step (int), batch (dict)
                                     Output: aggregated loss (tensor)
@@ -108,7 +108,7 @@ class BiAdaptiveModel(nn.Module):
     def load(
         cls,
         load_dir: Path,
-        device: str,
+        device: torch.device,
         strict: bool = False,
         lm1_name: str = "lm1",
         lm2_name: str = "lm2",
@@ -130,7 +130,7 @@ class BiAdaptiveModel(nn.Module):
         * special_tokens_map.json
 
         :param load_dir: Location where adaptive model is stored.
-        :param device: To which device we want to sent the model, either cpu or cuda.
+        :param device: To which device we want to send the model, either torch.device("cpu") or torch.device("cuda").
         :param lm1_name: The name to assign to the first loaded language model (for encoding queries).
         :param lm2_name: The name to assign to the second loaded language model (for encoding context/passages).
         :param strict: Whether to strictly enforce that the keys loaded from saved model match the ones in
@@ -432,8 +432,8 @@ class BiAdaptiveModel(nn.Module):
         cls,
         model_name_or_path1: Union[str, Path],
         model_name_or_path2: Union[str, Path],
-        device: str,
-        task_type: str,
+        device: torch.device,
+        task_type: str = "text_similarity",
         processor: Optional[Processor] = None,
         similarity_function: str = "dot_product",
     ):
@@ -451,9 +451,8 @@ class BiAdaptiveModel(nn.Module):
         Exemplary public names:
         - facebook/dpr-ctx_encoder-single-nq-base
         - deepset/bert-large-uncased-whole-word-masking-squad2
-        :param device: "cpu" or "cuda"
-        :param task_type: 'text_similarity'
-                          More tasks coming soon ...
+        :param device: On which hardware the conversion is going to run. Either torch.device("cpu") or torch.device("cuda").
+        :param task_type: 'text_similarity'. More tasks coming soon ...
:param processor: populates prediction head with information coming from tasks :type processor: Processor :return: AdaptiveModel diff --git a/haystack/modeling/model/optimization.py b/haystack/modeling/model/optimization.py index d7ca41b8b..37ec1160c 100644 --- a/haystack/modeling/model/optimization.py +++ b/haystack/modeling/model/optimization.py @@ -1,5 +1,5 @@ # TODO analyse if this optimization is needed or whether we can use HF transformers code -from typing import Dict, Any +from typing import Dict, Any, Optional import inspect import logging @@ -73,7 +73,7 @@ def initialize_optimizer( model: AdaptiveModel, n_batches: int, n_epochs: int, - device, + device: torch.device, learning_rate: float, optimizer_opts: Dict[Any, Any] = None, schedule_opts: Dict[Any, Any] = None, @@ -90,7 +90,7 @@ def initialize_optimizer( :param model: model to optimize (e.g. trimming weights to fp16 / mixed precision) :param n_batches: number of batches for training :param n_epochs: number of epochs for training - :param device: + :param device: Which hardware will be used by the optimizer. Either torch.device("cpu") or torch.device("cuda"). :param learning_rate: Learning rate :param optimizer_opts: Dict to customize the optimizer. Choose any optimizer available from torch.optim, apex.optimizers or transformers.optimization by supplying the class name and the parameters for the constructor. @@ -295,14 +295,20 @@ def get_scheduler(optimizer, opts): return scheduler -def optimize_model(model, device, local_rank, optimizer=None, distributed=False, use_amp=None): +def optimize_model( + model: "AdaptiveModel", + device: torch.device, + local_rank: int, + optimizer=None, + distributed: Optional[bool] = False, + use_amp: Optional[str] = None, +): """ Wraps MultiGPU or distributed usage around a model No support for ONNX models :param model: model to optimize (e.g. trimming weights to fp16 / mixed precision) - :type model: AdaptiveModel - :param device: either gpu or cpu, get the device from initialize_device_settings() + :param device: either torch.device("cpu") or torch.device("cuda"). Get the device from `initialize_device_settings()` :param distributed: Whether training on distributed machines :param local_rank: rank of the machine in a distributed setting :param use_amp: Optimization level of nvidia's automatic mixed precision (AMP). The higher the level, the faster the model. diff --git a/haystack/modeling/model/triadaptive_model.py b/haystack/modeling/model/triadaptive_model.py index 789dbd0ed..0de639a02 100644 --- a/haystack/modeling/model/triadaptive_model.py +++ b/haystack/modeling/model/triadaptive_model.py @@ -43,7 +43,7 @@ class TriAdaptiveModel(nn.Module): language_model3: LanguageModel, prediction_heads: List[PredictionHead], embeds_dropout_prob: float = 0.1, - device: str = "cuda", + device: torch.device = torch.device("cuda"), lm1_output_types: Union[str, List[str]] = ["per_sequence"], lm2_output_types: Union[str, List[str]] = ["per_sequence"], lm3_output_types: Union[str, List[str]] = ["per_sequence"], @@ -71,7 +71,7 @@ class TriAdaptiveModel(nn.Module): "per_sequence", a single embedding will be extracted to represent the full input sequence. Can either be a single string, or a list of strings, one for each prediction head. - :param device: The device on which this model will operate. Either "cpu" or "cuda". + :param device: The device on which this model will operate. Either torch.device("cpu") or torch.device("cuda"). 
:param loss_aggregation_fn: Function to aggregate the loss of multiple prediction heads.
                                    Input: loss_per_head (list of tensors), global_step (int), batch (dict)
                                    Output: aggregated loss (tensor)
@@ -129,7 +129,7 @@ class TriAdaptiveModel(nn.Module):
     def load(
         cls,
         load_dir: Path,
-        device: str,
+        device: torch.device,
         strict: bool = False,
         lm1_name: str = "lm1",
         lm2_name: str = "lm2",
@@ -155,7 +155,7 @@ class TriAdaptiveModel(nn.Module):
         * special_tokens_map.json
 
         :param load_dir: Location where the TriAdaptiveModel is stored.
-        :param device: To which device we want to sent the model, either cpu or cuda.
+        :param device: To which device we want to send the model, either torch.device("cpu") or torch.device("cuda").
         :param lm1_name: The name to assign to the first loaded language model (for encoding queries).
         :param lm2_name: The name to assign to the second loaded language model (for encoding context/passages).
         :param lm3_name: The name to assign to the second loaded language model (for encoding tables).
diff --git a/haystack/modeling/training/base.py b/haystack/modeling/training/base.py
index e2a213ff4..6554fa96f 100644
--- a/haystack/modeling/training/base.py
+++ b/haystack/modeling/training/base.py
@@ -126,7 +126,7 @@ class Trainer:
         data_silo: DataSilo,
         epochs: int,
         n_gpu: int,
-        device,
+        device: torch.device,
         lr_schedule=None,
         evaluate_every: int = 100,
         eval_report: bool = True,
@@ -152,7 +152,7 @@ class Trainer:
         :param data_silo: A DataSilo object that will contain the train, dev and test datasets as PyTorch DataLoaders
         :param epochs: How many times the training procedure will loop through the train dataset
         :param n_gpu: The number of gpus available for training and evaluation.
-        :param device: The device on which the train, dev and test tensors should be hosted. Choose from "cpu" and "cuda".
+        :param device: The device on which the train, dev and test tensors should be hosted. Choose from torch.device("cpu") and torch.device("cuda").
         :param lr_schedule: An optional scheduler object that can regulate the learning rate of the optimizer
         :param evaluate_every: Perform dev set evaluation after this many steps of training.
         :param eval_report: If evaluate_every is not 0, specifies if an eval report should be generated when evaluating
@@ -660,7 +660,7 @@ class DistillationTrainer(Trainer):
         data_silo: DistillationDataSilo,
         epochs: int,
         n_gpu: int,
-        device: str,
+        device: torch.device,
         lr_schedule: Optional["_LRScheduler"] = None,
         evaluate_every: int = 100,
         eval_report: bool = True,
@@ -691,7 +691,7 @@ class DistillationTrainer(Trainer):
         :param data_silo: A DataSilo object that will contain the train, dev and test datasets as PyTorch DataLoaders
         :param epochs: How many times the training procedure will loop through the train dataset
         :param n_gpu: The number of gpus available for training and evaluation.
-        :param device: The device on which the train, dev and test tensors should be hosted. Choose from "cpu" and "cuda".
+        :param device: The device on which the train, dev and test tensors should be hosted. Choose from torch.device("cpu") and torch.device("cuda").
         :param lr_schedule: An optional scheduler object that can regulate the learning rate of the optimizer
         :param evaluate_every: Perform dev set evaluation after this many steps of training.
:param eval_report: If evaluate_every is not 0, specifies if an eval report should be generated when evaluating @@ -833,7 +833,7 @@ class TinyBERTDistillationTrainer(Trainer): :param data_silo: A DataSilo object that will contain the train, dev and test datasets as PyTorch DataLoaders :param epochs: How many times the training procedure will loop through the train dataset :param n_gpu: The number of gpus available for training and evaluation. - :param device: The device on which the train, dev and test tensors should be hosted. Choose from "cpu" and "cuda". + :param device: The device on which the train, dev and test tensors should be hosted. Choose from torch.device("cpu") and torch.device("cuda"). :param lr_schedule: An optional scheduler object that can regulate the learning rate of the optimizer :param evaluate_every: Perform dev set evaluation after this many steps of training. :param eval_report: If evaluate_every is not 0, specifies if an eval report should be generated when evaluating diff --git a/haystack/nodes/ranker/sentence_transformers.py b/haystack/nodes/ranker/sentence_transformers.py index 67defe5fe..e471f4dbb 100644 --- a/haystack/nodes/ranker/sentence_transformers.py +++ b/haystack/nodes/ranker/sentence_transformers.py @@ -41,7 +41,7 @@ class SentenceTransformersRanker(BaseRanker): model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, - devices: Optional[List[Union[int, str, torch.device]]] = None, + devices: Optional[List[Union[str, torch.device]]] = None, ): """ :param model_name_or_path: Directory of a saved model or the name of a public model e.g. @@ -50,16 +50,20 @@ class SentenceTransformersRanker(BaseRanker): :param model_version: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. :param top_k: The maximum number of documents to return :param use_gpu: Whether to use all available GPUs or the CPU. Falls back on CPU if no GPU is available. - :param devices: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). + :param devices: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones + The strings will be converted into pytorch devices, so use the string notation described here: + https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device + (e.g. ["cuda:0"]). """ super().__init__() self.top_k = top_k if devices is not None: - self.devices = devices + self.devices = [torch.device(device) for device in devices] else: self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=True) + self.transformer_model = AutoModelForSequenceClassification.from_pretrained( pretrained_model_name_or_path=model_name_or_path, revision=model_version ) diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py index ccecc772f..73a6bfccf 100644 --- a/haystack/nodes/reader/farm.py +++ b/haystack/nodes/reader/farm.py @@ -777,7 +777,9 @@ class FARMReader(BaseReader): return result - def eval_on_file(self, data_dir: str, test_filename: str, device: Optional[str] = None): + def eval_on_file( + self, data_dir: Union[Path, str], test_filename: str, device: Optional[Union[str, torch.device]] = None + ): """ Performs evaluation on a SQuAD-formatted file. 
Returns a dict containing the following metrics: @@ -786,14 +788,16 @@ class FARMReader(BaseReader): - "top_n_accuracy": Proportion of predicted answers that overlap with correct answer :param data_dir: The directory in which the test set can be found - :type data_dir: Path or str :param test_filename: The name of the file containing the test data in SQuAD format. - :type test_filename: str - :param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default. - :type device: str + :param device: The device on which the tensors should be processed. + Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda") + or use the Reader's device by default. """ if device is None: device = self.devices[0] + else: + device = torch.device(device) + eval_processor = SquadProcessor( tokenizer=self.inferencer.processor.tokenizer, max_seq_len=self.inferencer.processor.max_seq_len, @@ -822,7 +826,7 @@ class FARMReader(BaseReader): def eval( self, document_store: BaseDocumentStore, - device: Optional[str] = None, + device: Optional[Union[str, torch.device]] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold-label", @@ -836,7 +840,9 @@ class FARMReader(BaseReader): - "top_n_accuracy": Proportion of predicted answers that overlap with correct answer :param document_store: DocumentStore containing the evaluation documents - :param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default. + :param device: The device on which the tensors should be processed. + Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda") + or use the Reader's device by default. :param label_index: Index/Table name where labeled questions are stored :param doc_index: Index/Table name where documents that are used for evaluation are stored :param label_origin: Field name where the gold labels are stored @@ -844,6 +850,9 @@ class FARMReader(BaseReader): """ if device is None: device = self.devices[0] + else: + device = torch.device(device) + if self.top_k_per_candidate != 4: logger.info( f"Performing Evaluation using top_k_per_candidate = {self.top_k_per_candidate} \n" @@ -1012,7 +1021,7 @@ class FARMReader(BaseReader): def calibrate_confidence_scores( self, document_store: BaseDocumentStore, - device: Optional[str] = None, + device: Optional[Union[str, torch.device]] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label", @@ -1021,7 +1030,9 @@ class FARMReader(BaseReader): Calibrates confidence scores on evaluation documents in the DocumentStore. :param document_store: DocumentStore containing the evaluation documents - :param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default. + :param device: The device on which the tensors should be processed. + Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda") + or use the Reader's device by default. 
:param label_index: Index/Table name where labeled questions are stored :param doc_index: Index/Table name where documents that are used for evaluation are stored :param label_origin: Field name where the gold labels are stored diff --git a/haystack/nodes/retriever/dense.py b/haystack/nodes/retriever/dense.py index 5d56245fc..6bd11aba1 100644 --- a/haystack/nodes/retriever/dense.py +++ b/haystack/nodes/retriever/dense.py @@ -54,7 +54,7 @@ class DensePassageRetriever(BaseRetriever): similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, - devices: Optional[List[Union[int, str, torch.device]]] = None, + devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, ): """ @@ -102,8 +102,11 @@ class DensePassageRetriever(BaseRetriever): Increase if errors like "encoded data exceeds max_size ..." come up :param progress_bar: Whether to show a tqdm progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. - :param devices: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). - As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. + :param devices: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones + These strings will be converted into pytorch devices, so use the string notation described here: + https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device + (e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for DPR, training + will only use the first device provided in this list. :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, the local token will be used, which must be previously created via `transformer-cli login`. Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -111,7 +114,7 @@ class DensePassageRetriever(BaseRetriever): super().__init__() if devices is not None: - self.devices = devices + self.devices = [torch.device(device) for device in devices] else: self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=True) @@ -193,7 +196,7 @@ class DensePassageRetriever(BaseRetriever): embeds_dropout_prob=0.1, lm1_output_types=["per_sequence"], lm2_output_types=["per_sequence"], - device=str(self.devices[0]), + device=self.devices[0], ) self.model.connect_heads_with_processor(self.processor.tasks, require_labels=False) @@ -548,7 +551,7 @@ class TableTextRetriever(BaseRetriever): similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, - devices: Optional[List[Union[int, str, torch.device]]] = None, + devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, ): """ @@ -582,8 +585,11 @@ class TableTextRetriever(BaseRetriever): Increase if errors like "encoded data exceeds max_size ..." come up :param progress_bar: Whether to show a tqdm progress bar or not. Can be helpful to disable in production deployments to keep the logs clean. - :param devices: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). 
- As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. + :param devices: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones + These strings will be converted into pytorch devices, so use the string notation described here: + https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device + (e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for TableTextRetriever, + training will only use the first device provided in this list. :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, the local token will be used, which must be previously created via `transformer-cli login`. Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -591,7 +597,7 @@ class TableTextRetriever(BaseRetriever): super().__init__() if devices is not None: - self.devices = devices + self.devices = [torch.device(device) for device in devices] else: self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=True) @@ -699,7 +705,7 @@ class TableTextRetriever(BaseRetriever): lm1_output_types=["per_sequence"], lm2_output_types=["per_sequence"], lm3_output_types=["per_sequence"], - device=str(self.devices[0]), + device=self.devices[0], ) self.model.connect_heads_with_processor(self.processor.tasks, require_labels=False) @@ -1075,7 +1081,7 @@ class EmbeddingRetriever(BaseRetriever): emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, - devices: Optional[List[Union[int, str, torch.device]]] = None, + devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, ): """ @@ -1101,8 +1107,11 @@ class EmbeddingRetriever(BaseRetriever): Default: -1 (very last layer). :param top_k: How many documents to return per query. :param progress_bar: If true displays progress bar during embedding. - :param devices: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]). - As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. + :param devices: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones + These strings will be converted into pytorch devices, so use the string notation described here: + https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device + (e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever, + training will only use the first device provided in this list. :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, the local token will be used, which must be previously created via `transformer-cli login`. 
Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -1110,7 +1119,7 @@ class EmbeddingRetriever(BaseRetriever): super().__init__() if devices is not None: - self.devices = devices + self.devices = [torch.device(device) for device in devices] else: self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=True) diff --git a/haystack/utils/augment_squad.py b/haystack/utils/augment_squad.py index 35a79f50b..e9be4ac8b 100644 --- a/haystack/utils/augment_squad.py +++ b/haystack/utils/augment_squad.py @@ -23,6 +23,7 @@ Arguments: model: Huggingface MLM model identifier. """ +from typing import Tuple, List, Union import torch from torch.nn import functional as F @@ -37,13 +38,14 @@ import argparse import json import logging from tqdm import tqdm -from typing import Tuple, List logger = logging.getLogger(__name__) def load_glove( - glove_path: Path = Path("glove.txt"), vocab_size: int = 100_000, device: str = "cpu:0" + glove_path: Path = Path("glove.txt"), + vocab_size: int = 100_000, + device: Union[str, torch.device] = torch.device("cpu:0"), ) -> Tuple[dict, dict, torch.Tensor]: """Loads the GloVe vectors and returns a mapping from words to their GloVe vector indices and the other way around.""" @@ -112,8 +114,9 @@ def get_replacements( text: str, word_possibilities: int = 20, batch_size: int = 16, - device: str = "cpu:0", + device: torch.device = torch.device("cpu:0"), ) -> List[List[str]]: + """Returns a list of possible replacements for each word in the text.""" input_ids, words, word_subword_mapping = tokenize_and_extract_words(text, tokenizer) @@ -179,8 +182,9 @@ def augment( word_possibilities: int = 20, replace_probability: float = 0.4, batch_size: int = 16, - device: str = "cpu:0", + device: Union[str, torch.device] = torch.device("cpu:0"), ) -> List[str]: + device = torch.device(device) # returns a list of different augmented versions of the text replacements = get_replacements( glove_word_id_mapping=word_id_mapping, @@ -211,16 +215,17 @@ def augment( def augment_squad( squad_path: Path, output_path: Path, + glove_path: Path = Path("glove.txt"), model: str = "bert-base-uncased", tokenizer: str = "bert-base-uncased", - glove_path: Path = Path("glove.txt"), multiplication_factor: int = 20, word_possibilities: int = 20, replace_probability: float = 0.4, - device: str = "cpu:0", + device: Union[str, torch.device] = "cpu:0", batch_size: int = 16, ): """Loads a squad dataset, augments the contexts, and saves the result in SQuAD format.""" + device = torch.device(device) # loading model and tokenizer transformers_model = AutoModelForMaskedLM.from_pretrained(model) transformers_model.to(device)
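
Supplementary note (not part of the patch above): a minimal sketch of the device-handling convention these changes converge on. Device arguments may be passed either as strings or as `torch.device` objects, and lists of devices are normalized with `torch.device(...)`, exactly as the patched retriever and ranker constructors now do. The commented Haystack call and the model name below are illustrative assumptions, not taken from the diff.

```python
import torch

# Devices can be supplied as strings or as torch.device objects;
# strings are normalized the same way the patched constructors do it.
requested = ["cuda:0", torch.device("cpu")]
devices = [torch.device(d) for d in requested]

print(devices)          # [device(type='cuda', index=0), device(type='cpu')]
print(devices[1].type)  # 'cpu' -- downstream code can branch on .type,
                        # as the ONNX provider selection above does

# Hypothetical usage against the updated signatures (requires haystack;
# the model name is only an example):
# ranker = SentenceTransformersRanker(
#     model_name_or_path="cross-encoder/ms-marco-MiniLM-L-12-v2",
#     devices=["cuda:0"],  # or [torch.device("cuda:0")]
# )
```

Normalizing to `torch.device` at the boundary is what lets the rest of the code rely on attributes such as `.type` and `.index` instead of parsing strings.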