From 0cf0568dd0cdcb6b9084cc50bb6fbdbddc8b48b4 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Thu, 25 Aug 2022 10:30:03 +0200 Subject: [PATCH] fix: Use use_auth_token in all cases when loading from the HF Hub (#3094) * Making sure to pass on use_auth_token to all from_pretrained calls --- docs/_src/api/api/document_classifier.md | 7 +- docs/_src/api/api/evaluation.md | 7 +- docs/_src/api/api/extractor.md | 5 + docs/_src/api/api/generator.md | 18 +- docs/_src/api/api/pipelines.md | 7 +- docs/_src/api/api/pseudo_label_generator.md | 7 +- docs/_src/api/api/query_classifier.md | 7 +- docs/_src/api/api/question_generator.md | 8 +- docs/_src/api/api/ranker.md | 7 +- docs/_src/api/api/reader.md | 31 ++- docs/_src/api/api/retriever.md | 39 ++-- docs/_src/api/api/summarizer.md | 7 +- docs/_src/api/api/translator.md | 7 +- .../haystack-pipeline-main.schema.json | 196 ++++++++++++++++++ haystack/modeling/infer.py | 7 +- haystack/modeling/model/adaptive_model.py | 35 +++- haystack/modeling/model/biadaptive_model.py | 10 +- haystack/modeling/model/language_model.py | 46 +++- haystack/modeling/model/prediction_head.py | 16 +- haystack/modeling/model/tokenization.py | 6 +- haystack/nodes/answer_generator/openai.py | 2 +- .../nodes/answer_generator/transformers.py | 28 ++- .../nodes/document_classifier/transformers.py | 14 +- haystack/nodes/evaluator/evaluator.py | 19 +- haystack/nodes/extractor/entity.py | 13 +- .../label_generator/pseudo_label_generator.py | 13 +- .../nodes/query_classifier/transformers.py | 13 +- .../question_generator/question_generator.py | 11 +- .../nodes/ranker/sentence_transformers.py | 10 +- haystack/nodes/reader/farm.py | 8 +- haystack/nodes/reader/table.py | 48 +++-- haystack/nodes/reader/transformers.py | 13 +- .../nodes/retriever/_embedding_encoder.py | 8 +- haystack/nodes/retriever/dense.py | 49 +++-- haystack/nodes/retriever/text2sparql.py | 17 +- haystack/nodes/summarizer/transformers.py | 12 +- haystack/nodes/translator/transformers.py | 10 +- haystack/pipelines/base.py | 7 + haystack/utils/docker.py | 14 +- 39 files changed, 660 insertions(+), 122 deletions(-) diff --git a/docs/_src/api/api/document_classifier.md b/docs/_src/api/api/document_classifier.md index 1aa84a58f..375c3baa2 100644 --- a/docs/_src/api/api/document_classifier.md +++ b/docs/_src/api/api/document_classifier.md @@ -84,7 +84,7 @@ With this document_classifier, you can directly get predictions via predict() #### TransformersDocumentClassifier.\_\_init\_\_ ```python -def __init__(model_name_or_path: str = "bhadresh-savani/distilbert-base-uncased-emotion", model_version: Optional[str] = None, tokenizer: Optional[str] = None, use_gpu: bool = True, return_all_scores: bool = False, task: str = "text-classification", labels: Optional[List[str]] = None, batch_size: int = 16, classification_field: str = None, progress_bar: bool = True) +def __init__(model_name_or_path: str = "bhadresh-savani/distilbert-base-uncased-emotion", model_version: Optional[str] = None, tokenizer: Optional[str] = None, use_gpu: bool = True, return_all_scores: bool = False, task: str = "text-classification", labels: Optional[List[str]] = None, batch_size: int = 16, classification_field: str = None, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` Load a text classification model from Transformers. @@ -117,6 +117,11 @@ or an entailment. - `batch_size`: Number of Documents to be processed at a time. - `classification_field`: Name of Document's meta field to be used for classification. 
If left unset, Document.content is used by default. - `progress_bar`: Whether to show a progress bar while processing. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/evaluation.md b/docs/_src/api/api/evaluation.md index 35d4bebe1..7db66a452 100644 --- a/docs/_src/api/api/evaluation.md +++ b/docs/_src/api/api/evaluation.md @@ -123,7 +123,7 @@ Print the evaluation results #### semantic\_answer\_similarity ```python -def semantic_answer_similarity(predictions: List[List[str]], gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", batch_size: int = 32, use_gpu: bool = True) -> Tuple[List[float], List[float], List[List[float]]] +def semantic_answer_similarity(predictions: List[List[str]], gold_labels: List[List[str]], sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", batch_size: int = 32, use_gpu: bool = True, use_auth_token: Optional[Union[str, bool]] = None) -> Tuple[List[float], List[float], List[List[float]]] ``` Computes Transformer-based similarity of predicted answer to gold labels to derive a more meaningful metric than EM or F1. @@ -141,6 +141,11 @@ pointing to downloadable models. - `batch_size`: Number of prediction label pairs to encode at once. - `use_gpu`: Whether to use a GPU or the CPU for calculating semantic answer similarity. Falls back to CPU if no GPU is available. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained **Returns**: diff --git a/docs/_src/api/api/extractor.md b/docs/_src/api/api/extractor.md index 7828b5f12..67dc56b4b 100644 --- a/docs/_src/api/api/extractor.md +++ b/docs/_src/api/api/extractor.md @@ -24,6 +24,11 @@ The entities extracted by this Node will populate Document.entities - `use_gpu`: Whether to use the GPU or not. - `batch_size`: The batch size to use for entity extraction. - `progress_bar`: Whether to show a progress bar or not. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/generator.md b/docs/_src/api/api/generator.md index 13fb497c1..0ff3cf4f1 100644 --- a/docs/_src/api/api/generator.md +++ b/docs/_src/api/api/generator.md @@ -138,7 +138,7 @@ i.e. 
the model can easily adjust to domain documents even after training has fin #### RAGenerator.\_\_init\_\_ ```python -def __init__(model_name_or_path: str = "facebook/rag-token-nq", model_version: Optional[str] = None, retriever: Optional[DensePassageRetriever] = None, generator_type: str = "token", top_k: int = 2, max_length: int = 200, min_length: int = 2, num_beams: int = 2, embed_title: bool = True, prefix: Optional[str] = None, use_gpu: bool = True, progress_bar: bool = True) +def __init__(model_name_or_path: str = "facebook/rag-token-nq", model_version: Optional[str] = None, retriever: Optional[DensePassageRetriever] = None, generator_type: str = "token", top_k: int = 2, max_length: int = 200, min_length: int = 2, num_beams: int = 2, embed_title: bool = True, prefix: Optional[str] = None, use_gpu: bool = True, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` Load a RAG model from Transformers along with passage_embedding_model. @@ -160,6 +160,12 @@ See https://huggingface.co/models for full list of available models. - `embed_title`: Embedded the title of passage while generating embedding - `prefix`: The prefix used by the generator's tokenizer. - `use_gpu`: Whether to use GPU. Falls back on CPU if no GPU is available. +- `progress_bar`: Whether to show a tqdm progress bar or not. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -256,7 +262,7 @@ the [Hugging Face Model Hub](https://huggingface.co/models?pipeline_tag=text2tex #### Seq2SeqGenerator.\_\_init\_\_ ```python -def __init__(model_name_or_path: str, input_converter: Optional[Callable] = None, top_k: int = 1, max_length: int = 200, min_length: int = 2, num_beams: int = 8, use_gpu: bool = True, progress_bar: bool = True) +def __init__(model_name_or_path: str, input_converter: Optional[Callable] = None, top_k: int = 1, max_length: int = 200, min_length: int = 2, num_beams: int = 8, use_gpu: bool = True, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` **Arguments**: @@ -272,6 +278,12 @@ top_k: Optional[int] = None) -> BatchEncoding: - `min_length`: Minimum length of generated text - `num_beams`: Number of beams for beam search. 1 means no beam search. - `use_gpu`: Whether to use GPU or the CPU. Falls back on CPU if no GPU is available. +- `progress_bar`: Whether to show a tqdm progress bar or not. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -311,7 +323,7 @@ Uses the GPT-3 models from the OpenAI API to generate Answers based on the Docum The Documents can come from a Retriever or you can supply them manually. To use this Node, you need an API key from an active OpenAI account. You can sign-up for an account -on the [OpenAI API website](https://openai.com/api/)). +on the [OpenAI API website](https://openai.com/api/). 
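For illustration only (not part of this patch): a minimal sketch of the two accepted forms of `use_auth_token` that the docstrings added above describe, shown with plain `transformers` `from_pretrained` calls. The repository name is a hypothetical private model.

```python
# Minimal sketch, not part of the patch: the two forms of use_auth_token described
# in the docstrings above. "my-org/private-model" is a hypothetical private repo.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

repo = "my-org/private-model"  # hypothetical private Hub repository

# 1) Pass the API token itself as a string:
tokenizer = AutoTokenizer.from_pretrained(repo, use_auth_token="hf_xxx")
model = AutoModelForSeq2SeqLM.from_pretrained(repo, use_auth_token="hf_xxx")

# 2) Or pass True to reuse the token created by `transformers-cli login`
#    (stored in ~/.huggingface):
model = AutoModelForSeq2SeqLM.from_pretrained(repo, use_auth_token=True)
```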
diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index 72b53ea4a..54cdc9a6e 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -509,7 +509,7 @@ Thus [AB] <-> [BC] (score ~50) gets recalculated with B <-> B (score ~100) scori ```python @send_event -def eval(labels: List[MultiLabel], documents: Optional[List[List[Document]]] = None, params: Optional[dict] = None, sas_model_name_or_path: str = None, sas_batch_size: int = 32, sas_use_gpu: bool = True, add_isolated_node_eval: bool = False, custom_document_id_field: Optional[str] = None, context_matching_min_length: int = 100, context_matching_boost_split_overlaps: bool = True, context_matching_threshold: float = 65.0) -> EvaluationResult +def eval(labels: List[MultiLabel], documents: Optional[List[List[Document]]] = None, params: Optional[dict] = None, sas_model_name_or_path: str = None, sas_batch_size: int = 32, sas_use_gpu: bool = True, add_isolated_node_eval: bool = False, custom_document_id_field: Optional[str] = None, context_matching_min_length: int = 100, context_matching_boost_split_overlaps: bool = True, context_matching_threshold: float = 65.0, use_auth_token: Optional[Union[str, bool]] = None) -> EvaluationResult ``` Evaluates the pipeline by running the pipeline once per query in debug mode @@ -563,6 +563,11 @@ If we detect that the score is near a half match and the matching part of the ca we cut the context on the same side, recalculate the score and take the mean of both. Thus [AB] <-> [BC] (score ~50) gets recalculated with B <-> B (score ~100) scoring ~75 in total. - `context_matching_threshold`: Score threshold that candidates must surpass to be included into the result list. Range: [0,100] +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/pseudo_label_generator.md b/docs/_src/api/api/pseudo_label_generator.md index 53cba941b..d8fa9a4c1 100644 --- a/docs/_src/api/api/pseudo_label_generator.md +++ b/docs/_src/api/api/pseudo_label_generator.md @@ -53,7 +53,7 @@ For example: #### PseudoLabelGenerator.\_\_init\_\_ ```python -def __init__(question_producer: Union[QuestionGenerator, List[Dict[str, str]]], retriever: BaseRetriever, cross_encoder_model_name_or_path: str = "cross-encoder/ms-marco-MiniLM-L-6-v2", max_questions_per_document: int = 3, top_k: int = 50, batch_size: int = 16, progress_bar: bool = True) +def __init__(question_producer: Union[QuestionGenerator, List[Dict[str, str]]], retriever: BaseRetriever, cross_encoder_model_name_or_path: str = "cross-encoder/ms-marco-MiniLM-L-6-v2", max_questions_per_document: int = 3, top_k: int = 50, batch_size: int = 16, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` Loads the cross-encoder model and prepares PseudoLabelGenerator. @@ -69,6 +69,11 @@ questions/document pairs in a Dictionary format {"question": "question text ..." - `top_k` (`int (optional)`): The number of answers retrieved for each question, defaults to 50. - `batch_size` (`int (optional)`): The number of documents to process at a time. - `progress_bar` (`bool (optional)`): Whether to show a progress bar, defaults to True. 
+- `use_auth_token` (`Union[str, bool] (optional)`): The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/query_classifier.md b/docs/_src/api/api/query_classifier.md index b92d8fa7c..81b89c373 100644 --- a/docs/_src/api/api/query_classifier.md +++ b/docs/_src/api/api/query_classifier.md @@ -144,7 +144,7 @@ This node also supports zero-shot-classification. #### TransformersQueryClassifier.\_\_init\_\_ ```python -def __init__(model_name_or_path: Union[Path, str] = "shahrukhx01/bert-mini-finetune-question-detection", model_version: Optional[str] = None, tokenizer: Optional[str] = None, use_gpu: bool = True, task: str = "text-classification", labels: List[str] = DEFAULT_LABELS, batch_size: int = 16, progress_bar: bool = True) +def __init__(model_name_or_path: Union[Path, str] = "shahrukhx01/bert-mini-finetune-question-detection", model_version: Optional[str] = None, tokenizer: Optional[str] = None, use_gpu: bool = True, task: str = "text-classification", labels: List[str] = DEFAULT_LABELS, batch_size: int = 16, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` **Arguments**: @@ -160,4 +160,9 @@ the second label to output_2, and so on. The labels must match the model labels; If the task is 'zero-shot-classification', these are the candidate labels. - `batch_size`: The number of queries to be processed at a time. - `progress_bar`: Whether to show a progress bar. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/question_generator.md b/docs/_src/api/api/question_generator.md index 3e00a211d..e8ab9f4a4 100644 --- a/docs/_src/api/api/question_generator.md +++ b/docs/_src/api/api/question_generator.md @@ -23,7 +23,7 @@ come from earlier in the document. #### QuestionGenerator.\_\_init\_\_ ```python -def __init__(model_name_or_path="valhalla/t5-base-e2e-qg", model_version=None, num_beams=4, max_length=256, no_repeat_ngram_size=3, length_penalty=1.5, early_stopping=True, split_length=50, split_overlap=10, use_gpu=True, prompt="generate questions:", num_queries_per_doc=1, sep_token: str = "", batch_size: int = 16, progress_bar: bool = True) +def __init__(model_name_or_path="valhalla/t5-base-e2e-qg", model_version=None, num_beams=4, max_length=256, no_repeat_ngram_size=3, length_penalty=1.5, early_stopping=True, split_length=50, split_overlap=10, use_gpu=True, prompt="generate questions:", num_queries_per_doc=1, sep_token: str = "", batch_size: int = 16, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` Uses the valhalla/t5-base-e2e-qg model by default. This class supports any question generation model that is @@ -39,6 +39,12 @@ See https://huggingface.co/models for full list of available models. - `model_version`: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. - `use_gpu`: Whether to use GPU or the CPU. 
Falls back on CPU if no GPU is available. - `batch_size`: Number of documents to process at a time. +- `progress_bar`: Whether to show a tqdm progress bar or not. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/ranker.md b/docs/_src/api/api/ranker.md index afb674461..912e2fc7b 100644 --- a/docs/_src/api/api/ranker.md +++ b/docs/_src/api/api/ranker.md @@ -94,7 +94,7 @@ Usage example: #### SentenceTransformersRanker.\_\_init\_\_ ```python -def __init__(model_name_or_path: Union[str, Path], model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, batch_size: int = 16, scale_score: bool = True, progress_bar: bool = True) +def __init__(model_name_or_path: Union[str, Path], model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, batch_size: int = 16, scale_score: bool = True, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` **Arguments**: @@ -114,6 +114,11 @@ https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device# only predicts a single label. For multi-label predictions, no scaling is applied. Set this to False if you do not want any scaling of the raw predictions. - `progress_bar`: Whether to show a progress bar while processing the documents. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/reader.md b/docs/_src/api/api/reader.md index b111df1bd..b163a9e45 100644 --- a/docs/_src/api/api/reader.md +++ b/docs/_src/api/api/reader.md @@ -100,9 +100,11 @@ unscaled raw scores. - `proxies`: Dict of proxy servers to use for downloading external models. Example: {'http': 'some.proxy:1234', 'http://hostname': 'my.proxy:3111'} - `local_files_only`: Whether to force checking for local files only (and forbid downloads) - `force_download`: Whether fo force a (re-)download even if the model exists locally in the cache. -- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, -the local token will be used, which must be previously created via `transformer-cli login`. -Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. 
+Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -587,7 +589,7 @@ With this reader, you can directly get predictions via predict() #### TransformersReader.\_\_init\_\_ ```python -def __init__(model_name_or_path: str = "distilbert-base-uncased-distilled-squad", model_version: Optional[str] = None, tokenizer: Optional[str] = None, context_window_size: int = 70, use_gpu: bool = True, top_k: int = 10, top_k_per_candidate: int = 3, return_no_answers: bool = False, max_seq_len: int = 256, doc_stride: int = 128, batch_size: int = 16) +def __init__(model_name_or_path: str = "distilbert-base-uncased-distilled-squad", model_version: Optional[str] = None, tokenizer: Optional[str] = None, context_window_size: int = 70, use_gpu: bool = True, top_k: int = 10, top_k_per_candidate: int = 3, return_no_answers: bool = False, max_seq_len: int = 256, doc_stride: int = 128, batch_size: int = 16, use_auth_token: Optional[Union[str, bool]] = None) ``` Load a QA model from Transformers. @@ -621,6 +623,11 @@ If you would like to set no_answer_boost, use a `FARMReader`. - `max_seq_len`: max sequence length of one input text for the model - `doc_stride`: length of striding window for splitting long texts (used if len(text) > max_seq_len) - `batch_size`: Number of documents to process at a time. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -710,7 +717,7 @@ With this reader, you can directly get predictions via predict() ```python from haystack import Document -from haystack.reader import TableReader +from haystack.nodes import TableReader import pandas as pd table_reader = TableReader(model_name_or_path="google/tapas-base-finetuned-wtq") @@ -732,7 +739,7 @@ answer = prediction["answers"][0].answer # "10 june 1996" #### TableReader.\_\_init\_\_ ```python -def __init__(model_name_or_path: str = "google/tapas-base-finetuned-wtq", model_version: Optional[str] = None, tokenizer: Optional[str] = None, use_gpu: bool = True, top_k: int = 10, top_k_per_candidate: int = 3, return_no_answer: bool = False, max_seq_len: int = 256) +def __init__(model_name_or_path: str = "google/tapas-base-finetuned-wtq", model_version: Optional[str] = None, tokenizer: Optional[str] = None, use_gpu: bool = True, top_k: int = 10, top_k_per_candidate: int = 3, return_no_answer: bool = False, max_seq_len: int = 256, use_auth_token: Optional[Union[str, bool]] = None) ``` Load a TableQA model from Transformers. @@ -768,6 +775,11 @@ the retriever. - `max_seq_len`: Max sequence length of one input table for the model. If the number of tokens of query + table exceed max_seq_len, the table will be truncated by removing rows until the input size fits the model. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. 
+Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained @@ -861,7 +873,7 @@ Pros and Cons of RCIReader compared to TableReader: #### RCIReader.\_\_init\_\_ ```python -def __init__(row_model_name_or_path: str = "michaelrglass/albert-base-rci-wikisql-row", column_model_name_or_path: str = "michaelrglass/albert-base-rci-wikisql-col", row_model_version: Optional[str] = None, column_model_version: Optional[str] = None, row_tokenizer: Optional[str] = None, column_tokenizer: Optional[str] = None, use_gpu: bool = True, top_k: int = 10, max_seq_len: int = 256) +def __init__(row_model_name_or_path: str = "michaelrglass/albert-base-rci-wikisql-row", column_model_name_or_path: str = "michaelrglass/albert-base-rci-wikisql-col", row_model_version: Optional[str] = None, column_model_version: Optional[str] = None, row_tokenizer: Optional[str] = None, column_tokenizer: Optional[str] = None, use_gpu: bool = True, top_k: int = 10, max_seq_len: int = 256, use_auth_token: Optional[Union[str, bool]] = None) ``` Load an RCI model from Transformers. @@ -886,6 +898,11 @@ Can be tag name, branch name, or commit hash. - `max_seq_len`: Max sequence length of one input table for the model. If the number of tokens of query + table exceed max_seq_len, the table will be truncated by removing rows until the input size fits the model. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/retriever.md b/docs/_src/api/api/retriever.md index fdabc26c2..4f2ba8ca1 100644 --- a/docs/_src/api/api/retriever.md +++ b/docs/_src/api/api/retriever.md @@ -572,9 +572,11 @@ These strings will be converted into pytorch devices, so use the string notation https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. -- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, -the local token will be used, which must be previously created via `transformer-cli login`. -Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained - `scale_score`: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. 
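Usage note (a sketch, not part of the diff): with this change, passing `use_auth_token` to a retriever constructor is enough for the token to reach every underlying `from_pretrained` call. A minimal example with the default DPR checkpoints; the token value is a placeholder.

```python
# Hedged sketch: pass use_auth_token once at the node level; the patch ensures it is
# forwarded to all from_pretrained calls made while loading the encoders.
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import DensePassageRetriever

retriever = DensePassageRetriever(
    document_store=InMemoryDocumentStore(),
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    use_auth_token="hf_xxx",  # or True to reuse the token from `transformers-cli login`
)
```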
@@ -937,9 +939,11 @@ These strings will be converted into pytorch devices, so use the string notation https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for TableTextRetriever, training will only use the first device provided in this list. -- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, -the local token will be used, which must be previously created via `transformer-cli login`. -Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained - `scale_score`: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. @@ -1213,9 +1217,11 @@ These strings will be converted into pytorch devices, so use the string notation https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever, training will only use the first device provided in this list. -- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, -the local token will be used, which must be previously created via `transformer-cli login`. -Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained - `scale_score`: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. @@ -1534,9 +1540,11 @@ These strings will be converted into pytorch devices, so use the string notation https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever, training will only use the first device provided in this list. -- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`, -the local token will be used, which must be previously created via `transformer-cli login`. 
-Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained - `scale_score`: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. @@ -1747,7 +1755,7 @@ The generated SPARQL query is executed on a knowledge graph. #### Text2SparqlRetriever.\_\_init\_\_ ```python -def __init__(knowledge_graph, model_name_or_path, top_k: int = 1) +def __init__(knowledge_graph, model_name_or_path, top_k: int = 1, use_auth_token: Optional[Union[str, bool]] = None) ``` Init the Retriever by providing a knowledge graph and a pre-trained BART model @@ -1757,6 +1765,11 @@ Init the Retriever by providing a knowledge graph and a pre-trained BART model - `knowledge_graph`: An instance of BaseKnowledgeGraph on which to execute SPARQL queries. - `model_name_or_path`: Name of or path to a pre-trained BartForConditionalGeneration model. - `top_k`: How many SPARQL queries to generate per text query. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/summarizer.md b/docs/_src/api/api/summarizer.md index 134f6dce2..f5a5a23c5 100644 --- a/docs/_src/api/api/summarizer.md +++ b/docs/_src/api/api/summarizer.md @@ -87,7 +87,7 @@ See the up-to-date list of available models on #### TransformersSummarizer.\_\_init\_\_ ```python -def __init__(model_name_or_path: str = "google/pegasus-xsum", model_version: Optional[str] = None, tokenizer: Optional[str] = None, max_length: int = 200, min_length: int = 5, use_gpu: bool = True, clean_up_tokenization_spaces: bool = True, separator_for_single_summary: str = " ", generate_single_summary: bool = False, batch_size: int = 16, progress_bar: bool = True) +def __init__(model_name_or_path: str = "google/pegasus-xsum", model_version: Optional[str] = None, tokenizer: Optional[str] = None, max_length: int = 200, min_length: int = 5, use_gpu: bool = True, clean_up_tokenization_spaces: bool = True, separator_for_single_summary: str = " ", generate_single_summary: bool = False, batch_size: int = 16, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` Load a Summarization model from Transformers. @@ -114,6 +114,11 @@ be summarized. Important: The summary will depend on the order of the supplied documents! - `batch_size`: Number of documents to process at a time. - `progress_bar`: Whether to show a progress bar. +- `use_auth_token`: The API token used to download private models from Huggingface. 
+If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/docs/_src/api/api/translator.md b/docs/_src/api/api/translator.md index f996b3c5a..f93d961e2 100644 --- a/docs/_src/api/api/translator.md +++ b/docs/_src/api/api/translator.md @@ -68,7 +68,7 @@ We currently recommend using OPUS models (see __init__() for details) #### TransformersTranslator.\_\_init\_\_ ```python -def __init__(model_name_or_path: str, tokenizer_name: Optional[str] = None, max_seq_len: Optional[int] = None, clean_up_tokenization_spaces: Optional[bool] = True, use_gpu: bool = True, progress_bar: bool = True) +def __init__(model_name_or_path: str, tokenizer_name: Optional[str] = None, max_seq_len: Optional[int] = None, clean_up_tokenization_spaces: Optional[bool] = True, use_gpu: bool = True, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None) ``` Initialize the translator with a model that fits your targeted languages. While we support all seq2seq @@ -94,6 +94,11 @@ tokenizer. - `clean_up_tokenization_spaces`: Whether or not to clean up the tokenization spaces. (default True) - `use_gpu`: Whether to use GPU or the CPU. Falls back on CPU if no GPU is available. - `progress_bar`: Whether to show a progress bar. +- `use_auth_token`: The API token used to download private models from Huggingface. +If this parameter is set to `True`, then the token generated when running +`transformers-cli login` (stored in ~/.huggingface) will be used. +Additional information can be found here +https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained diff --git a/haystack/json-schemas/haystack-pipeline-main.schema.json b/haystack/json-schemas/haystack-pipeline-main.schema.json index 5298ecfd6..dd2e76d6b 100644 --- a/haystack/json-schemas/haystack-pipeline-main.schema.json +++ b/haystack/json-schemas/haystack-pipeline-main.schema.json @@ -2986,6 +2986,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -4312,6 +4326,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ @@ -4407,6 +4435,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -4515,6 +4557,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -4613,6 +4669,20 @@ "title": "Max Seq Len", "default": 256, "type": "integer" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -4759,6 +4829,20 @@ "title": "Progress Bar", "default": true, 
"type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ @@ -4837,6 +4921,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ @@ -4986,6 +5084,20 @@ "title": "Max Seq Len", "default": 256, "type": "integer" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -5219,6 +5331,20 @@ "title": "Top K", "default": 1, "type": "integer" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ @@ -5501,6 +5627,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -5596,6 +5736,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -5692,6 +5846,20 @@ "title": "Batch Size", "default": 16, "type": "integer" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -5788,6 +5956,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "additionalProperties": false, @@ -5865,6 +6047,20 @@ "title": "Progress Bar", "default": true, "type": "boolean" + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "null" + } + ] } }, "required": [ diff --git a/haystack/modeling/infer.py b/haystack/modeling/infer.py index adfddf1d5..44c766081 100644 --- a/haystack/modeling/infer.py +++ b/haystack/modeling/infer.py @@ -126,7 +126,7 @@ class Inferencer: tokenizer_args: Dict = None, multithreading_rust: bool = True, devices: Optional[List[torch.device]] = None, - use_auth_token: Union[bool, str] = None, + use_auth_token: Optional[Union[bool, str]] = None, **kwargs, ): """ @@ -167,6 +167,11 @@ class Inferencer: Note: Enabling multithreading in Rust AND multiprocessing in python might cause deadlocks. :param devices: List of devices to perform inference on. (Currently, only the first device in the list is used.) + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :return: An instance of the Inferencer. 
""" if tokenizer_args is None: diff --git a/haystack/modeling/model/adaptive_model.py b/haystack/modeling/model/adaptive_model.py index 1d01dc467..479bbd33b 100644 --- a/haystack/modeling/model/adaptive_model.py +++ b/haystack/modeling/model/adaptive_model.py @@ -305,8 +305,8 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): device: Union[str, torch.device], revision: str = None, task_type: str = "question_answering", - processor: Processor = None, - use_auth_token: Union[bool, str] = None, + processor: Optional[Processor] = None, + use_auth_token: Optional[Union[bool, str]] = None, **kwargs, ) -> "AdaptiveModel": """ @@ -325,6 +325,11 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): :param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. Right now accepts only 'question_answering'. :param processor: populates prediction head with information coming from tasks. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :return: AdaptiveModel """ @@ -343,7 +348,9 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): ) if task_type == "question_answering": - ph = QuestionAnsweringHead.load(model_name_or_path, revision=revision, **kwargs) + ph = QuestionAnsweringHead.load( + model_name_or_path, revision=revision, use_auth_token=use_auth_token, **kwargs + ) adaptive_model = cls( language_model=lm, prediction_heads=[ph], @@ -599,6 +606,7 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): convert_to_float16: bool = False, quantize: bool = False, opset_version: int = 11, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Convert a PyTorch model from transformers hub to an ONNX Model. @@ -611,6 +619,11 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): might be more performant. :param quantize: Convert floating point number to integers :param opset_version: ONNX opset version. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :return: None. 
""" language_model_class = LanguageModel.get_language_model_class(model_name) @@ -626,14 +639,22 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): output=output_path / "model.onnx", opset=opset_version, use_external_format=True if language_model_class == "XLMRoberta" else False, + use_auth_token=use_auth_token, ) # save processor & model config files that are needed when loading the model with the Haystack.basics Inferencer processor = Processor.convert_from_transformers( - tokenizer_name_or_path=model_name, task_type=task_type, max_seq_len=256, doc_stride=128, use_fast=True + tokenizer_name_or_path=model_name, + task_type=task_type, + max_seq_len=256, + doc_stride=128, + use_fast=True, + use_auth_token=use_auth_token, ) processor.save(output_path) - model = AdaptiveModel.convert_from_transformers(model_name, device=torch.device("cpu"), task_type=task_type) + model = AdaptiveModel.convert_from_transformers( + model_name, device=torch.device("cpu"), task_type=task_type, use_auth_token=use_auth_token + ) model.save(output_path) os.remove(output_path / "language_model.bin") # remove the actual PyTorch model(only configs are required) @@ -649,7 +670,7 @@ class AdaptiveModel(nn.Module, BaseAdaptiveModel): if convert_to_float16: from onnxruntime_tools import optimizer - config = AutoConfig.from_pretrained(model_name) + config = AutoConfig.from_pretrained(model_name, use_auth_token=use_auth_token) optimized_model = optimizer.optimize_model( input=str(output_path / "model.onnx"), model_type="bert", @@ -685,7 +706,7 @@ class ONNXAdaptiveModel(BaseAdaptiveModel): """ :param onnx_session: ? # TODO :param language_model_class: Class of LanguageModel - :param langauge: Language the model is trained for. + :param language: Language the model is trained for. :param prediction_heads: A list of models that take embeddings and return logits for a given task. :param device: The device on which this model will operate. Either torch.device("cpu") or torch.device("cuda"). """ diff --git a/haystack/modeling/model/biadaptive_model.py b/haystack/modeling/model/biadaptive_model.py index d80f00957..9bca50b85 100644 --- a/haystack/modeling/model/biadaptive_model.py +++ b/haystack/modeling/model/biadaptive_model.py @@ -474,6 +474,7 @@ class BiAdaptiveModel(nn.Module): task_type: str = "text_similarity", processor: Optional[Processor] = None, similarity_function: str = "dot_product", + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load a (downstream) model from huggingface's transformers format. Use cases: @@ -493,10 +494,15 @@ class BiAdaptiveModel(nn.Module): :param task_type: 'text_similarity' More tasks coming soon ... :param processor: populates prediction head with information coming from tasks :type processor: Processor + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :return: AdaptiveModel """ - lm1 = get_language_model(pretrained_model_name_or_path=model_name_or_path1) - lm2 = get_language_model(pretrained_model_name_or_path=model_name_or_path2) + lm1 = get_language_model(pretrained_model_name_or_path=model_name_or_path1, use_auth_token=use_auth_token) + lm2 = get_language_model(pretrained_model_name_or_path=model_name_or_path2, use_auth_token=use_auth_token) prediction_head = TextSimilarityHead(similarity_function=similarity_function) # TODO Infer type of head automatically from config if task_type == "text_similarity": diff --git a/haystack/modeling/model/language_model.py b/haystack/modeling/model/language_model.py index 34a456576..0d46f9c55 100644 --- a/haystack/modeling/model/language_model.py +++ b/haystack/modeling/model/language_model.py @@ -284,7 +284,11 @@ class HFLanguageModel(LanguageModel): :param pretrained_model_name_or_path: The path of the saved pretrained model or the name of the model. :param model_type: the HuggingFace class name prefix (for example 'Bert', 'Roberta', etc...) :param language: the model's language ('multilingual' is also accepted) - :param use_auth_token: the HF token or False + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__(model_type=model_type) @@ -295,7 +299,7 @@ class HFLanguageModel(LanguageModel): if os.path.exists(haystack_lm_config): # Haystack style haystack_lm_model = Path(pretrained_model_name_or_path) / "language_model.bin" - model_config = config_class.from_pretrained(haystack_lm_config) + model_config = config_class.from_pretrained(haystack_lm_config, use_auth_token=use_auth_token) self.model = model_class.from_pretrained( haystack_lm_model, config=model_config, use_auth_token=use_auth_token, **(model_kwargs or {}) ) @@ -391,6 +395,13 @@ class HFLanguageModelWithPooler(HFLanguageModel): * A local path of a model trained using Haystack (for example, "some_dir/haystack_model") :param pretrained_model_name_or_path: The path of the saved pretrained model or its name. + :param model_type: the HuggingFace class name prefix (for example 'DebertaV2', 'Electra', etc...) + :param language: the model's language ('multilingual' is also accepted) + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__( pretrained_model_name_or_path=pretrained_model_name_or_path, @@ -512,10 +523,13 @@ class DPREncoder(LanguageModel): :param pretrained_model_name_or_path: The path of the base pretrained language model whose weights are used to initialize DPRQuestionEncoder. :param model_type: the type of model (see `HUGGINGFACE_TO_HAYSTACK`) - :param model_kwargs: any kwarg to pass to the model at init :param language: the model's language. If not given, it will be inferred. Defaults to english. 
:param n_added_tokens: unused for `DPREncoder` - :param use_auth_token: useful if the model is from the HF Hub and private + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param model_kwargs: any kwarg to pass to the model at init """ super().__init__(model_type=model_type) @@ -561,15 +575,21 @@ class DPREncoder(LanguageModel): :param model_name_or_path: name or path of the model to load :param model_class: The HuggingFace model class name :param model_kwargs: any kwarg to pass to the model at init - :param use_auth_token: useful if the model is from the HF Hub and private + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ original_model_config = AutoConfig.from_pretrained(haystack_lm_config) haystack_lm_model = Path(model_name_or_path) / "language_model.bin" original_model_type = original_model_config.model_type if original_model_type and "dpr" in original_model_type.lower(): - dpr_config = transformers.DPRConfig.from_pretrained(haystack_lm_config) - self.model = model_class.from_pretrained(haystack_lm_model, config=dpr_config, **model_kwargs) + dpr_config = transformers.DPRConfig.from_pretrained(haystack_lm_config, use_auth_token=use_auth_token) + self.model = model_class.from_pretrained( + haystack_lm_model, config=dpr_config, use_auth_token=use_auth_token, **model_kwargs + ) else: self.model = self._init_model_through_config( @@ -607,7 +627,11 @@ class DPREncoder(LanguageModel): :param model_name_or_path: name or path of the model to load :param model_class: The HuggingFace model class name :param model_kwargs: any kwarg to pass to the model at init - :param use_auth_token: useful if the model is from the HF Hub and private + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param language: the model's language. If not given, it will be inferred. Defaults to english. """ original_model_config = AutoConfig.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) @@ -842,7 +866,11 @@ def get_language_model( :param pretrained_model_name_or_path: The path of the saved pretrained model or its name. :param language: The language of the model (i.e english etc). :param n_added_tokens: The number of added tokens to the model. - :param use_auth_token: Whether to use the huggingface auth token for private repos or not. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param revision: The version of the model to use from the Hugging Face model hub. This can be a tag name, a branch name, or a commit hash. :param autoconfig_kwargs: Additional keyword arguments to pass to the autoconfig function. diff --git a/haystack/modeling/model/prediction_head.py b/haystack/modeling/model/prediction_head.py index 4cb411e5e..63fd86d4b 100644 --- a/haystack/modeling/model/prediction_head.py +++ b/haystack/modeling/model/prediction_head.py @@ -286,7 +286,13 @@ class QuestionAnsweringHead(PredictionHead): self.use_no_answer_legacy_confidence = use_no_answer_legacy_confidence @classmethod - def load(cls, pretrained_model_name_or_path: Union[str, Path], revision: Optional[str] = None, **kwargs): # type: ignore + def load( # type: ignore + cls, + pretrained_model_name_or_path: Union[str, Path], + revision: Optional[str] = None, + use_auth_token: Optional[Union[str, bool]] = None, + **kwargs, + ): """ Load a prediction head from a saved Haystack or transformers model. `pretrained_model_name_or_path` can be one of the following: @@ -299,9 +305,13 @@ class QuestionAnsweringHead(PredictionHead): Exemplary public names: - distilbert-base-uncased-distilled-squad - bert-large-uncased-whole-word-masking-finetuned-squad - See https://huggingface.co/models for full list :param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ if ( os.path.exists(pretrained_model_name_or_path) @@ -314,7 +324,7 @@ class QuestionAnsweringHead(PredictionHead): # b) transformers style # load all weights from model full_qa_model = AutoModelForQuestionAnswering.from_pretrained( - pretrained_model_name_or_path, revision=revision, **kwargs + pretrained_model_name_or_path, revision=revision, use_auth_token=use_auth_token, **kwargs ) # init empty head head = cls(layer_dims=[full_qa_model.config.hidden_size, 2], task_name="question_answering") diff --git a/haystack/modeling/model/tokenization.py b/haystack/modeling/model/tokenization.py index 9467d3813..6c6db86c0 100644 --- a/haystack/modeling/model/tokenization.py +++ b/haystack/modeling/model/tokenization.py @@ -45,7 +45,11 @@ def get_tokenizer( :param pretrained_model_name_or_path: The path of the saved pretrained model or its name (e.g. `bert-base-uncased`) :param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. :param use_fast: Indicate if Haystack should try to load the fast version of the tokenizer (True) or use the Python one (False). Defaults to True. - :param use_auth_token: The auth_token to use in `PretrainedTokenizer.from_pretrained()`, or False + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param kwargs: other kwargs to pass on to `PretrainedTokenizer.from_pretrained()` :return: AutoTokenizer instance """ diff --git a/haystack/nodes/answer_generator/openai.py b/haystack/nodes/answer_generator/openai.py index 10f49c95a..bde450c37 100644 --- a/haystack/nodes/answer_generator/openai.py +++ b/haystack/nodes/answer_generator/openai.py @@ -19,7 +19,7 @@ class OpenAIAnswerGenerator(BaseGenerator): The Documents can come from a Retriever or you can supply them manually. To use this Node, you need an API key from an active OpenAI account. You can sign-up for an account - on the [OpenAI API website](https://openai.com/api/)). + on the [OpenAI API website](https://openai.com/api/). """ def __init__( diff --git a/haystack/nodes/answer_generator/transformers.py b/haystack/nodes/answer_generator/transformers.py index 270379252..5387c058b 100644 --- a/haystack/nodes/answer_generator/transformers.py +++ b/haystack/nodes/answer_generator/transformers.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import logging from collections.abc import Callable @@ -79,6 +79,7 @@ class RAGenerator(BaseGenerator): prefix: Optional[str] = None, use_gpu: bool = True, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load a RAG model from Transformers along with passage_embedding_model. @@ -97,6 +98,12 @@ class RAGenerator(BaseGenerator): :param embed_title: Embedded the title of passage while generating embedding :param prefix: The prefix used by the generator's tokenizer. :param use_gpu: Whether to use GPU. Falls back on CPU if no GPU is available. + :param progress_bar: Whether to show a tqdm progress bar or not. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__(progress_bar=progress_bar) @@ -117,15 +124,17 @@ class RAGenerator(BaseGenerator): self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) - self.tokenizer = RagTokenizer.from_pretrained(model_name_or_path) + self.tokenizer = RagTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) if self.generator_type == "sequence": raise NotImplementedError("RagSequenceForGeneration is not implemented yet") # TODO: Enable when transformers have it. 
Refer https://github.com/huggingface/transformers/issues/7905 # Also refer refer https://github.com/huggingface/transformers/issues/7829 - # self.model = RagSequenceForGeneration.from_pretrained(model_name_or_path) + # self.model = RagSequenceForGeneration.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) - self.model = RagTokenForGeneration.from_pretrained(model_name_or_path, revision=model_version) + self.model = RagTokenForGeneration.from_pretrained( + model_name_or_path, revision=model_version, use_auth_token=use_auth_token + ) self.model.to(str(self.devices[0])) # Copied cat_input_and_doc method from transformers.RagRetriever @@ -328,6 +337,7 @@ class Seq2SeqGenerator(BaseGenerator): num_beams: int = 8, use_gpu: bool = True, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ :param model_name_or_path: a HF model name for auto-regressive language model like GPT2, XLNet, XLM, Bart, T5 etc @@ -341,6 +351,12 @@ class Seq2SeqGenerator(BaseGenerator): :param min_length: Minimum length of generated text :param num_beams: Number of beams for beam search. 1 means no beam search. :param use_gpu: Whether to use GPU or the CPU. Falls back on CPU if no GPU is available. + :param progress_bar: Whether to show a tqdm progress bar or not. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__(progress_bar=progress_bar) self.model_name_or_path = model_name_or_path @@ -358,8 +374,8 @@ class Seq2SeqGenerator(BaseGenerator): Seq2SeqGenerator._register_converters(model_name_or_path, input_converter) - self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) + self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) self.model.to(str(self.devices[0])) self.model.eval() diff --git a/haystack/nodes/document_classifier/transformers.py b/haystack/nodes/document_classifier/transformers.py index 8e59f6d70..c10bfc49a 100644 --- a/haystack/nodes/document_classifier/transformers.py +++ b/haystack/nodes/document_classifier/transformers.py @@ -75,6 +75,7 @@ class TransformersDocumentClassifier(BaseDocumentClassifier): batch_size: int = 16, classification_field: str = None, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load a text classification model from Transformers. @@ -104,6 +105,11 @@ class TransformersDocumentClassifier(BaseDocumentClassifier): :param batch_size: Number of Documents to be processed at a time. :param classification_field: Name of Document's meta field to be used for classification. If left unset, Document.content is used by default. :param progress_bar: Whether to show a progress bar while processing. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() @@ -120,7 +126,12 @@ class TransformersDocumentClassifier(BaseDocumentClassifier): tokenizer = model_name_or_path if task == "zero-shot-classification": self.model = pipeline( - task=task, model=model_name_or_path, tokenizer=tokenizer, device=device, revision=model_version + task=task, + model=model_name_or_path, + tokenizer=tokenizer, + device=device, + revision=model_version, + use_auth_token=use_auth_token, ) elif task == "text-classification": self.model = pipeline( @@ -130,6 +141,7 @@ class TransformersDocumentClassifier(BaseDocumentClassifier): device=device, revision=model_version, return_all_scores=return_all_scores, + use_auth_token=use_auth_token, ) self.return_all_scores = return_all_scores self.labels = labels diff --git a/haystack/nodes/evaluator/evaluator.py b/haystack/nodes/evaluator/evaluator.py index 76280f38c..d9e11791f 100644 --- a/haystack/nodes/evaluator/evaluator.py +++ b/haystack/nodes/evaluator/evaluator.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, Dict, Any, Optional +from typing import List, Tuple, Dict, Any, Optional, Union import logging from transformers import AutoConfig from sentence_transformers import SentenceTransformer, CrossEncoder @@ -401,6 +401,7 @@ def semantic_answer_similarity( sas_model_name_or_path: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", batch_size: int = 32, use_gpu: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ) -> Tuple[List[float], List[float], List[List[float]]]: """ Computes Transformer-based similarity of predicted answer to gold labels to derive a more meaningful metric than EM or F1. @@ -415,11 +416,16 @@ def semantic_answer_similarity( :param batch_size: Number of prediction label pairs to encode at once. :param use_gpu: Whether to use a GPU or the CPU for calculating semantic answer similarity. Falls back to CPU if no GPU is available. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :return: top_1_sas, top_k_sas, pred_label_matrix """ assert len(predictions) == len(gold_labels) - config = AutoConfig.from_pretrained(sas_model_name_or_path) + config = AutoConfig.from_pretrained(sas_model_name_or_path, use_auth_token=use_auth_token) cross_encoder_used = False if config.architectures is not None: cross_encoder_used = any(arch.endswith("ForSequenceClassification") for arch in config.architectures) @@ -435,7 +441,12 @@ def semantic_answer_similarity( # Based on Modelstring we can load either Bi-Encoders or Cross Encoders. 
# Similarity computation changes for both approaches if cross_encoder_used: - model = CrossEncoder(sas_model_name_or_path, device=device) + model = CrossEncoder( + sas_model_name_or_path, + device=device, + tokenizer_args={"use_auth_token": use_auth_token}, + automodel_args={"use_auth_token": use_auth_token}, + ) grid = [] for preds, labels in zip(predictions, gold_labels): for p in preds: @@ -455,7 +466,7 @@ def semantic_answer_similarity( current_position += len_p * len_l else: # For Bi-encoders we can flatten predictions and labels into one list - model = SentenceTransformer(sas_model_name_or_path, device=device) + model = SentenceTransformer(sas_model_name_or_path, device=device, use_auth_token=use_auth_token) all_texts: List[str] = [] for p, l in zip(predictions, gold_labels): # type: ignore # TODO potentially exclude (near) exact matches from computations diff --git a/haystack/nodes/extractor/entity.py b/haystack/nodes/extractor/entity.py index eb0cc2b51..81bcc9f21 100644 --- a/haystack/nodes/extractor/entity.py +++ b/haystack/nodes/extractor/entity.py @@ -24,6 +24,11 @@ class EntityExtractor(BaseComponent): :param use_gpu: Whether to use the GPU or not. :param batch_size: The batch size to use for entity extraction. :param progress_bar: Whether to show a progress bar or not. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ outgoing_edges = 1 @@ -34,6 +39,7 @@ class EntityExtractor(BaseComponent): use_gpu: bool = True, batch_size: int = 16, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): super().__init__() @@ -41,8 +47,10 @@ class EntityExtractor(BaseComponent): self.batch_size = batch_size self.progress_bar = progress_bar - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - token_classifier = AutoModelForTokenClassification.from_pretrained(model_name_or_path) + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) + token_classifier = AutoModelForTokenClassification.from_pretrained( + model_name_or_path, use_auth_token=use_auth_token + ) token_classifier.to(str(self.devices[0])) self.model = pipeline( "ner", @@ -50,6 +58,7 @@ class EntityExtractor(BaseComponent): tokenizer=tokenizer, aggregation_strategy="simple", device=0 if self.devices[0].type == "cuda" else -1, + use_auth_token=use_auth_token, ) def run(self, documents: Optional[Union[List[Document], List[dict]]] = None) -> Tuple[Dict, str]: # type: ignore diff --git a/haystack/nodes/label_generator/pseudo_label_generator.py b/haystack/nodes/label_generator/pseudo_label_generator.py index 8e922f658..8fa8f4d96 100644 --- a/haystack/nodes/label_generator/pseudo_label_generator.py +++ b/haystack/nodes/label_generator/pseudo_label_generator.py @@ -61,6 +61,7 @@ class PseudoLabelGenerator(BaseComponent): top_k: int = 50, batch_size: int = 16, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Loads the cross-encoder model and prepares PseudoLabelGenerator. @@ -81,6 +82,12 @@ class PseudoLabelGenerator(BaseComponent): :type batch_size: int (optional) :param progress_bar: Whether to show a progress bar, defaults to True. 
:type progress_bar: bool (optional) + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained + :type use_auth_token: Union[str, bool] (optional) """ super().__init__() @@ -100,7 +107,11 @@ class PseudoLabelGenerator(BaseComponent): raise ValueError("Provide either a QuestionGenerator or a non-empty list of questions/document pairs.") self.retriever = retriever - self.cross_encoder = CrossEncoder(cross_encoder_model_name_or_path) + self.cross_encoder = CrossEncoder( + cross_encoder_model_name_or_path, + tokenizer_args={"use_auth_token": use_auth_token}, + automodel_args={"use_auth_token": use_auth_token}, + ) self.max_questions_per_document = max_questions_per_document self.top_k = top_k self.batch_size = batch_size diff --git a/haystack/nodes/query_classifier/transformers.py b/haystack/nodes/query_classifier/transformers.py index 4b92c840a..b834f4021 100644 --- a/haystack/nodes/query_classifier/transformers.py +++ b/haystack/nodes/query_classifier/transformers.py @@ -70,6 +70,7 @@ class TransformersQueryClassifier(BaseQueryClassifier): labels: List[str] = DEFAULT_LABELS, batch_size: int = 16, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ :param model_name_or_path: Directory of a saved model or the name of a public model, for example 'shahrukhx01/bert-mini-finetune-question-detection'. @@ -83,13 +84,23 @@ class TransformersQueryClassifier(BaseQueryClassifier): If the task is 'zero-shot-classification', these are the candidate labels. :param batch_size: The number of queries to be processed at a time. :param progress_bar: Whether to show a progress bar. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) device = 0 if devices[0].type == "cuda" else -1 self.model = pipeline( - task=task, model=model_name_or_path, tokenizer=tokenizer, device=device, revision=model_version + task=task, + model=model_name_or_path, + tokenizer=tokenizer, + device=device, + revision=model_version, + use_auth_token=use_auth_token, ) self.labels = labels diff --git a/haystack/nodes/question_generator/question_generator.py b/haystack/nodes/question_generator/question_generator.py index dec5abb86..d77f573dd 100644 --- a/haystack/nodes/question_generator/question_generator.py +++ b/haystack/nodes/question_generator/question_generator.py @@ -42,6 +42,7 @@ class QuestionGenerator(BaseComponent): sep_token: str = "", batch_size: int = 16, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Uses the valhalla/t5-base-e2e-qg model by default. This class supports any question generation model that is @@ -54,12 +55,18 @@ class QuestionGenerator(BaseComponent): :param model_version: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash. 
:param use_gpu: Whether to use GPU or the CPU. Falls back on CPU if no GPU is available. :param batch_size: Number of documents to process at a time. + :param progress_bar: Whether to show a tqdm progress bar or not. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) - self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path) + self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) self.model.to(str(self.devices[0])) - self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) self.num_beams = num_beams self.max_length = max_length self.no_repeat_ngram_size = no_repeat_ngram_size diff --git a/haystack/nodes/ranker/sentence_transformers.py b/haystack/nodes/ranker/sentence_transformers.py index e1597aff0..3c1a4ff48 100644 --- a/haystack/nodes/ranker/sentence_transformers.py +++ b/haystack/nodes/ranker/sentence_transformers.py @@ -49,6 +49,7 @@ class SentenceTransformersRanker(BaseRanker): batch_size: int = 16, scale_score: bool = True, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ :param model_name_or_path: Directory of a saved model or the name of a public model e.g. @@ -66,6 +67,11 @@ class SentenceTransformersRanker(BaseRanker): only predicts a single label. For multi-label predictions, no scaling is applied. Set this to False if you do not want any scaling of the raw predictions. :param progress_bar: Whether to show a progress bar while processing the documents. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() @@ -77,11 +83,11 @@ class SentenceTransformersRanker(BaseRanker): self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=True) self.progress_bar = progress_bar self.transformer_model = AutoModelForSequenceClassification.from_pretrained( - pretrained_model_name_or_path=model_name_or_path, revision=model_version + pretrained_model_name_or_path=model_name_or_path, revision=model_version, use_auth_token=use_auth_token ) self.transformer_model.to(str(self.devices[0])) self.transformer_tokenizer = AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=model_name_or_path, revision=model_version + pretrained_model_name_or_path=model_name_or_path, revision=model_version, use_auth_token=use_auth_token ) self.transformer_model.eval() diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py index e1c5c77a2..fbdb7885f 100644 --- a/haystack/nodes/reader/farm.py +++ b/haystack/nodes/reader/farm.py @@ -121,9 +121,11 @@ class FARMReader(BaseReader): :param proxies: Dict of proxy servers to use for downloading external models. 
Example: {'http': 'some.proxy:1234', 'http://hostname': 'my.proxy:3111'} :param local_files_only: Whether to force checking for local files only (and forbid downloads) :param force_download: Whether fo force a (re-)download even if the model exists locally in the cache. - :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, - the local token will be used, which must be previously created via `transformer-cli login`. - Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() diff --git a/haystack/nodes/reader/table.py b/haystack/nodes/reader/table.py index a7bc40221..28c3d52fe 100644 --- a/haystack/nodes/reader/table.py +++ b/haystack/nodes/reader/table.py @@ -44,7 +44,7 @@ class TableReader(BaseReader): Example: ```python from haystack import Document - from haystack.reader import TableReader + from haystack.nodes import TableReader import pandas as pd table_reader = TableReader(model_name_or_path="google/tapas-base-finetuned-wtq") @@ -72,6 +72,7 @@ class TableReader(BaseReader): top_k_per_candidate: int = 3, return_no_answer: bool = False, max_seq_len: int = 256, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load a TableQA model from Transformers. @@ -104,6 +105,11 @@ class TableReader(BaseReader): :param max_seq_len: Max sequence length of one input table for the model. If the number of tokens of query + table exceed max_seq_len, the table will be truncated by removing rows until the input size fits the model. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ if not torch_scatter_installed: raise ImportError( @@ -117,17 +123,21 @@ class TableReader(BaseReader): super().__init__() self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) - config = TapasConfig.from_pretrained(model_name_or_path) + config = TapasConfig.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) if config.architectures[0] == "TapasForScoredQA": - self.model = self.TapasForScoredQA.from_pretrained(model_name_or_path, revision=model_version) + self.model = self.TapasForScoredQA.from_pretrained( + model_name_or_path, revision=model_version, use_auth_token=use_auth_token + ) else: - self.model = TapasForQuestionAnswering.from_pretrained(model_name_or_path, revision=model_version) + self.model = TapasForQuestionAnswering.from_pretrained( + model_name_or_path, revision=model_version, use_auth_token=use_auth_token + ) self.model.to(str(self.devices[0])) if tokenizer is None: - self.tokenizer = TapasTokenizer.from_pretrained(model_name_or_path) + self.tokenizer = TapasTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) else: - self.tokenizer = TapasTokenizer.from_pretrained(tokenizer) + self.tokenizer = TapasTokenizer.from_pretrained(tokenizer, use_auth_token=use_auth_token) self.top_k = top_k self.top_k_per_candidate = top_k_per_candidate @@ -540,6 +550,7 @@ class RCIReader(BaseReader): use_gpu: bool = True, top_k: int = 10, max_seq_len: int = 256, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load an RCI model from Transformers. @@ -563,36 +574,45 @@ class RCIReader(BaseReader): :param max_seq_len: Max sequence length of one input table for the model. If the number of tokens of query + table exceed max_seq_len, the table will be truncated by removing rows until the input size fits the model. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) self.row_model = AutoModelForSequenceClassification.from_pretrained( - row_model_name_or_path, revision=row_model_version + row_model_name_or_path, revision=row_model_version, use_auth_token=use_auth_token ) self.column_model = AutoModelForSequenceClassification.from_pretrained( - row_model_name_or_path, revision=column_model_version + row_model_name_or_path, revision=column_model_version, use_auth_token=use_auth_token ) self.row_model.to(str(self.devices[0])) self.column_model.to(str(self.devices[0])) if row_tokenizer is None: try: - self.row_tokenizer = AutoTokenizer.from_pretrained(row_model_name_or_path) + self.row_tokenizer = AutoTokenizer.from_pretrained( + row_model_name_or_path, use_auth_token=use_auth_token + ) # The existing RCI models on the model hub don't come with tokenizer vocab files. 
except TypeError: - self.row_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2") + self.row_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2", use_auth_token=use_auth_token) else: - self.row_tokenizer = AutoTokenizer.from_pretrained(row_tokenizer) + self.row_tokenizer = AutoTokenizer.from_pretrained(row_tokenizer, use_auth_token=use_auth_token) if column_tokenizer is None: try: - self.column_tokenizer = AutoTokenizer.from_pretrained(column_model_name_or_path) + self.column_tokenizer = AutoTokenizer.from_pretrained( + column_model_name_or_path, use_auth_token=use_auth_token + ) # The existing RCI models on the model hub don't come with tokenizer vocab files. except TypeError: - self.column_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2") + self.column_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2", use_auth_token=use_auth_token) else: - self.column_tokenizer = AutoTokenizer.from_pretrained(column_tokenizer) + self.column_tokenizer = AutoTokenizer.from_pretrained(column_tokenizer, use_auth_token=use_auth_token) self.top_k = top_k self.max_seq_len = max_seq_len diff --git a/haystack/nodes/reader/transformers.py b/haystack/nodes/reader/transformers.py index d8f6fe845..6d78a16b5 100644 --- a/haystack/nodes/reader/transformers.py +++ b/haystack/nodes/reader/transformers.py @@ -36,6 +36,7 @@ class TransformersReader(BaseReader): max_seq_len: int = 256, doc_stride: int = 128, batch_size: int = 16, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load a QA model from Transformers. @@ -66,13 +67,23 @@ class TransformersReader(BaseReader): :param max_seq_len: max sequence length of one input text for the model :param doc_stride: length of striding window for splitting long texts (used if len(text) > max_seq_len) :param batch_size: Number of documents to process at a time. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False) device = 0 if self.devices[0].type == "cuda" else -1 self.model = pipeline( - "question-answering", model=model_name_or_path, tokenizer=tokenizer, device=device, revision=model_version + "question-answering", + model=model_name_or_path, + tokenizer=tokenizer, + device=device, + revision=model_version, + use_auth_token=use_auth_token, ) self.context_window_size = context_window_size self.top_k = top_k diff --git a/haystack/nodes/retriever/_embedding_encoder.py b/haystack/nodes/retriever/_embedding_encoder.py index f0abede69..366358555 100644 --- a/haystack/nodes/retriever/_embedding_encoder.py +++ b/haystack/nodes/retriever/_embedding_encoder.py @@ -222,8 +222,12 @@ class _RetribertEmbeddingEncoder(_BaseEmbeddingEncoder): self.progress_bar = retriever.progress_bar self.batch_size = retriever.batch_size self.max_length = retriever.max_seq_len - self.embedding_tokenizer = AutoTokenizer.from_pretrained(retriever.embedding_model) - self.embedding_model = AutoModel.from_pretrained(retriever.embedding_model).to(str(retriever.devices[0])) + self.embedding_tokenizer = AutoTokenizer.from_pretrained( + retriever.embedding_model, use_auth_token=retriever.use_auth_token + ) + self.embedding_model = AutoModel.from_pretrained( + retriever.embedding_model, use_auth_token=retriever.use_auth_token + ).to(str(retriever.devices[0])) def embed_queries(self, texts: List[str]) -> List[np.ndarray]: diff --git a/haystack/nodes/retriever/dense.py b/haystack/nodes/retriever/dense.py index 3543ebe95..3009a0f92 100644 --- a/haystack/nodes/retriever/dense.py +++ b/haystack/nodes/retriever/dense.py @@ -118,9 +118,11 @@ class DensePassageRetriever(BaseRetriever): https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list. - :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, - the local token will be used, which must be previously created via `transformer-cli login`. - Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. 
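Reviewer note: the retriever hunks above (and `_RetribertEmbeddingEncoder`, which now reads `retriever.use_auth_token`) all apply the same pattern. The class below is a minimal sketch with an invented name, not code from this patch, assuming only `transformers` is installed:

```python
# Minimal sketch (invented class name): the token accepted in __init__ is kept on the
# instance and forwarded to every Hugging Face loading call, so helper components can
# reuse the same credentials when they load the model again.
from typing import Optional, Union

from transformers import AutoModel, AutoTokenizer


class AuthAwareEncoder:
    def __init__(self, model_name_or_path: str, use_auth_token: Optional[Union[str, bool]] = None):
        # Stored so downstream helpers (cf. retriever.use_auth_token above) can pick it up.
        self.use_auth_token = use_auth_token
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token)
        self.model = AutoModel.from_pretrained(model_name_or_path, use_auth_token=use_auth_token)
```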
@@ -140,6 +142,7 @@ class DensePassageRetriever(BaseRetriever): self.progress_bar = progress_bar self.top_k = top_k self.scale_score = scale_score + self.use_auth_token = use_auth_token if document_store is None: logger.warning( @@ -822,9 +825,11 @@ class TableTextRetriever(BaseRetriever): https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for TableTextRetriever, training will only use the first device provided in this list. - :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, - the local token will be used, which must be previously created via `transformer-cli login`. - Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. @@ -1489,9 +1494,11 @@ class EmbeddingRetriever(BaseRetriever): https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever, training will only use the first device provided in this list. - :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, - the local token will be used, which must be previously created via `transformer-cli login`. - Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. 
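As a usage illustration only (the repository name is a placeholder and not part of this patch), an `EmbeddingRetriever` pointed at a private Hub model should now authenticate every download through a single argument:

```python
# Usage sketch with a placeholder repository name: a private sentence-transformers
# model loaded through EmbeddingRetriever with a single use_auth_token argument.
from haystack.nodes import EmbeddingRetriever

retriever = EmbeddingRetriever(
    embedding_model="my-org/private-embedding-model",  # placeholder private Hub repo
    model_format="sentence_transformers",  # skip the Hub probe for this sketch
    use_auth_token=True,  # reuse the token stored by `transformers-cli login`
)
# A document store can be attached afterwards, before indexing or querying.
```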
@@ -1523,7 +1530,11 @@ class EmbeddingRetriever(BaseRetriever): self.progress_bar = progress_bar self.use_auth_token = use_auth_token self.scale_score = scale_score - self.model_format = self._infer_model_format(embedding_model) if model_format is None else model_format + self.model_format = ( + self._infer_model_format(model_name_or_path=embedding_model, use_auth_token=use_auth_token) + if model_format is None + else model_format + ) logger.info(f"Init retriever using embeddings of model {embedding_model}") @@ -1826,7 +1837,7 @@ class EmbeddingRetriever(BaseRetriever): return linearized_docs @staticmethod - def _infer_model_format(model_name_or_path: str) -> str: + def _infer_model_format(model_name_or_path: str, use_auth_token: Optional[Union[str, bool]]) -> str: # Check if model name is a local directory with sentence transformers config file in it if Path(model_name_or_path).exists(): if Path(f"{model_name_or_path}/config_sentence_transformers.json").exists(): @@ -1834,13 +1845,17 @@ class EmbeddingRetriever(BaseRetriever): # Check if sentence transformers config file in model hub else: try: - hf_hub_download(repo_id=model_name_or_path, filename="config_sentence_transformers.json") + hf_hub_download( + repo_id=model_name_or_path, + filename="config_sentence_transformers.json", + use_auth_token=use_auth_token, + ) return "sentence_transformers" except HTTPError: pass # Check if retribert model - config = AutoConfig.from_pretrained(model_name_or_path) + config = AutoConfig.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) if config.model_type == "retribert": return "retribert" @@ -1955,9 +1970,11 @@ class MultihopEmbeddingRetriever(EmbeddingRetriever): https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device (e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever, training will only use the first device provided in this list. - :param use_auth_token: API token used to download private models from Huggingface. If this parameter is set to `True`, - the local token will be used, which must be previously created via `transformer-cli login`. - Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained :param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]). If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant. Otherwise raw similarity scores (e.g. cosine or dot_product) will be used. 
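The format-inference change above amounts to probing the Hub with the same credentials. A standalone sketch of the decision logic (helper name invented; the final fallback label is an assumption):

```python
# Standalone sketch of the authenticated model-format probe (helper name invented).
from pathlib import Path
from typing import Optional, Union

from huggingface_hub import hf_hub_download
from requests.exceptions import HTTPError
from transformers import AutoConfig


def guess_model_format(model_name_or_path: str, use_auth_token: Optional[Union[str, bool]] = None) -> str:
    # Local sentence-transformers checkpoints ship a dedicated config file.
    if Path(model_name_or_path, "config_sentence_transformers.json").exists():
        return "sentence_transformers"
    try:
        # Private repositories only answer when the token is forwarded here as well.
        hf_hub_download(
            repo_id=model_name_or_path,
            filename="config_sentence_transformers.json",
            use_auth_token=use_auth_token,
        )
        return "sentence_transformers"
    except HTTPError:
        pass
    # The config download below must also carry the token for private models.
    config = AutoConfig.from_pretrained(model_name_or_path, use_auth_token=use_auth_token)
    if config.model_type == "retribert":
        return "retribert"
    return "farm"  # assumed default label for plain transformers checkpoints
```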
diff --git a/haystack/nodes/retriever/text2sparql.py b/haystack/nodes/retriever/text2sparql.py index ffff0888c..6fdbdf47d 100644 --- a/haystack/nodes/retriever/text2sparql.py +++ b/haystack/nodes/retriever/text2sparql.py @@ -1,4 +1,4 @@ -from typing import Optional, List +from typing import Optional, List, Union import logging from transformers import BartForConditionalGeneration, BartTokenizer @@ -15,20 +15,29 @@ class Text2SparqlRetriever(BaseGraphRetriever): The generated SPARQL query is executed on a knowledge graph. """ - def __init__(self, knowledge_graph, model_name_or_path, top_k: int = 1): + def __init__( + self, knowledge_graph, model_name_or_path, top_k: int = 1, use_auth_token: Optional[Union[str, bool]] = None + ): """ Init the Retriever by providing a knowledge graph and a pre-trained BART model :param knowledge_graph: An instance of BaseKnowledgeGraph on which to execute SPARQL queries. :param model_name_or_path: Name of or path to a pre-trained BartForConditionalGeneration model. :param top_k: How many SPARQL queries to generate per text query. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() self.knowledge_graph = knowledge_graph # TODO We should extend this to any seq2seq models and use the AutoModel class - self.model = BartForConditionalGeneration.from_pretrained(model_name_or_path, forced_bos_token_id=0) - self.tok = BartTokenizer.from_pretrained(model_name_or_path) + self.model = BartForConditionalGeneration.from_pretrained( + model_name_or_path, forced_bos_token_id=0, use_auth_token=use_auth_token + ) + self.tok = BartTokenizer.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) self.top_k = top_k def retrieve(self, query: str, top_k: Optional[int] = None): diff --git a/haystack/nodes/summarizer/transformers.py b/haystack/nodes/summarizer/transformers.py index 03ad90468..8e79ef34b 100644 --- a/haystack/nodes/summarizer/transformers.py +++ b/haystack/nodes/summarizer/transformers.py @@ -65,6 +65,7 @@ class TransformersSummarizer(BaseSummarizer): generate_single_summary: bool = False, batch_size: int = 16, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """ Load a Summarization model from Transformers. @@ -88,6 +89,11 @@ class TransformersSummarizer(BaseSummarizer): Important: The summary will depend on the order of the supplied documents! :param batch_size: Number of documents to process at a time. :param progress_bar: Whether to show a progress bar. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. 
+ Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() @@ -97,9 +103,11 @@ class TransformersSummarizer(BaseSummarizer): if tokenizer is None: tokenizer = model_name_or_path model = AutoModelForSeq2SeqLM.from_pretrained( - pretrained_model_name_or_path=model_name_or_path, revision=model_version + pretrained_model_name_or_path=model_name_or_path, revision=model_version, use_auth_token=use_auth_token + ) + self.summarizer = pipeline( + "summarization", model=model, tokenizer=tokenizer, device=device, use_auth_token=use_auth_token ) - self.summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=device) self.max_length = max_length self.min_length = min_length self.clean_up_tokenization_spaces = clean_up_tokenization_spaces diff --git a/haystack/nodes/translator/transformers.py b/haystack/nodes/translator/transformers.py index 69153debf..464c859a9 100644 --- a/haystack/nodes/translator/transformers.py +++ b/haystack/nodes/translator/transformers.py @@ -43,6 +43,7 @@ class TransformersTranslator(BaseTranslator): clean_up_tokenization_spaces: Optional[bool] = True, use_gpu: bool = True, progress_bar: bool = True, + use_auth_token: Optional[Union[str, bool]] = None, ): """Initialize the translator with a model that fits your targeted languages. While we support all seq2seq models from Hugging Face's model hub, we recommend using the OPUS models from Helsinki NLP. They provide plenty @@ -64,6 +65,11 @@ class TransformersTranslator(BaseTranslator): :param clean_up_tokenization_spaces: Whether or not to clean up the tokenization spaces. (default True) :param use_gpu: Whether to use GPU or the CPU. Falls back on CPU if no GPU is available. :param progress_bar: Whether to show a progress bar. + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ super().__init__() @@ -72,8 +78,8 @@ class TransformersTranslator(BaseTranslator): self.clean_up_tokenization_spaces = clean_up_tokenization_spaces self.progress_bar = progress_bar tokenizer_name = tokenizer_name or model_name_or_path - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) - self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_auth_token=use_auth_token) + self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, use_auth_token=use_auth_token) self.model.to(str(self.devices[0])) def translate( diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 80eb64682..661e5da86 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1062,6 +1062,7 @@ class Pipeline: context_matching_min_length: int = 100, context_matching_boost_split_overlaps: bool = True, context_matching_threshold: float = 65.0, + use_auth_token: Optional[Union[str, bool]] = None, ) -> EvaluationResult: """ Evaluates the pipeline by running the pipeline once per query in debug mode @@ -1112,6 +1113,11 @@ class Pipeline: we cut the context on the same side, recalculate the score and take the mean of both. 
Thus [AB] <-> [BC] (score ~50) gets recalculated with B <-> B (score ~100) scoring ~75 in total. :param context_matching_threshold: Score threshold that candidates must surpass to be included into the result list. Range: [0,100] + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ eval_result = EvaluationResult() if add_isolated_node_eval: @@ -1162,6 +1168,7 @@ class Pipeline: sas_model_name_or_path=sas_model_name_or_path, batch_size=sas_batch_size, use_gpu=sas_use_gpu, + use_auth_token=use_auth_token, ) df["sas"] = sas df["gold_answers_sas"] = [ diff --git a/haystack/utils/docker.py b/haystack/utils/docker.py index 5e182a6df..2f2bdc181 100644 --- a/haystack/utils/docker.py +++ b/haystack/utils/docker.py @@ -1,10 +1,18 @@ import logging +from typing import List, Union, Optional -def cache_models(models=None): +def cache_models(models: List[str] = None, use_auth_token: Optional[Union[str, bool]] = None): """ Small function that caches models and other data. Used only in the Dockerfile to include these caches in the images. + + :param models: List of Hugging Face model names to cache + :param use_auth_token: The API token used to download private models from Huggingface. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + Additional information can be found here + https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained """ # Backward compat after adding the `model` param if models is None: @@ -21,5 +29,5 @@ def cache_models(models=None): for model_to_cache in models: logging.info(f"Caching {model_to_cache}") - transformers.AutoTokenizer.from_pretrained(model_to_cache) - transformers.AutoModel.from_pretrained(model_to_cache) + transformers.AutoTokenizer.from_pretrained(model_to_cache, use_auth_token=use_auth_token) + transformers.AutoModel.from_pretrained(model_to_cache, use_auth_token=use_auth_token)
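Taken together, a short end-to-end usage sketch of what this patch enables; all repository names are placeholders and the token value is illustrative:

```python
# End-to-end usage sketch (placeholder repository names): one use_auth_token value is
# enough to load private Hub models through Haystack nodes and the Docker cache helper.
from haystack.nodes import FARMReader, TransformersDocumentClassifier
from haystack.utils.docker import cache_models

token = True  # or a string token; True reuses the token stored by `transformers-cli login`

reader = FARMReader(model_name_or_path="my-org/private-qa-model", use_auth_token=token)
classifier = TransformersDocumentClassifier(
    model_name_or_path="my-org/private-doc-classifier", use_auth_token=token
)

# The Docker cache helper now forwards the token too, so images can pre-fetch private models.
cache_models(models=["my-org/private-qa-model"], use_auth_token=token)
```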