diff --git a/haystack/preview/components/embedders/sentence_transformers_document_embedder.py b/haystack/preview/components/embedders/sentence_transformers_document_embedder.py index 80701ca83..a33593758 100644 --- a/haystack/preview/components/embedders/sentence_transformers_document_embedder.py +++ b/haystack/preview/components/embedders/sentence_transformers_document_embedder.py @@ -17,7 +17,7 @@ class SentenceTransformersDocumentEmbedder: self, model_name_or_path: str = "sentence-transformers/all-mpnet-base-v2", device: Optional[str] = None, - use_auth_token: Union[bool, str, None] = None, + token: Union[bool, str, None] = None, prefix: str = "", suffix: str = "", batch_size: int = 32, @@ -33,7 +33,7 @@ class SentenceTransformersDocumentEmbedder: such as ``'sentence-transformers/all-mpnet-base-v2'``. :param device: Device (like 'cuda' / 'cpu') that should be used for computation. Defaults to CPU. - :param use_auth_token: The API token used to download private models from Hugging Face. + :param token: The API token used to download private models from Hugging Face. If this parameter is set to `True`, then the token generated when running `transformers-cli login` (stored in ~/.huggingface) will be used. :param prefix: A string to add to the beginning of each Document text before embedding. 
@@ -48,7 +48,7 @@ class SentenceTransformersDocumentEmbedder: self.model_name_or_path = model_name_or_path # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" - self.use_auth_token = use_auth_token + self.token = token self.prefix = prefix self.suffix = suffix self.batch_size = batch_size @@ -71,7 +71,7 @@ class SentenceTransformersDocumentEmbedder: self, model_name_or_path=self.model_name_or_path, device=self.device, - use_auth_token=self.use_auth_token, + token=self.token if not isinstance(self.token, str) else None, # don't serialize valid tokens prefix=self.prefix, suffix=self.suffix, batch_size=self.batch_size, @@ -94,7 +94,7 @@ class SentenceTransformersDocumentEmbedder: """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token + model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.token ) @component.output_types(documents=List[Document]) diff --git a/haystack/preview/components/embedders/sentence_transformers_text_embedder.py b/haystack/preview/components/embedders/sentence_transformers_text_embedder.py index 6a479cac6..dec9c44f7 100644 --- a/haystack/preview/components/embedders/sentence_transformers_text_embedder.py +++ b/haystack/preview/components/embedders/sentence_transformers_text_embedder.py @@ -16,7 +16,7 @@ class SentenceTransformersTextEmbedder: self, model_name_or_path: str = "sentence-transformers/all-mpnet-base-v2", device: Optional[str] = None, - use_auth_token: Union[bool, str, None] = None, + token: Union[bool, str, None] = None, prefix: str = "", suffix: str = "", batch_size: int = 32, @@ -30,7 +30,7 @@ class SentenceTransformersTextEmbedder: such as ``'sentence-transformers/all-mpnet-base-v2'``. 
:param device: Device (like 'cuda' / 'cpu') that should be used for computation. Defaults to CPU. - :param use_auth_token: The API token used to download private models from Hugging Face. + :param token: The API token used to download private models from Hugging Face. If this parameter is set to `True`, then the token generated when running `transformers-cli login` (stored in ~/.huggingface) will be used. :param prefix: A string to add to the beginning of each text. @@ -43,7 +43,7 @@ class SentenceTransformersTextEmbedder: self.model_name_or_path = model_name_or_path # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" - self.use_auth_token = use_auth_token + self.token = token self.prefix = prefix self.suffix = suffix self.batch_size = batch_size @@ -64,7 +64,7 @@ class SentenceTransformersTextEmbedder: self, model_name_or_path=self.model_name_or_path, device=self.device, - use_auth_token=self.use_auth_token, + token=self.token if not isinstance(self.token, str) else None, # don't serialize valid tokens prefix=self.prefix, suffix=self.suffix, batch_size=self.batch_size, @@ -85,7 +85,7 @@ class SentenceTransformersTextEmbedder: """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token + model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.token ) @component.output_types(embedding=List[float]) diff --git a/haystack/preview/components/rankers/similarity.py b/haystack/preview/components/rankers/similarity.py index 5d5125006..f87d5eac1 100644 --- a/haystack/preview/components/rankers/similarity.py +++ b/haystack/preview/components/rankers/similarity.py @@ -36,15 +36,19 @@ class SimilarityRanker: def __init__( self, model_name_or_path: Union[str, Path] = "cross-encoder/ms-marco-MiniLM-L-6-v2", - 
top_k: int = 10, device: str = "cpu", + token: Union[bool, str, None] = None, + top_k: int = 10, ): """ Creates an instance of SimilarityRanker. :param model_name_or_path: Path to a pre-trained sentence-transformers model. - :param top_k: The maximum number of documents to return per query. :param device: torch device (for example, cuda:0, cpu, mps) to limit model inference to a specific device. + :param token: The API token used to download private models from Hugging Face. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. + :param top_k: The maximum number of documents to return per query. """ torch_and_transformers_import.check() @@ -53,6 +57,7 @@ class SimilarityRanker: raise ValueError(f"top_k must be > 0, but got {top_k}") self.top_k = top_k self.device = device + self.token = token self.model = None self.tokenizer = None @@ -67,16 +72,22 @@ class SimilarityRanker: Warm up the model and tokenizer used in scoring the documents. """ if self.model_name_or_path and not self.model: - self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name_or_path) + self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name_or_path, token=self.token) self.model = self.model.to(self.device) self.model.eval() - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, token=self.token) def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. 
""" - return default_to_dict(self, top_k=self.top_k, device=self.device, model_name_or_path=self.model_name_or_path) + return default_to_dict( + self, + device=self.device, + model_name_or_path=self.model_name_or_path, + token=self.token if not isinstance(self.token, str) else None, # don't serialize valid tokens + top_k=self.top_k, + ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "SimilarityRanker": diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index be18b5698..5db4cabcc 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -25,6 +25,7 @@ class ExtractiveReader: self, model_name_or_path: Union[Path, str] = "deepset/roberta-base-squad2-distilled", device: Optional[str] = None, + token: Union[bool, str, None] = None, top_k: int = 20, confidence_threshold: Optional[float] = None, max_seq_length: int = 384, @@ -40,6 +41,9 @@ class ExtractiveReader: Can either be a path to a folder containing the model files or an identifier for the HF hub Default: `'deepset/roberta-base-squad2-distilled'` :param device: Pytorch device string. Uses GPU by default if available + :param token: The API token used to download private models from Hugging Face. + If this parameter is set to `True`, then the token generated when running + `transformers-cli login` (stored in ~/.huggingface) will be used. :param top_k: Number of answers to return per query. It is required even if confidence_threshold is set. Defaults to 20. 
:param confidence_threshold: Answers with a confidence score below this value will not be returned @@ -58,6 +62,7 @@ class ExtractiveReader: self.model_name_or_path = str(model_name_or_path) self.model = None self.device = device + self.token = token self.max_seq_length = max_seq_length self.top_k = top_k self.confidence_threshold = confidence_threshold @@ -81,6 +86,7 @@ class ExtractiveReader: self, model_name_or_path=self.model_name_or_path, device=self.device, + token=self.token if not isinstance(self.token, str) else None, max_seq_length=self.max_seq_length, top_k=self.top_k, confidence_threshold=self.confidence_threshold, @@ -104,8 +110,10 @@ class ExtractiveReader: self.device = self.device or "cuda:0" else: self.device = self.device or "cpu:0" - self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name_or_path).to(self.device) - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path) + self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name_or_path, token=self.token).to( + self.device + ) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, token=self.token) def _flatten_documents( self, queries: List[str], documents: List[List[Document]] diff --git a/releasenotes/notes/adopt-hf-token-770edaccf6278ad9.yaml b/releasenotes/notes/adopt-hf-token-770edaccf6278ad9.yaml new file mode 100644 index 000000000..0a0f1e63e --- /dev/null +++ b/releasenotes/notes/adopt-hf-token-770edaccf6278ad9.yaml @@ -0,0 +1,8 @@ +--- +preview: + - | + Adopt Hugging Face `token` instead of the deprecated `use_auth_token`. + Add this parameter to `ExtractiveReader` and `SimilarityRanker` to allow + loading private models. + Proper handling of `token` during serialization: if it is a string (a possible valid token) + it is not serialized. 
diff --git a/test/preview/components/embedders/test_sentence_transformers_document_embedder.py b/test/preview/components/embedders/test_sentence_transformers_document_embedder.py index c861c9c48..242aa392a 100644 --- a/test/preview/components/embedders/test_sentence_transformers_document_embedder.py +++ b/test/preview/components/embedders/test_sentence_transformers_document_embedder.py @@ -14,7 +14,7 @@ class TestSentenceTransformersDocumentEmbedder: embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") assert embedder.model_name_or_path == "model" assert embedder.device == "cpu" - assert embedder.use_auth_token is None + assert embedder.token is None assert embedder.prefix == "" assert embedder.suffix == "" assert embedder.batch_size == 32 @@ -28,7 +28,7 @@ class TestSentenceTransformersDocumentEmbedder: embedder = SentenceTransformersDocumentEmbedder( model_name_or_path="model", device="cuda", - use_auth_token=True, + token=True, prefix="prefix", suffix="suffix", batch_size=64, @@ -39,7 +39,7 @@ class TestSentenceTransformersDocumentEmbedder: ) assert embedder.model_name_or_path == "model" assert embedder.device == "cuda" - assert embedder.use_auth_token is True + assert embedder.token is True assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" assert embedder.batch_size == 64 @@ -57,7 +57,7 @@ class TestSentenceTransformersDocumentEmbedder: "init_parameters": { "model_name_or_path": "model", "device": "cpu", - "use_auth_token": None, + "token": None, "prefix": "", "suffix": "", "batch_size": 32, @@ -73,7 +73,7 @@ class TestSentenceTransformersDocumentEmbedder: component = SentenceTransformersDocumentEmbedder( model_name_or_path="model", device="cuda", - use_auth_token="the-token", + token="the-token", prefix="prefix", suffix="suffix", batch_size=64, @@ -83,12 +83,13 @@ class TestSentenceTransformersDocumentEmbedder: embedding_separator=" - ", ) data = component.to_dict() + assert data == { "type": 
"SentenceTransformersDocumentEmbedder", "init_parameters": { "model_name_or_path": "model", "device": "cuda", - "use_auth_token": "the-token", + "token": None, # the token is not serialized "prefix": "prefix", "suffix": "suffix", "batch_size": 64, @@ -106,7 +107,7 @@ class TestSentenceTransformersDocumentEmbedder: "init_parameters": { "model_name_or_path": "model", "device": "cuda", - "use_auth_token": "the-token", + "token": None, "prefix": "prefix", "suffix": "suffix", "batch_size": 64, @@ -119,7 +120,7 @@ class TestSentenceTransformersDocumentEmbedder: component = SentenceTransformersDocumentEmbedder.from_dict(data) assert component.model_name_or_path == "model" assert component.device == "cuda" - assert component.use_auth_token == "the-token" + assert component.token is None assert component.prefix == "prefix" assert component.suffix == "suffix" assert component.batch_size == 64 diff --git a/test/preview/components/embedders/test_sentence_transformers_text_embedder.py b/test/preview/components/embedders/test_sentence_transformers_text_embedder.py index 70ab959c7..49dae9e8b 100644 --- a/test/preview/components/embedders/test_sentence_transformers_text_embedder.py +++ b/test/preview/components/embedders/test_sentence_transformers_text_embedder.py @@ -12,7 +12,7 @@ class TestSentenceTransformersTextEmbedder: embedder = SentenceTransformersTextEmbedder(model_name_or_path="model") assert embedder.model_name_or_path == "model" assert embedder.device == "cpu" - assert embedder.use_auth_token is None + assert embedder.token is None assert embedder.prefix == "" assert embedder.suffix == "" assert embedder.batch_size == 32 @@ -24,7 +24,7 @@ class TestSentenceTransformersTextEmbedder: embedder = SentenceTransformersTextEmbedder( model_name_or_path="model", device="cuda", - use_auth_token=True, + token=True, prefix="prefix", suffix="suffix", batch_size=64, @@ -33,7 +33,7 @@ class TestSentenceTransformersTextEmbedder: ) assert embedder.model_name_or_path == "model" assert 
embedder.device == "cuda" - assert embedder.use_auth_token is True + assert embedder.token is True assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" assert embedder.batch_size == 64 @@ -49,7 +49,7 @@ class TestSentenceTransformersTextEmbedder: "init_parameters": { "model_name_or_path": "model", "device": "cpu", - "use_auth_token": None, + "token": None, "prefix": "", "suffix": "", "batch_size": 32, @@ -63,7 +63,7 @@ class TestSentenceTransformersTextEmbedder: component = SentenceTransformersTextEmbedder( model_name_or_path="model", device="cuda", - use_auth_token=True, + token=True, prefix="prefix", suffix="suffix", batch_size=64, @@ -76,7 +76,7 @@ class TestSentenceTransformersTextEmbedder: "init_parameters": { "model_name_or_path": "model", "device": "cuda", - "use_auth_token": True, + "token": True, "prefix": "prefix", "suffix": "suffix", "batch_size": 64, @@ -85,6 +85,24 @@ class TestSentenceTransformersTextEmbedder: }, } + @pytest.mark.unit + def test_to_dict_not_serialize_token(self): + component = SentenceTransformersTextEmbedder(model_name_or_path="model", token="awesome-token") + data = component.to_dict() + assert data == { + "type": "SentenceTransformersTextEmbedder", + "init_parameters": { + "model_name_or_path": "model", + "device": "cpu", + "token": None, + "prefix": "", + "suffix": "", + "batch_size": 32, + "progress_bar": True, + "normalize_embeddings": False, + }, + } + @pytest.mark.unit def test_from_dict(self): data = { @@ -92,7 +110,7 @@ class TestSentenceTransformersTextEmbedder: "init_parameters": { "model_name_or_path": "model", "device": "cuda", - "use_auth_token": True, + "token": True, "prefix": "prefix", "suffix": "suffix", "batch_size": 64, @@ -103,7 +121,7 @@ class TestSentenceTransformersTextEmbedder: component = SentenceTransformersTextEmbedder.from_dict(data) assert component.model_name_or_path == "model" assert component.device == "cuda" - assert component.use_auth_token is True + assert component.token is True 
assert component.prefix == "prefix" assert component.suffix == "suffix" assert component.batch_size == 64 diff --git a/test/preview/components/rankers/test_similarity.py b/test/preview/components/rankers/test_similarity.py index cc2486a2c..90e43a040 100644 --- a/test/preview/components/rankers/test_similarity.py +++ b/test/preview/components/rankers/test_similarity.py @@ -7,19 +7,6 @@ from haystack.preview.components.rankers.similarity import SimilarityRanker class TestSimilarityRanker: @pytest.mark.unit def test_to_dict(self): - component = SimilarityRanker(model_name_or_path="cross-encoder/ms-marco-MiniLM-L-6-v2") - data = component.to_dict() - assert data == { - "type": "SimilarityRanker", - "init_parameters": { - "device": "cpu", - "top_k": 10, - "model_name_or_path": "cross-encoder/ms-marco-MiniLM-L-6-v2", - }, - } - - @pytest.mark.unit - def test_to_dict_with_custom_init_parameters(self): component = SimilarityRanker() data = component.to_dict() assert data == { @@ -28,6 +15,21 @@ class TestSimilarityRanker: "device": "cpu", "top_k": 10, "model_name_or_path": "cross-encoder/ms-marco-MiniLM-L-6-v2", + "token": None, + }, + } + + @pytest.mark.unit + def test_to_dict_with_custom_init_parameters(self): + component = SimilarityRanker(model_name_or_path="my_model", device="cuda", token="my_token", top_k=5) + data = component.to_dict() + assert data == { + "type": "SimilarityRanker", + "init_parameters": { + "device": "cuda", + "model_name_or_path": "my_model", + "token": None, # we don't serialize valid tokens + "top_k": 5, }, } diff --git a/test/preview/components/readers/test_extractive.py b/test/preview/components/readers/test_extractive.py index a6bbe7e2f..8533b091f 100644 --- a/test/preview/components/readers/test_extractive.py +++ b/test/preview/components/readers/test_extractive.py @@ -87,6 +87,62 @@ example_documents = [ ] * 2 +@pytest.mark.unit +def test_to_dict(): + component = ExtractiveReader("my-model", token="secret-token") + data = 
component.to_dict() + + assert data == { + "type": "ExtractiveReader", + "init_parameters": { + "model_name_or_path": "my-model", + "device": None, + "token": None, # don't serialize valid tokens + "top_k": 20, + "confidence_threshold": None, + "max_seq_length": 384, + "stride": 128, + "max_batch_size": None, + "answers_per_seq": None, + "no_answer": True, + "calibration_factor": 0.1, + }, + } + + +@pytest.mark.unit +def test_from_dict(): + data = { + "type": "ExtractiveReader", + "init_parameters": { + "model_name_or_path": "my-model", + "device": "cpu", + "token": None, + "top_k": 30, + "confidence_threshold": 0.5, + "max_seq_length": 300, + "stride": 100, + "max_batch_size": 20, + "answers_per_seq": 5, + "no_answer": False, + "calibration_factor": 0.5, + }, + } + + component = ExtractiveReader.from_dict(data) + assert component.model_name_or_path == "my-model" + assert component.device == "cpu" + assert component.token is None + assert component.top_k == 30 + assert component.confidence_threshold == 0.5 + assert component.max_seq_length == 300 + assert component.stride == 100 + assert component.max_batch_size == 20 + assert component.answers_per_seq == 5 + assert component.no_answer is False + assert component.calibration_factor == 0.5 + + @pytest.mark.unit def test_output(mock_reader: ExtractiveReader): answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)[ @@ -209,6 +265,17 @@ def test_nest_answers(mock_reader: ExtractiveReader): assert no_answer.probability == pytest.approx(expected_no_answer) +@pytest.mark.unit +@patch("haystack.preview.components.readers.extractive.AutoTokenizer.from_pretrained") +@patch("haystack.preview.components.readers.extractive.AutoModelForQuestionAnswering.from_pretrained") +def test_warm_up_use_hf_token(mocked_automodel, mocked_autotokenizer): + reader = ExtractiveReader("deepset/roberta-base-squad2", token="fake-token") + reader.warm_up() + + 
mocked_automodel.assert_called_once_with("deepset/roberta-base-squad2", token="fake-token") + mocked_autotokenizer.assert_called_once_with("deepset/roberta-base-squad2", token="fake-token") + + @pytest.mark.integration def test_t5(): reader = ExtractiveReader("TARUNBHATT/flan-t5-small-finetuned-squad")