diff --git a/haystack/components/embedders/backends/sentence_transformers_backend.py b/haystack/components/embedders/backends/sentence_transformers_backend.py index cff9135c8..e3550183e 100644 --- a/haystack/components/embedders/backends/sentence_transformers_backend.py +++ b/haystack/components/embedders/backends/sentence_transformers_backend.py @@ -2,9 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, cast - -import numpy as np +from typing import Any, Dict, List, Optional from haystack.lazy_imports import LazyImport from haystack.utils.auth import Secret @@ -78,5 +76,5 @@ class _SentenceTransformersEmbeddingBackend: ) def embed(self, data: List[str], **kwargs) -> List[List[float]]: - embeddings = cast(np.ndarray, self.model.encode(data, **kwargs)).tolist() + embeddings = self.model.encode(data, **kwargs).tolist() return embeddings diff --git a/haystack/testing/test_utils.py b/haystack/testing/test_utils.py index cbb3eb15c..247bb2e3b 100644 --- a/haystack/testing/test_utils.py +++ b/haystack/testing/test_utils.py @@ -5,8 +5,6 @@ import os import random -import numpy as np - from haystack import logging logger = logging.getLogger(__name__) @@ -23,7 +21,6 @@ def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None: :param deterministic_cudnn: Enable for full reproducibility when using CUDA. Caution: might slow down training. """ random.seed(seed) - np.random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) try: diff --git a/haystack/utils/expit.py b/haystack/utils/expit.py index ea84944c3..2f29ce99b 100644 --- a/haystack/utils/expit.py +++ b/haystack/utils/expit.py @@ -2,9 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 -import numpy as np +from numpy import exp -def expit(x: float) -> float: - """Compute logistic sigmoid function. Maps input values to a range between 0 and 1""" - return 1 / (1 + np.exp(-x)) +def expit(x) -> float: + """ + Compute logistic sigmoid function. Maps input values to a range between 0 and 1 + + :param x: input value. Can be a scalar or a numpy array. + """ + return 1 / (1 + exp(-x)) diff --git a/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml b/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml new file mode 100644 index 000000000..cc72e68db --- /dev/null +++ b/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml @@ -0,0 +1,4 @@ +--- +enhancements: + - | + Reduced numpy usage to speed up imports. diff --git a/test/components/embedders/test_hugging_face_api_document_embedder.py b/test/components/embedders/test_hugging_face_api_document_embedder.py index a2fd67c62..b9332d536 100644 --- a/test/components/embedders/test_hugging_face_api_document_embedder.py +++ b/test/components/embedders/test_hugging_face_api_document_embedder.py @@ -4,9 +4,9 @@ import os from unittest.mock import MagicMock, patch +import random import pytest from huggingface_hub.utils import RepositoryNotFoundError -from numpy import array, random from haystack.components.embedders import HuggingFaceAPIDocumentEmbedder from haystack.dataclasses import Document @@ -24,7 +24,7 @@ def mock_check_valid_model(): def mock_embedding_generation(json, **kwargs): - response = str(array([random.rand(384) for i in range(len(json["inputs"]))]).tolist()).encode() + response = str([[random.random() for _ in range(384)] for _ in range(len(json["inputs"]))]).encode() return response diff --git a/test/components/embedders/test_hugging_face_api_text_embedder.py b/test/components/embedders/test_hugging_face_api_text_embedder.py index 0031b6746..6e699fca2 100644 --- a/test/components/embedders/test_hugging_face_api_text_embedder.py +++ b/test/components/embedders/test_hugging_face_api_text_embedder.py @@ -4,9 +4,9 @@ import os from unittest.mock import MagicMock, patch +import random import pytest from huggingface_hub.utils import RepositoryNotFoundError -from numpy import array, random from haystack.components.embedders import HuggingFaceAPITextEmbedder from haystack.utils.auth import Secret @@ -22,7 +22,7 @@ def mock_check_valid_model(): def mock_embedding_generation(json, **kwargs): - response = str(array([random.rand(384) for i in range(len(json["inputs"]))]).tolist()).encode() + response = str([[random.random() for _ in range(384)] for _ in range(len(json["inputs"]))]).encode() return response diff --git a/test/components/embedders/test_openai_document_embedder.py b/test/components/embedders/test_openai_document_embedder.py index f64a3aca1..89ce62a92 100644 --- a/test/components/embedders/test_openai_document_embedder.py +++ b/test/components/embedders/test_openai_document_embedder.py @@ -5,7 +5,7 @@ import os from typing import List from haystack.utils.auth import Secret -import numpy as np +import random import pytest from haystack import Document @@ -16,7 +16,8 @@ def mock_openai_response(input: List[str], model: str = "text-embedding-ada-002" dict_response = { "object": "list", "data": [ - {"object": "embedding", "index": i, "embedding": np.random.rand(1536).tolist()} for i in range(len(input)) + {"object": "embedding", "index": i, "embedding": [random.random() for _ in range(1536)]} + for i in range(len(input)) ], "model": model, "usage": {"prompt_tokens": 4, "total_tokens": 4}, diff --git a/test/components/embedders/test_sentence_transformers_document_embedder.py b/test/components/embedders/test_sentence_transformers_document_embedder.py index 1c0bc526e..d8813f36f 100644 --- a/test/components/embedders/test_sentence_transformers_document_embedder.py +++ b/test/components/embedders/test_sentence_transformers_document_embedder.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from unittest.mock import MagicMock, patch -import numpy as np +import random import pytest import torch @@ -264,7 +264,9 @@ class TestSentenceTransformersDocumentEmbedder: def test_run(self): embedder = SentenceTransformersDocumentEmbedder(model="model") embedder.embedding_backend = MagicMock() - embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() + embedder.embedding_backend.embed = lambda x, **kwargs: [ + [random.random() for _ in range(16)] for _ in range(len(x)) + ] documents = [Document(content=f"document number {i}") for i in range(5)] diff --git a/test/components/embedders/test_sentence_transformers_text_embedder.py b/test/components/embedders/test_sentence_transformers_text_embedder.py index 9325c481c..195ee8efd 100644 --- a/test/components/embedders/test_sentence_transformers_text_embedder.py +++ b/test/components/embedders/test_sentence_transformers_text_embedder.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch import torch -import numpy as np +import random import pytest from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder @@ -239,7 +239,9 @@ class TestSentenceTransformersTextEmbedder: def test_run(self): embedder = SentenceTransformersTextEmbedder(model="model") embedder.embedding_backend = MagicMock() - embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() + embedder.embedding_backend.embed = lambda x, **kwargs: [ + [random.random() for _ in range(16)] for _ in range(len(x)) + ] text = "a nice text to embed" diff --git a/test/components/evaluators/test_faithfulness_evaluator.py b/test/components/evaluators/test_faithfulness_evaluator.py index 1150099e0..de92388ec 100644 --- a/test/components/evaluators/test_faithfulness_evaluator.py +++ b/test/components/evaluators/test_faithfulness_evaluator.py @@ -5,7 +5,6 @@ import os import math from typing import List -import numpy as np import pytest from haystack import Pipeline diff --git a/test/components/retrievers/test_in_memory_embedding_retriever.py b/test/components/retrievers/test_in_memory_embedding_retriever.py index 7fe8387d6..366fd17b3 100644 --- a/test/components/retrievers/test_in_memory_embedding_retriever.py +++ b/test/components/retrievers/test_in_memory_embedding_retriever.py @@ -4,7 +4,6 @@ from typing import Dict, Any import pytest -import numpy as np from haystack import Pipeline, DeserializationError from haystack.document_stores.types import FilterPolicy @@ -135,7 +134,7 @@ class TestMemoryEmbeddingRetriever: assert "documents" in result assert len(result["documents"]) == top_k - assert np.array_equal(result["documents"][0].embedding, [1.0, 1.0, 1.0, 1.0]) + assert result["documents"][0].embedding == [1.0, 1.0, 1.0, 1.0] def test_invalid_run_wrong_store_type(self): SomeOtherDocumentStore = document_store_class("SomeOtherDocumentStore") @@ -165,4 +164,4 @@ class TestMemoryEmbeddingRetriever: results_docs = result["retriever"]["documents"] assert results_docs assert len(results_docs) == top_k - assert np.array_equal(results_docs[0].embedding, [1.0, 1.0, 1.0, 1.0]) + assert results_docs[0].embedding == [1.0, 1.0, 1.0, 1.0]