refactor: Change Document.embedding type to list of floats (#6135)

* Change Document.embedding type

* Add release notes

* Fix document_store testing

* Fix pylint

* Fix tests
This commit is contained in:
Silvano Cerza 2023-10-23 12:26:05 +02:00 committed by GitHub
parent 8f289282f1
commit c8d162ced9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 70 additions and 74 deletions

View File

@ -3,7 +3,7 @@ import json
import logging import logging
from dataclasses import asdict, dataclass, field, fields from dataclasses import asdict, dataclass, field, fields
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional, Type from typing import Any, Dict, List, Optional, Type
import numpy import numpy
import pandas import pandas
@ -42,8 +42,6 @@ class DocumentDecoder(json.JSONDecoder):
dictionary["array"] = numpy.array(dictionary.get("array")) dictionary["array"] = numpy.array(dictionary.get("array"))
if "dataframe" in dictionary and dictionary.get("dataframe"): if "dataframe" in dictionary and dictionary.get("dataframe"):
dictionary["dataframe"] = pandas.read_json(dictionary.get("dataframe", None)) dictionary["dataframe"] = pandas.read_json(dictionary.get("dataframe", None))
if "embedding" in dictionary and dictionary.get("embedding"):
dictionary["embedding"] = numpy.array(dictionary.get("embedding"))
return dictionary return dictionary
@ -75,7 +73,7 @@ class Document:
mime_type: str = field(default="text/plain") mime_type: str = field(default="text/plain")
metadata: Dict[str, Any] = field(default_factory=dict) metadata: Dict[str, Any] = field(default_factory=dict)
score: Optional[float] = field(default=None) score: Optional[float] = field(default=None)
embedding: Optional[numpy.ndarray] = field(default=None, repr=False) embedding: Optional[List[float]] = field(default=None, repr=False)
def __str__(self): def __str__(self):
fields = [f"mimetype: '{self.mime_type}'"] fields = [f"mimetype: '{self.mime_type}'"]
@ -120,7 +118,7 @@ class Document:
blob = self.blob or None blob = self.blob or None
mime_type = self.mime_type or None mime_type = self.mime_type or None
metadata = self.metadata or {} metadata = self.metadata or {}
embedding = self.embedding.tolist() if self.embedding is not None else None embedding = self.embedding if self.embedding is not None else None
data = f"{text}{array}{dataframe}{blob}{mime_type}{metadata}{embedding}" data = f"{text}{array}{dataframe}{blob}{mime_type}{metadata}{embedding}"
return hashlib.sha256(data.encode("utf-8")).hexdigest() return hashlib.sha256(data.encode("utf-8")).hexdigest()

View File

@ -1,5 +1,6 @@
# pylint: disable=too-many-public-methods # pylint: disable=too-many-public-methods
from typing import List from typing import List
import random
import pytest import pytest
import numpy as np import numpy as np
@ -11,6 +12,10 @@ from haystack.preview.document_stores.errors import MissingDocumentError, Duplic
from haystack.preview.errors import FilterError from haystack.preview.errors import FilterError
def _random_embeddings(n):
return [random.random() for _ in range(n)]
class DocumentStoreBaseTests: class DocumentStoreBaseTests:
@pytest.fixture @pytest.fixture
def docstore(self) -> DocumentStore: def docstore(self) -> DocumentStore:
@ -18,8 +23,8 @@ class DocumentStoreBaseTests:
@pytest.fixture @pytest.fixture
def filterable_docs(self) -> List[Document]: def filterable_docs(self) -> List[Document]:
embedding_zero = np.zeros(768).astype(np.float32) embedding_zero = [0.0] * 768
embedding_one = np.ones(768).astype(np.float32) embedding_one = [1.0] * 768
documents = [] documents = []
for i in range(3): for i in range(3):
@ -27,21 +32,21 @@ class DocumentStoreBaseTests:
Document( Document(
text=f"A Foo Document {i}", text=f"A Foo Document {i}",
metadata={"name": f"name_{i}", "page": "100", "chapter": "intro", "number": 2}, metadata={"name": f"name_{i}", "page": "100", "chapter": "intro", "number": 2},
embedding=np.random.rand(768).astype(np.float32), embedding=_random_embeddings(768),
) )
) )
documents.append( documents.append(
Document( Document(
text=f"A Bar Document {i}", text=f"A Bar Document {i}",
metadata={"name": f"name_{i}", "page": "123", "chapter": "abstract", "number": -2}, metadata={"name": f"name_{i}", "page": "123", "chapter": "abstract", "number": -2},
embedding=np.random.rand(768).astype(np.float32), embedding=_random_embeddings(768),
) )
) )
documents.append( documents.append(
Document( Document(
text=f"A Foobar Document {i}", text=f"A Foobar Document {i}",
metadata={"name": f"name_{i}", "page": "90", "chapter": "conclusion", "number": -10}, metadata={"name": f"name_{i}", "page": "90", "chapter": "conclusion", "number": -10},
embedding=np.random.rand(768).astype(np.float32), embedding=_random_embeddings(768),
) )
) )
documents.append( documents.append(
@ -209,11 +214,9 @@ class DocumentStoreBaseTests:
@pytest.mark.unit @pytest.mark.unit
def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs) docstore.write_documents(filterable_docs)
embedding = np.zeros(768).astype(np.float32) embedding = [0.0] * 768
result = docstore.filter_documents(filters={"embedding": embedding}) result = docstore.filter_documents(filters={"embedding": embedding})
assert self.contains_same_docs( assert self.contains_same_docs(result, [doc for doc in filterable_docs if embedding == doc.embedding])
result, [doc for doc in filterable_docs if np.array_equal(embedding, doc.embedding)] # type: ignore
)
@pytest.mark.unit @pytest.mark.unit
def test_in_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]): def test_in_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
@ -248,17 +251,12 @@ class DocumentStoreBaseTests:
@pytest.mark.unit @pytest.mark.unit
def test_in_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): def test_in_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs) docstore.write_documents(filterable_docs)
embedding_zero = np.zeros(768, np.float32) embedding_zero = [0.0] * 768
embedding_one = np.ones(768, np.float32) embedding_one = [1.0] * 768
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}}) result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
assert self.contains_same_docs( assert self.contains_same_docs(
result, result,
[ [doc for doc in filterable_docs if (embedding_zero == doc.embedding or embedding_one == doc.embedding)],
doc
for doc in filterable_docs
if isinstance(doc.embedding, np.ndarray)
and (np.array_equal(embedding_zero, doc.embedding) or np.array_equal(embedding_one, doc.embedding))
],
) )
@pytest.mark.unit @pytest.mark.unit

View File

@ -0,0 +1,4 @@
---
preview:
- |
Change `Document`'s `embedding` field type from `numpy.ndarray` to `List[float]`

View File

@ -118,9 +118,9 @@ class TestMemoryEmbeddingRetriever:
top_k = 3 top_k = 3
ds = InMemoryDocumentStore(embedding_similarity_function="cosine") ds = InMemoryDocumentStore(embedding_similarity_function="cosine")
docs = [ docs = [
Document(text="my document", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="my document", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="another document", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="another document", embedding=[1.0, 1.0, 1.0, 1.0]),
Document(text="third document", embedding=np.array([0.5, 0.7, 0.5, 0.7])), Document(text="third document", embedding=[0.5, 0.7, 0.5, 0.7]),
] ]
ds.write_documents(docs) ds.write_documents(docs)
@ -142,9 +142,9 @@ class TestMemoryEmbeddingRetriever:
ds = InMemoryDocumentStore(embedding_similarity_function="cosine") ds = InMemoryDocumentStore(embedding_similarity_function="cosine")
top_k = 2 top_k = 2
docs = [ docs = [
Document(text="my document", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="my document", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="another document", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="another document", embedding=[1.0, 1.0, 1.0, 1.0]),
Document(text="third document", embedding=np.array([0.5, 0.7, 0.5, 0.7])), Document(text="third document", embedding=[0.5, 0.7, 0.5, 0.7]),
] ]
ds.write_documents(docs) ds.write_documents(docs)
retriever = InMemoryEmbeddingRetriever(ds, top_k=top_k) retriever = InMemoryEmbeddingRetriever(ds, top_k=top_k)
@ -152,7 +152,7 @@ class TestMemoryEmbeddingRetriever:
pipeline = Pipeline() pipeline = Pipeline()
pipeline.add_component("retriever", retriever) pipeline.add_component("retriever", retriever)
result: Dict[str, Any] = pipeline.run( result: Dict[str, Any] = pipeline.run(
data={"retriever": {"query_embedding": np.array([0.1, 0.1, 0.1, 0.1]), "return_embedding": True}} data={"retriever": {"query_embedding": [0.1, 0.1, 0.1, 0.1], "return_embedding": True}}
) )
assert result assert result

View File

@ -71,8 +71,8 @@ def test_equality_with_metadata_with_objects():
return True return True
foo = TestObject() foo = TestObject()
doc1 = Document(text="test text", metadata={"value": np.array([0, 1, 2]), "path": Path("."), "obj": foo}) doc1 = Document(text="test text", metadata={"value": [0, 1, 2], "path": Path("."), "obj": foo})
doc2 = Document(text="test text", metadata={"value": np.array([0, 1, 2]), "path": Path("."), "obj": foo}) doc2 = Document(text="test text", metadata={"value": [0, 1, 2], "path": Path("."), "obj": foo})
assert doc1 == doc2 assert doc1 == doc2
@ -107,7 +107,7 @@ def test_full_document_to_dict():
mime_type="application/pdf", mime_type="application/pdf",
metadata={"some": "values", "test": 10}, metadata={"some": "values", "test": 10},
score=0.99, score=0.99,
embedding=np.zeros([10, 10]), embedding=[10, 10],
) )
dictionary = doc.to_dict() dictionary = doc.to_dict()
@ -121,7 +121,7 @@ def test_full_document_to_dict():
assert blob == doc.blob assert blob == doc.blob
embedding = dictionary.pop("embedding") embedding = dictionary.pop("embedding")
assert (embedding == doc.embedding).all() assert embedding == doc.embedding
assert dictionary == { assert dictionary == {
"id": doc.id, "id": doc.id,
@ -134,7 +134,7 @@ def test_full_document_to_dict():
@pytest.mark.unit @pytest.mark.unit
def test_document_with_most_attributes_from_dict(): def test_document_with_most_attributes_from_dict():
embedding = np.zeros([10, 10]) embedding = [10, 10]
assert Document.from_dict( assert Document.from_dict(
{ {
"text": "test text", "text": "test text",
@ -194,7 +194,7 @@ def test_full_document_to_json(tmp_path):
mime_type="application/pdf", mime_type="application/pdf",
metadata={"some object": TestClass(), "a path": tmp_path / "test.txt"}, metadata={"some object": TestClass(), "a path": tmp_path / "test.txt"},
score=0.5, score=0.5,
embedding=np.array([1, 2, 3, 4]), embedding=[1, 2, 3, 4],
) )
assert doc_1.to_json() == json.dumps( assert doc_1.to_json() == json.dumps(
{ {
@ -241,7 +241,7 @@ def test_full_document_from_json(tmp_path):
# Note the object serialization # Note the object serialization
metadata={"some object": "<the object>", "a path": str((tmp_path / "test.txt").absolute())}, metadata={"some object": "<the object>", "a path": str((tmp_path / "test.txt").absolute())},
score=0.5, score=0.5,
embedding=np.array([1, 2, 3, 4]), embedding=[1, 2, 3, 4],
) )

View File

@ -135,6 +135,10 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):
results = docstore.bm25_retrieval(query="Python", top_k=1) results = docstore.bm25_retrieval(query="Python", top_k=1)
assert results[0].text == "Python is a popular programming language" assert results[0].text == "Python is a popular programming language"
@pytest.mark.skip(reason="Filter is not working properly, see https://github.com/deepset-ai/haystack/issues/6153")
def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs):
pass
# Test a query, add a new document and make sure results are appropriately updated # Test a query, add a new document and make sure results are appropriately updated
@pytest.mark.unit @pytest.mark.unit
def test_bm25_retrieval_with_updated_docs(self, docstore: DocumentStore): def test_bm25_retrieval_with_updated_docs(self, docstore: DocumentStore):
@ -256,12 +260,12 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):
docstore = InMemoryDocumentStore(embedding_similarity_function="cosine") docstore = InMemoryDocumentStore(embedding_similarity_function="cosine")
# Tests if the embedding retrieval method returns the correct document based on the input query embedding. # Tests if the embedding retrieval method returns the correct document based on the input query embedding.
docs = [ docs = [
Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="Haystack supports multiple languages", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="Haystack supports multiple languages", embedding=[1.0, 1.0, 1.0, 1.0]),
] ]
docstore.write_documents(docs) docstore.write_documents(docs)
results = docstore.embedding_retrieval( results = docstore.embedding_retrieval(
query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=1, filters={}, scale_score=False query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=1, filters={}, scale_score=False
) )
assert len(results) == 1 assert len(results) == 1
assert results[0].text == "Haystack supports multiple languages" assert results[0].text == "Haystack supports multiple languages"
@ -280,7 +284,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):
docstore = InMemoryDocumentStore() docstore = InMemoryDocumentStore()
docs = [Document(text="Hello world"), Document(text="Haystack supports multiple languages")] docs = [Document(text="Hello world"), Document(text="Haystack supports multiple languages")]
docstore.write_documents(docs) docstore.write_documents(docs)
results = docstore.embedding_retrieval(query_embedding=np.array([0.1, 0.1, 0.1, 0.1])) results = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1])
assert len(results) == 0 assert len(results) == 0
assert "No Documents found with embeddings. Returning empty list." in caplog.text assert "No Documents found with embeddings. Returning empty list." in caplog.text
@ -289,29 +293,29 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):
caplog.set_level(logging.INFO) caplog.set_level(logging.INFO)
docstore = InMemoryDocumentStore() docstore = InMemoryDocumentStore()
docs = [ docs = [
Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="Haystack supports multiple languages"), Document(text="Haystack supports multiple languages"),
] ]
docstore.write_documents(docs) docstore.write_documents(docs)
docstore.embedding_retrieval(query_embedding=np.array([0.1, 0.1, 0.1, 0.1])) docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1])
assert "Skipping some Documents that don't have an embedding." in caplog.text assert "Skipping some Documents that don't have an embedding." in caplog.text
@pytest.mark.unit @pytest.mark.unit
def test_embedding_retrieval_documents_different_embedding_sizes(self): def test_embedding_retrieval_documents_different_embedding_sizes(self):
docstore = InMemoryDocumentStore() docstore = InMemoryDocumentStore()
docs = [ docs = [
Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="Haystack supports multiple languages", embedding=np.array([1.0, 1.0])), Document(text="Haystack supports multiple languages", embedding=np.array([1.0, 1.0])),
] ]
docstore.write_documents(docs) docstore.write_documents(docs)
with pytest.raises(DocumentStoreError, match="The embedding size of all Documents should be the same."): with pytest.raises(DocumentStoreError, match="The embedding size of all Documents should be the same."):
docstore.embedding_retrieval(query_embedding=np.array([0.1, 0.1, 0.1, 0.1])) docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1])
@pytest.mark.unit @pytest.mark.unit
def test_embedding_retrieval_query_documents_different_embedding_sizes(self): def test_embedding_retrieval_query_documents_different_embedding_sizes(self):
docstore = InMemoryDocumentStore() docstore = InMemoryDocumentStore()
docs = [Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4]))] docs = [Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4])]
docstore.write_documents(docs) docstore.write_documents(docs)
with pytest.raises( with pytest.raises(
@ -324,69 +328,61 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):
def test_embedding_retrieval_with_different_top_k(self): def test_embedding_retrieval_with_different_top_k(self):
docstore = InMemoryDocumentStore() docstore = InMemoryDocumentStore()
docs = [ docs = [
Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="Haystack supports multiple languages", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="Haystack supports multiple languages", embedding=[1.0, 1.0, 1.0, 1.0]),
Document(text="Python is a popular programming language", embedding=np.array([0.5, 0.5, 0.5, 0.5])), Document(text="Python is a popular programming language", embedding=[0.5, 0.5, 0.5, 0.5]),
] ]
docstore.write_documents(docs) docstore.write_documents(docs)
results = docstore.embedding_retrieval(query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=2) results = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=2)
assert len(results) == 2 assert len(results) == 2
results = docstore.embedding_retrieval(query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=3) results = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=3)
assert len(results) == 3 assert len(results) == 3
@pytest.mark.unit @pytest.mark.unit
def test_embedding_retrieval_with_scale_score(self): def test_embedding_retrieval_with_scale_score(self):
docstore = InMemoryDocumentStore() docstore = InMemoryDocumentStore()
docs = [ docs = [
Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="Haystack supports multiple languages", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="Haystack supports multiple languages", embedding=[1.0, 1.0, 1.0, 1.0]),
Document(text="Python is a popular programming language", embedding=np.array([0.5, 0.5, 0.5, 0.5])), Document(text="Python is a popular programming language", embedding=[0.5, 0.5, 0.5, 0.5]),
] ]
docstore.write_documents(docs) docstore.write_documents(docs)
results1 = docstore.embedding_retrieval( results1 = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=1, scale_score=True)
query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=1, scale_score=True
)
# Confirm that score is scaled between 0 and 1 # Confirm that score is scaled between 0 and 1
assert 0 <= results1[0].score <= 1 assert 0 <= results1[0].score <= 1
# Same query, different scale, scores differ when not scaled # Same query, different scale, scores differ when not scaled
results = docstore.embedding_retrieval( results = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=1, scale_score=False)
query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=1, scale_score=False
)
assert results[0].score != results1[0].score assert results[0].score != results1[0].score
@pytest.mark.unit @pytest.mark.unit
def test_embedding_retrieval_return_embedding(self): def test_embedding_retrieval_return_embedding(self):
docstore = InMemoryDocumentStore(embedding_similarity_function="cosine") docstore = InMemoryDocumentStore(embedding_similarity_function="cosine")
docs = [ docs = [
Document(text="Hello world", embedding=np.array([0.1, 0.2, 0.3, 0.4])), Document(text="Hello world", embedding=[0.1, 0.2, 0.3, 0.4]),
Document(text="Haystack supports multiple languages", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="Haystack supports multiple languages", embedding=[1.0, 1.0, 1.0, 1.0]),
] ]
docstore.write_documents(docs) docstore.write_documents(docs)
results = docstore.embedding_retrieval( results = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=1, return_embedding=False)
query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=1, return_embedding=False
)
assert results[0].embedding is None assert results[0].embedding is None
results = docstore.embedding_retrieval( results = docstore.embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=1, return_embedding=True)
query_embedding=np.array([0.1, 0.1, 0.1, 0.1]), top_k=1, return_embedding=True assert results[0].embedding == [1.0, 1.0, 1.0, 1.0]
)
assert (results[0].embedding == np.array([1.0, 1.0, 1.0, 1.0])).all()
@pytest.mark.unit @pytest.mark.unit
def test_compute_cosine_similarity_scores(self): def test_compute_cosine_similarity_scores(self):
docstore = InMemoryDocumentStore(embedding_similarity_function="cosine") docstore = InMemoryDocumentStore(embedding_similarity_function="cosine")
docs = [ docs = [
Document(text="Document 1", embedding=np.array([1.0, 0.0, 0.0, 0.0])), Document(text="Document 1", embedding=[1.0, 0.0, 0.0, 0.0]),
Document(text="Document 2", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="Document 2", embedding=[1.0, 1.0, 1.0, 1.0]),
] ]
scores = docstore._compute_query_embedding_similarity_scores( scores = docstore._compute_query_embedding_similarity_scores(
embedding=np.array([0.1, 0.1, 0.1, 0.1]), documents=docs, scale_score=False embedding=[0.1, 0.1, 0.1, 0.1], documents=docs, scale_score=False
) )
assert scores == [0.5, 1.0] assert scores == [0.5, 1.0]
@ -394,11 +390,11 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests):
def test_compute_dot_product_similarity_scores(self): def test_compute_dot_product_similarity_scores(self):
docstore = InMemoryDocumentStore(embedding_similarity_function="dot_product") docstore = InMemoryDocumentStore(embedding_similarity_function="dot_product")
docs = [ docs = [
Document(text="Document 1", embedding=np.array([1.0, 0.0, 0.0, 0.0])), Document(text="Document 1", embedding=[1.0, 0.0, 0.0, 0.0]),
Document(text="Document 2", embedding=np.array([1.0, 1.0, 1.0, 1.0])), Document(text="Document 2", embedding=[1.0, 1.0, 1.0, 1.0]),
] ]
scores = docstore._compute_query_embedding_similarity_scores( scores = docstore._compute_query_embedding_similarity_scores(
embedding=np.array([0.1, 0.1, 0.1, 0.1]), documents=docs, scale_score=False embedding=[0.1, 0.1, 0.1, 0.1], documents=docs, scale_score=False
) )
assert scores == [0.1, 0.4] assert scores == [0.1, 0.4]