fix: changing document scores (#5090)

* #4653 fix changing scores by returning new document objects from document store queries
* added integration test for InMemoryDocumentStore demonstrating the desired behavior
* Update test/document_stores/test_memory.py
2025-10-29 16:59:47 +00:00 · 2023-06-14 17:35:46 +02:00 · 2023-06-14 17:35:46 +02:00 · 60e5d73424
commit 60e5d73424
parent 58c022ef86
4 changed files with 33 additions and 9 deletions
--- a/haystack/document_stores/faiss.py
+++ b/haystack/document_stores/faiss.py
@ -1,3 +1,4 @@
+import copy
 from typing import Union, List, Optional, Dict, Generator

 import json
@ -634,16 +635,18 @@ class FAISSDocumentStore(SQLDocumentStore):
        scores_for_vector_ids: Dict[str, float] = {
            str(v_id): s for v_id, s in zip(vector_id_matrix[0], score_matrix[0])
        }
+        return_documents = []
        for doc in documents:
            score = scores_for_vector_ids[doc.meta["vector_id"]]
            if scale_score:
                score = self.scale_to_unit_interval(score, self.similarity)
            doc.score = score
-
            if return_embedding is True:
                doc.embedding = self.faiss_indexes[index].reconstruct(int(doc.meta["vector_id"]))
+            return_document = copy.copy(doc)
+            return_documents.append(return_document)

-        return documents
+        return return_documents

    def save(self, index_path: Union[str, Path], config_path: Optional[Union[str, Path]] = None):
        """
--- a/haystack/document_stores/memory.py
+++ b/haystack/document_stores/memory.py
@ -1,3 +1,4 @@
+import copy
 from typing import Any, Dict, List, Optional, Union, Generator, Literal

 import time
@ -959,7 +960,7 @@ class InMemoryDocumentStore(KeywordDocumentStore):
        scale_score: bool = True,
    ) -> List[Document]:
        """
-        Scan through documents in DocumentStore and return a small number documents
+        Scan through documents in DocumentStore and return a small number of documents
        that are most relevant to the query as defined by the BM25 algorithm.
        :param query: The query.
        :param top_k: How many documents to return per query.
@ -995,13 +996,13 @@ class InMemoryDocumentStore(KeywordDocumentStore):
        top_docs_positions = np.argsort(docs_scores)[::-1][:top_k]

        textual_docs_list = [doc for doc in self.indexes[index].values() if doc.content_type in ["text", "table"]]
-        top_docs = []
+        return_documents = []
        for i in top_docs_positions:
            doc = textual_docs_list[i]
            doc.score = docs_scores[i]
-            top_docs.append(doc)
-
-        return top_docs
+            return_document = copy.copy(doc)
+            return_documents.append(return_document)
+        return return_documents

    def query_batch(
        self,
@ -1015,7 +1016,7 @@ class InMemoryDocumentStore(KeywordDocumentStore):
        scale_score: bool = True,
    ) -> List[List[Document]]:
        """
-        Scan through documents in DocumentStore and return a small number documents
+        Scan through documents in DocumentStore and return a small number of documents
        that are most relevant to the provided queries as defined by keyword matching algorithms like BM25.
        This method lets you find relevant documents for list of query strings (output: List of Lists of Documents).
        :param query: The query.
--- a/haystack/document_stores/pinecone.py
+++ b/haystack/document_stores/pinecone.py
@ -1,3 +1,4 @@
+import copy
 import json
 from typing import Set, Union, List, Optional, Dict, Generator, Any

@ -1181,13 +1182,16 @@ class PineconeDocumentStore(BaseDocumentStore):

        # assign query score to each document
        scores_for_vector_ids: Dict[str, float] = {str(v_id): s for v_id, s in zip(vector_id_matrix, score_matrix)}
+        return_documents = []
        for doc in documents:
            score = scores_for_vector_ids[doc.id]
            if scale_score:
                score = self.scale_to_unit_interval(score, self.similarity)
            doc.score = score
+            return_document = copy.copy(doc)
+            return_documents.append(return_document)

-        return documents
+        return return_documents

    def _get_documents_by_meta(
        self,
--- a/test/document_stores/test_memory.py
+++ b/test/document_stores/test_memory.py
@ -1,4 +1,5 @@
 import logging
+from copy import deepcopy

 import pandas as pd
 import pytest
@ -6,6 +7,7 @@ from rank_bm25 import BM25
 import numpy as np

 from haystack.document_stores.memory import InMemoryDocumentStore
+from haystack.nodes import BM25Retriever
 from haystack.schema import Document
 from haystack.testing import DocumentStoreBaseTestAbstract

@ -124,3 +126,17 @@ class TestInMemoryDocumentStore(DocumentStoreBaseTestAbstract):
            docs = ds.query_by_embedding(query_emb=query_embedding, top_k=1)
            assert "Skipping some of your documents that don't have embeddings" in caplog.text
        assert len(docs) == 0
+
+    @pytest.mark.integration
+    def test_bm25_scores_not_changing_across_queries(self, ds, documents):
+        """Test that computed scores which are returned to the user should not change when running multiple queries."""
+        ds.write_documents(documents)
+        retriever = BM25Retriever(ds, scale_score=False)
+        queries = ["What is a Foo Document?", "What is a Bar Document?", "Tell me about a document without embeddings"]
+        # Hold on to the un-copied results: a later query must not be able to overwrite the scores on them.
+        results_direct = [retriever.retrieve(query) for query in queries]
+        results_copied = [deepcopy(retriever.retrieve(query)) for query in queries]
+        scores_direct = [rd.score for rds in results_direct for rd in rds]
+        scores_copied = [rc.score for rcs in results_copied for rc in rcs]
+
+        assert scores_direct == scores_copied