diff --git a/haystack/components/rankers/meta_field.py b/haystack/components/rankers/meta_field.py index f855eff7a..aa62ebf5b 100644 --- a/haystack/components/rankers/meta_field.py +++ b/haystack/components/rankers/meta_field.py @@ -103,7 +103,7 @@ class MetaFieldRanker: "Parameter must be in range [0,1] but is currently set to '%s'.\n'0' disables sorting by a " "meta field, '0.5' assigns equal weight to the previous relevance scores and the meta field, and " "'1' ranks by the meta field only.\nChange the parameter to a value in range 0 to 1 when " - "initializing the MetaFieldRanker." % self.weight + "initializing the MetaFieldRanker." % weight ) if ranking_mode not in ["reciprocal_rank_fusion", "linear_score"]: @@ -250,7 +250,7 @@ class MetaFieldRanker: # Add the docs missing the meta_field back on the end sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta] sorted_documents = sorted_by_meta + docs_missing_meta_field - sorted_documents = self._merge_rankings(documents, sorted_documents) + sorted_documents = self._merge_rankings(documents, sorted_documents, weight) return {"documents": sorted_documents[:top_k]} def _parse_meta( @@ -295,7 +295,9 @@ class MetaFieldRanker: return meta_values - def _merge_rankings(self, documents: List[Document], sorted_documents: List[Document]) -> List[Document]: + def _merge_rankings( + self, documents: List[Document], sorted_documents: List[Document], weight: float + ) -> List[Document]: """ Merge the two different rankings for Documents sorted both by their content and by their meta field. """ @@ -303,8 +305,8 @@ class MetaFieldRanker: if self.ranking_mode == "reciprocal_rank_fusion": for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)): - scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - self.weight) - scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * self.weight + scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - weight) + scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * weight elif self.ranking_mode == "linear_score": for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)): score = float(0) @@ -319,8 +321,8 @@ class MetaFieldRanker: else: score = document.score - scores_map[document.id] += score * (1 - self.weight) - scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * self.weight + scores_map[document.id] += score * (1 - weight) + scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * weight for document in documents: document.score = scores_map[document.id] diff --git a/releasenotes/notes/fix-metafieldranker-weight-in-run-66ce13191e596214.yaml b/releasenotes/notes/fix-metafieldranker-weight-in-run-66ce13191e596214.yaml new file mode 100644 index 000000000..e590bcc4a --- /dev/null +++ b/releasenotes/notes/fix-metafieldranker-weight-in-run-66ce13191e596214.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Fix a bug in the `MetaFieldRanker` where the `weight` parameter passed to the `run` method was not being used. diff --git a/test/components/rankers/test_metafield.py b/test/components/rankers/test_metafield.py index 4c44f7a6d..9514407e7 100644 --- a/test/components/rankers/test_metafield.py +++ b/test/components/rankers/test_metafield.py @@ -43,6 +43,16 @@ class TestMetaFieldRanker: sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True) assert [doc.meta["rating"] for doc in docs_after] == sorted_scores + def test_run_with_weight_equal_to_1_passed_in_run_method(self): + ranker = MetaFieldRanker(meta_field="rating", weight=0.0) + docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]] + output = ranker.run(documents=docs_before, weight=1.0) + docs_after = output["documents"] + + assert len(docs_after) == 3 + sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True) + assert [doc.meta["rating"] for doc in docs_after] == sorted_scores + def test_sort_order_ascending(self): ranker = MetaFieldRanker(meta_field="rating", weight=1.0, sort_order="ascending") docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]