fix: MetaFieldRanker - use weight if passed in the run method (#7305)

* fix: `MetaFieldRanker` - use `weight` if passed in the `run` method

* reno
This commit is contained in:
Stefano Fiorucci 2024-03-05 12:13:56 +01:00 committed by GitHub
parent b86490bb5a
commit 38a80b0235
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 23 additions and 7 deletions

View File

@ -103,7 +103,7 @@ class MetaFieldRanker:
"Parameter <weight> must be in range [0,1] but is currently set to '%s'.\n'0' disables sorting by a " "Parameter <weight> must be in range [0,1] but is currently set to '%s'.\n'0' disables sorting by a "
"meta field, '0.5' assigns equal weight to the previous relevance scores and the meta field, and " "meta field, '0.5' assigns equal weight to the previous relevance scores and the meta field, and "
"'1' ranks by the meta field only.\nChange the <weight> parameter to a value in range 0 to 1 when " "'1' ranks by the meta field only.\nChange the <weight> parameter to a value in range 0 to 1 when "
"initializing the MetaFieldRanker." % self.weight "initializing the MetaFieldRanker." % weight
) )
if ranking_mode not in ["reciprocal_rank_fusion", "linear_score"]: if ranking_mode not in ["reciprocal_rank_fusion", "linear_score"]:
@ -250,7 +250,7 @@ class MetaFieldRanker:
# Add the docs missing the meta_field back on the end # Add the docs missing the meta_field back on the end
sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta] sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta]
sorted_documents = sorted_by_meta + docs_missing_meta_field sorted_documents = sorted_by_meta + docs_missing_meta_field
sorted_documents = self._merge_rankings(documents, sorted_documents) sorted_documents = self._merge_rankings(documents, sorted_documents, weight)
return {"documents": sorted_documents[:top_k]} return {"documents": sorted_documents[:top_k]}
def _parse_meta( def _parse_meta(
@ -295,7 +295,9 @@ class MetaFieldRanker:
return meta_values return meta_values
def _merge_rankings(self, documents: List[Document], sorted_documents: List[Document]) -> List[Document]: def _merge_rankings(
self, documents: List[Document], sorted_documents: List[Document], weight: float
) -> List[Document]:
""" """
Merge the two different rankings for Documents sorted both by their content and by their meta field. Merge the two different rankings for Documents sorted both by their content and by their meta field.
""" """
@ -303,8 +305,8 @@ class MetaFieldRanker:
if self.ranking_mode == "reciprocal_rank_fusion": if self.ranking_mode == "reciprocal_rank_fusion":
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)): for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - self.weight) scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - weight)
scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * self.weight scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * weight
elif self.ranking_mode == "linear_score": elif self.ranking_mode == "linear_score":
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)): for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
score = float(0) score = float(0)
@ -319,8 +321,8 @@ class MetaFieldRanker:
else: else:
score = document.score score = document.score
scores_map[document.id] += score * (1 - self.weight) scores_map[document.id] += score * (1 - weight)
scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * self.weight scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * weight
for document in documents: for document in documents:
document.score = scores_map[document.id] document.score = scores_map[document.id]

View File

@ -0,0 +1,4 @@
---
fixes:
- |
Fix a bug in the `MetaFieldRanker` where the `weight` parameter passed to the `run` method was not being used.

View File

@ -43,6 +43,16 @@ class TestMetaFieldRanker:
sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True) sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True)
assert [doc.meta["rating"] for doc in docs_after] == sorted_scores assert [doc.meta["rating"] for doc in docs_after] == sorted_scores
def test_run_with_weight_equal_to_1_passed_in_run_method(self):
ranker = MetaFieldRanker(meta_field="rating", weight=0.0)
docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]
output = ranker.run(documents=docs_before, weight=1.0)
docs_after = output["documents"]
assert len(docs_after) == 3
sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True)
assert [doc.meta["rating"] for doc in docs_after] == sorted_scores
def test_sort_order_ascending(self): def test_sort_order_ascending(self):
ranker = MetaFieldRanker(meta_field="rating", weight=1.0, sort_order="ascending") ranker = MetaFieldRanker(meta_field="rating", weight=1.0, sort_order="ascending")
docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]] docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]