mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-03 23:19:20 +00:00
fix: `MetaFieldRanker` - use `weight` if passed in the `run` method (#7305)
* fix: `MetaFieldRanker` - use `weight` if passed in the `run` method
* reno
This commit is contained in:
parent
b86490bb5a
commit
38a80b0235
@ -103,7 +103,7 @@ class MetaFieldRanker:
|
|||||||
"Parameter <weight> must be in range [0,1] but is currently set to '%s'.\n'0' disables sorting by a "
|
"Parameter <weight> must be in range [0,1] but is currently set to '%s'.\n'0' disables sorting by a "
|
||||||
"meta field, '0.5' assigns equal weight to the previous relevance scores and the meta field, and "
|
"meta field, '0.5' assigns equal weight to the previous relevance scores and the meta field, and "
|
||||||
"'1' ranks by the meta field only.\nChange the <weight> parameter to a value in range 0 to 1 when "
|
"'1' ranks by the meta field only.\nChange the <weight> parameter to a value in range 0 to 1 when "
|
||||||
"initializing the MetaFieldRanker." % self.weight
|
"initializing the MetaFieldRanker." % weight
|
||||||
)
|
)
|
||||||
|
|
||||||
if ranking_mode not in ["reciprocal_rank_fusion", "linear_score"]:
|
if ranking_mode not in ["reciprocal_rank_fusion", "linear_score"]:
|
||||||
@ -250,7 +250,7 @@ class MetaFieldRanker:
|
|||||||
# Add the docs missing the meta_field back on the end
|
# Add the docs missing the meta_field back on the end
|
||||||
sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta]
|
sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta]
|
||||||
sorted_documents = sorted_by_meta + docs_missing_meta_field
|
sorted_documents = sorted_by_meta + docs_missing_meta_field
|
||||||
sorted_documents = self._merge_rankings(documents, sorted_documents)
|
sorted_documents = self._merge_rankings(documents, sorted_documents, weight)
|
||||||
return {"documents": sorted_documents[:top_k]}
|
return {"documents": sorted_documents[:top_k]}
|
||||||
|
|
||||||
def _parse_meta(
|
def _parse_meta(
|
||||||
@ -295,7 +295,9 @@ class MetaFieldRanker:
|
|||||||
|
|
||||||
return meta_values
|
return meta_values
|
||||||
|
|
||||||
def _merge_rankings(self, documents: List[Document], sorted_documents: List[Document]) -> List[Document]:
|
def _merge_rankings(
|
||||||
|
self, documents: List[Document], sorted_documents: List[Document], weight: float
|
||||||
|
) -> List[Document]:
|
||||||
"""
|
"""
|
||||||
Merge the two different rankings for Documents sorted both by their content and by their meta field.
|
Merge the two different rankings for Documents sorted both by their content and by their meta field.
|
||||||
"""
|
"""
|
||||||
@ -303,8 +305,8 @@ class MetaFieldRanker:
|
|||||||
|
|
||||||
if self.ranking_mode == "reciprocal_rank_fusion":
|
if self.ranking_mode == "reciprocal_rank_fusion":
|
||||||
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
|
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
|
||||||
scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - self.weight)
|
scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - weight)
|
||||||
scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * self.weight
|
scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * weight
|
||||||
elif self.ranking_mode == "linear_score":
|
elif self.ranking_mode == "linear_score":
|
||||||
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
|
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
|
||||||
score = float(0)
|
score = float(0)
|
||||||
@ -319,8 +321,8 @@ class MetaFieldRanker:
|
|||||||
else:
|
else:
|
||||||
score = document.score
|
score = document.score
|
||||||
|
|
||||||
scores_map[document.id] += score * (1 - self.weight)
|
scores_map[document.id] += score * (1 - weight)
|
||||||
scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * self.weight
|
scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * weight
|
||||||
|
|
||||||
for document in documents:
|
for document in documents:
|
||||||
document.score = scores_map[document.id]
|
document.score = scores_map[document.id]
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fix a bug in the `MetaFieldRanker` where the `weight` parameter passed to the `run` method was not being used.
|
@ -43,6 +43,16 @@ class TestMetaFieldRanker:
|
|||||||
sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True)
|
sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True)
|
||||||
assert [doc.meta["rating"] for doc in docs_after] == sorted_scores
|
assert [doc.meta["rating"] for doc in docs_after] == sorted_scores
|
||||||
|
|
||||||
|
def test_run_with_weight_equal_to_1_passed_in_run_method(self):
|
||||||
|
ranker = MetaFieldRanker(meta_field="rating", weight=0.0)
|
||||||
|
docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]
|
||||||
|
output = ranker.run(documents=docs_before, weight=1.0)
|
||||||
|
docs_after = output["documents"]
|
||||||
|
|
||||||
|
assert len(docs_after) == 3
|
||||||
|
sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True)
|
||||||
|
assert [doc.meta["rating"] for doc in docs_after] == sorted_scores
|
||||||
|
|
||||||
def test_sort_order_ascending(self):
|
def test_sort_order_ascending(self):
|
||||||
ranker = MetaFieldRanker(meta_field="rating", weight=1.0, sort_order="ascending")
|
ranker = MetaFieldRanker(meta_field="rating", weight=1.0, sort_order="ascending")
|
||||||
docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]
|
docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user