feat!: Remove unused query parameter from MetaFieldRanker (#6300)

* Remove unused query parameter from MetaFieldRanker

* Add release notes
Ashwin Mathur committed 2023-11-14 17:03:38 +05:30 (via GitHub)
parent 34136382c1
commit 4e4d5eb3e2
3 changed files with 16 additions and 15 deletions


@@ -25,7 +25,7 @@ class MetaFieldRanker:
Document(text="Barcelona", metadata={"rating": 2.1}),
]
- output = ranker.run(query="", documents=docs)
+ output = ranker.run(documents=docs)
docs = output["documents"]
assert docs[0].text == "Barcelona"
"""
@@ -45,7 +45,7 @@ class MetaFieldRanker:
0 disables sorting by the metadata field.
0.5 means the content and metadata fields have the same impact.
1 means sorting only by the metadata field, with the highest value coming first.
- :param top_k: The maximum number of documents to return per query.
+ :param top_k: The maximum number of documents to return.
:param ranking_mode: The mode used to combine the scores from the retriever with the scores based on the metadata field.
Possible values are 'reciprocal_rank_fusion' (default) and 'linear_score'.
Use the 'linear_score' mode only with retrievers or rankers that return a score in the range [0,1].
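For orientation, here is a minimal sketch of what a 'linear_score'-style combination with the weight semantics documented above could look like; the function name and formula are illustrative assumptions, not the component's actual implementation:

# Illustrative sketch only: assumes a plain linear interpolation between the
# two scores, matching the weight semantics documented above.
def linear_score(retriever_score: float, meta_score: float, weight: float) -> float:
    # weight=0 keeps only the retriever score, weight=1 keeps only the
    # (normalized) metadata score, and weight=0.5 weighs both equally.
    return (1 - weight) * retriever_score + weight * meta_score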
@@ -90,14 +90,13 @@
)
@component.output_types(documents=List[Document])
- def run(self, query: str, documents: List[Document], top_k: Optional[int] = None):
+ def run(self, documents: List[Document], top_k: Optional[int] = None):
"""
This method is used to rank a list of documents based on the selected metadata field by:
1. Sorting the documents by the metadata field in descending order.
2. Merging the scores from the metadata field with the scores from the previous component according to the strategy and weight provided.
3. Returning the top-k documents.
- :param query: Not used in practice (so can be left blank), as this ranker does not perform sorting based on semantic closeness of documents to the query.
:param documents: Documents provided for ranking.
:param top_k: (optional) How many documents to return at the end. If not provided, all documents will be returned.
"""


@@ -0,0 +1,4 @@
+ ---
+ preview:
+   - |
+     Removes the unused query parameter from the run method of MetaFieldRanker.
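Since run() loses a parameter, this is a breaking change for existing callers; the migration, taken directly from the call sites updated in this commit:

# Before: the query argument was accepted but ignored.
output = ranker.run(query="", documents=docs)

# After: pass documents (and optionally top_k) only.
output = ranker.run(documents=docs)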


@@ -29,17 +29,15 @@ class TestMetaFieldRanker:
}
@pytest.mark.integration
- @pytest.mark.parametrize(
-     "query, metafield_values, expected_first_value", [("", [1.3, 0.7, 2.1], 2.1), ("", [1, 5, 8], 8)]
- )
- def test_run(self, query, metafield_values, expected_first_value):
+ @pytest.mark.parametrize("metafield_values, expected_first_value", [([1.3, 0.7, 2.1], 2.1), ([1, 5, 8], 8)])
+ def test_run(self, metafield_values, expected_first_value):
"""
Test if the component ranks documents correctly.
"""
ranker = MetaFieldRanker(metadata_field="rating")
docs_before = [Document(content="abc", meta={"rating": value}) for value in metafield_values]
- output = ranker.run(query=query, documents=docs_before)
+ output = ranker.run(documents=docs_before)
docs_after = output["documents"]
assert len(docs_after) == 3
@@ -51,7 +49,7 @@ class TestMetaFieldRanker:
@pytest.mark.integration
def test_returns_empty_list_if_no_documents_are_provided(self):
ranker = MetaFieldRanker(metadata_field="rating")
- output = ranker.run(query="", documents=[])
+ output = ranker.run(documents=[])
docs_after = output["documents"]
assert docs_after == []
@@ -60,7 +58,7 @@ class TestMetaFieldRanker:
ranker = MetaFieldRanker(metadata_field="rating")
docs_before = [Document(content="abc", meta={"wrong_field": 1.3})]
with pytest.raises(ComponentError):
- ranker.run(query="", documents=docs_before)
+ ranker.run(documents=docs_before)
@pytest.mark.integration
def test_raises_component_error_if_wrong_ranking_mode(self):
@@ -81,7 +79,7 @@ class TestMetaFieldRanker:
Document(content="abc", meta={"rating": 0.7}, score=0.4),
Document(content="abc", meta={"rating": 2.1}, score=0.6),
]
- output = ranker.run(query="", documents=docs_before)
+ output = ranker.run(documents=docs_before)
docs_after = output["documents"]
assert docs_after[0].score == 0.8
@@ -93,7 +91,7 @@ class TestMetaFieldRanker:
Document(content="abc", meta={"rating": 0.7}, score=0.4),
Document(content="abc", meta={"rating": 2.1}, score=0.6),
]
- output = ranker.run(query="", documents=docs_before)
+ output = ranker.run(documents=docs_before)
docs_after = output["documents"]
assert docs_after[0].score == 0.01626123744050767
@@ -109,7 +107,7 @@ class TestMetaFieldRanker:
with pytest.warns(
UserWarning, match=rf"The score {score} for document 1 is outside the \[0,1\] range; defaulting to 0"
):
- ranker.run(query="", documents=docs_before)
+ ranker.run(documents=docs_before)
@pytest.mark.integration
def test_linear_score_raises_warning_if_doc_without_score(self):
@@ -120,4 +118,4 @@ class TestMetaFieldRanker:
Document(content="abc", meta={"rating": 2.1}),
]
with pytest.warns(UserWarning, match="The score was not provided; defaulting to 0"):
- ranker.run(query="", documents=docs_before)
+ ranker.run(documents=docs_before)