Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-12-16 17:48:19 +00:00)
bug: fix MRR and MAP calculations (#7841)

* bug: fix MRR and MAP calculations

parent c51f8ffb86
commit fc011d7b04
@@ -43,6 +43,7 @@ class DocumentMAPEvaluator:
     ```
     """

+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
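For reference, the run() signature shown above takes one list of ground-truth Documents and one list of retrieved Documents per query and returns an overall score plus per-query individual_scores. A rough usage sketch (not part of the commit, document contents made up):

```python
# Rough usage sketch for DocumentMAPEvaluator; document contents are made up.
from haystack import Document
from haystack.components.evaluators import DocumentMAPEvaluator

evaluator = DocumentMAPEvaluator()
result = evaluator.run(
    ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
    retrieved_documents=[
        [Document(content="Berlin")],
        [Document(content="London"), Document(content="Paris")],
    ],
)
# With the corrected logic this should give individual_scores [1.0, 0.5] and score 0.75.
print(result)
```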
@@ -69,25 +70,21 @@ class DocumentMAPEvaluator:
         individual_scores = []

         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-
-                average_precision = 0.0
-                relevant_documents = 0
-
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-
-                    if ground_document.content in retrieved_document.content:
-                        relevant_documents += 1
-                        average_precision += relevant_documents / (rank + 1)
-                if relevant_documents > 0:
-                    score = average_precision / relevant_documents
-            individual_scores.append(score)
-
-        score = sum(individual_scores) / len(retrieved_documents)
-
+            average_precision = 0.0
+            average_precision_numerator = 0.0
+            relevant_documents = 0
+
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+
+                if retrieved_document.content in ground_truth_contents:
+                    relevant_documents += 1
+                    average_precision_numerator += relevant_documents / (rank + 1)
+            if relevant_documents > 0:
+                average_precision = average_precision_numerator / relevant_documents
+            individual_scores.append(average_precision)
+
+        score = sum(individual_scores) / len(ground_truth_documents)
         return {"score": score, "individual_scores": individual_scores}
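For context (not part of the commit): the corrected MAP logic checks retrieved content for exact membership in the per-query ground-truth contents and normalises by the number of relevant documents found. A minimal sketch of that per-query computation, with plain strings standing in for Document contents:

```python
# Minimal sketch of the corrected per-query average-precision logic.
# Plain strings stand in for Document.content; names are illustrative only.
from typing import List


def average_precision(ground_truth_contents: List[str], retrieved_contents: List[str]) -> float:
    relevant_documents = 0
    numerator = 0.0
    for rank, content in enumerate(retrieved_contents):
        if content in ground_truth_contents:  # exact membership, not substring matching
            relevant_documents += 1
            numerator += relevant_documents / (rank + 1)
    return numerator / relevant_documents if relevant_documents else 0.0


# Relevant documents at ranks 2 and 3: AP = (1/2 + 2/3) / 2 = 0.5833...
print(average_precision(["doc A", "doc B"], ["doc X", "doc A", "doc B"]))
```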
@@ -41,6 +41,7 @@ class DocumentMRREvaluator:
     ```
     """

+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
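As above, a rough usage sketch for the MRR evaluator (not part of the commit, contents made up); it follows the same run() signature:

```python
# Rough usage sketch for DocumentMRREvaluator; document contents are made up.
from haystack import Document
from haystack.components.evaluators import DocumentMRREvaluator

evaluator = DocumentMRREvaluator()
result = evaluator.run(
    ground_truth_documents=[[Document(content="Berlin")], [Document(content="Paris")]],
    retrieved_documents=[
        [Document(content="Berlin")],
        [Document(content="London"), Document(content="Paris")],
    ],
)
# First relevant hits at ranks 1 and 2: individual_scores should be [1.0, 0.5], score 0.75.
print(result)
```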
@@ -67,20 +68,17 @@ class DocumentMRREvaluator:
         individual_scores = []

         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-
-                    if ground_document.content in retrieved_document.content:
-                        score = 1 / (rank + 1)
-                        break
-            individual_scores.append(score)
-
-        score = sum(individual_scores) / len(retrieved_documents)
+            reciprocal_rank = 0.0
+
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+                if retrieved_document.content in ground_truth_contents:
+                    reciprocal_rank = 1 / (rank + 1)
+                    break
+            individual_scores.append(reciprocal_rank)
+
+        score = sum(individual_scores) / len(ground_truth_documents)

         return {"score": score, "individual_scores": individual_scores}
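Likewise for context, the corrected MRR logic takes the reciprocal rank of the first exact match per query; a minimal sketch with plain strings in place of Document contents:

```python
# Minimal sketch of the corrected per-query reciprocal-rank logic (illustrative names).
from typing import List


def reciprocal_rank(ground_truth_contents: List[str], retrieved_contents: List[str]) -> float:
    for rank, content in enumerate(retrieved_contents):
        if content in ground_truth_contents:  # first exact match wins
            return 1 / (rank + 1)
    return 0.0


# First relevant document at rank 3: RR = 1/3.
print(reciprocal_rank(["doc A"], ["doc X", "doc Y", "doc A"]))
```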
releasenotes/notes/fix-issue-7758-d35b687ca226a707.yaml (new file, 4 lines added)

@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fixed the calculation for MRR and MAP scores.
@@ -62,7 +62,17 @@ def test_run_with_complex_data():
             ],
         ],
     )
-    assert result == {"individual_scores": [1.0, 0.8333333333333333, 1.0, 0.5, 0.0, 1.0], "score": 0.7222222222222222}
+    assert result == {
+        "individual_scores": [
+            1.0,
+            pytest.approx(0.8333333333333333),
+            1.0,
+            pytest.approx(0.5833333333333333),
+            0.0,
+            pytest.approx(0.8055555555555555),
+        ],
+        "score": pytest.approx(0.7037037037037037),
+    }


 def test_run_with_different_lengths():
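Side note on the test change: the updated expectations wrap the non-trivial floats in pytest.approx, which compares within a relative tolerance instead of requiring exact equality. Purely as illustrative arithmetic (these expressions are not taken from the test fixtures):

```python
# pytest.approx tolerates floating-point rounding in chained divisions.
import pytest

assert (1 / 2 + 2 / 3) / 2 == pytest.approx(0.5833333333333333)
assert (1 + 2 / 3 + 3 / 4) / 3 == pytest.approx(0.8055555555555555)
```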