Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-12-16 09:38:07 +00:00)
bug: fix MRR and MAP calculations (#7841)

* bug: fix MRR and MAP calculations

This commit is contained in:
parent c51f8ffb86
commit fc011d7b04
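Summary of the change, as reflected in the hunks below: previously both evaluators tested each ground-truth content with `ground_document.content in retrieved_document.content`, a substring check between two strings, and averaged the per-query scores over `len(retrieved_documents)`. After the fix, each retrieved document's content is checked for exact membership in the list of ground-truth contents, and the mean is taken over `len(ground_truth_documents)`. A small illustration of the difference between the two `in` checks (plain strings, not library code):

```python
ground = "Berlin"
retrieved = "Berlin is the capital of Germany"

# Old comparison: substring containment between two strings.
print(ground in retrieved)    # True, even though the contents differ

# New comparison: membership in the list of ground-truth contents.
print(retrieved in [ground])  # False, only identical contents count as a hit
```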
@@ -43,6 +43,7 @@ class DocumentMAPEvaluator:
     ```
     """

+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
@@ -69,25 +70,21 @@ class DocumentMAPEvaluator:
         individual_scores = []

         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-
-                average_precision = 0.0
-                relevant_documents = 0
-
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-
-                    if ground_document.content in retrieved_document.content:
-                        relevant_documents += 1
-                        average_precision += relevant_documents / (rank + 1)
-                if relevant_documents > 0:
-                    score = average_precision / relevant_documents
-            individual_scores.append(score)
-
-        score = sum(individual_scores) / len(retrieved_documents)
+            average_precision = 0.0
+            average_precision_numerator = 0.0
+            relevant_documents = 0
+
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+                if retrieved_document.content in ground_truth_contents:
+                    relevant_documents += 1
+                    average_precision_numerator += relevant_documents / (rank + 1)
+            if relevant_documents > 0:
+                average_precision = average_precision_numerator / relevant_documents
+            individual_scores.append(average_precision)
+
+        score = sum(individual_scores) / len(ground_truth_documents)

         return {"score": score, "individual_scores": individual_scores}
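For readers unfamiliar with the metric, the corrected loop computes one average-precision value per query: walk the retrieved list in rank order, count a hit whenever the retrieved content appears among the ground-truth contents, accumulate precision at each hit, and divide by the number of hits. A minimal standalone sketch of that calculation, using plain strings instead of Haystack Document objects (the function name here is illustrative, not part of the library):

```python
from typing import List


def average_precision(ground_truth_contents: List[str], retrieved_contents: List[str]) -> float:
    """Average precision for a single query, mirroring the fixed evaluator loop."""
    relevant_documents = 0
    numerator = 0.0
    for rank, content in enumerate(retrieved_contents):
        if content in ground_truth_contents:
            relevant_documents += 1
            numerator += relevant_documents / (rank + 1)  # precision at this hit
    return numerator / relevant_documents if relevant_documents > 0 else 0.0


# Hits at ranks 1 and 3: (1/1 + 2/3) / 2 = 0.8333...
print(average_precision(["a", "b"], ["a", "x", "b"]))
```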
@@ -41,6 +41,7 @@ class DocumentMRREvaluator:
     ```
     """

+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
@@ -67,20 +68,17 @@ class DocumentMRREvaluator:
         individual_scores = []

         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-
-                    if ground_document.content in retrieved_document.content:
-                        score = 1 / (rank + 1)
-                        break
-            individual_scores.append(score)
-
-        score = sum(individual_scores) / len(retrieved_documents)
+            reciprocal_rank = 0.0
+
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+                if retrieved_document.content in ground_truth_contents:
+                    reciprocal_rank = 1 / (rank + 1)
+                    break
+            individual_scores.append(reciprocal_rank)
+
+        score = sum(individual_scores) / len(ground_truth_documents)

         return {"score": score, "individual_scores": individual_scores}
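The MRR change follows the same pattern: per query, the score is the reciprocal rank of the first retrieved document whose content matches a ground-truth content, or 0.0 if there is none. A standalone sketch with plain strings (illustrative names, not the library API):

```python
from typing import List


def reciprocal_rank(ground_truth_contents: List[str], retrieved_contents: List[str]) -> float:
    """Reciprocal rank of the first relevant document, as in the fixed evaluator."""
    for rank, content in enumerate(retrieved_contents):
        if content in ground_truth_contents:
            return 1 / (rank + 1)
    return 0.0


# First match at rank 2 -> 1/2
print(reciprocal_rank(["b"], ["a", "b", "c"]))
```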
releasenotes/notes/fix-issue-7758-d35b687ca226a707.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fixed the calculation for MRR and MAP scores.
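For context, the run() signature touched by this diff takes per-query lists of ground-truth and retrieved Document objects and returns the overall score plus one score per query. A usage sketch, assuming the Haystack 2.x import paths for Document and the evaluators:

```python
from haystack import Document
from haystack.components.evaluators import DocumentMAPEvaluator, DocumentMRREvaluator

ground_truth = [[Document(content="Berlin")], [Document(content="Paris")]]
retrieved = [
    [Document(content="Berlin"), Document(content="Munich")],  # hit at rank 1
    [Document(content="London"), Document(content="Paris")],   # hit at rank 2
]

map_result = DocumentMAPEvaluator().run(
    ground_truth_documents=ground_truth, retrieved_documents=retrieved
)
mrr_result = DocumentMRREvaluator().run(
    ground_truth_documents=ground_truth, retrieved_documents=retrieved
)

print(map_result["individual_scores"], map_result["score"])  # [1.0, 0.5] 0.75
print(mrr_result["individual_scores"], mrr_result["score"])  # [1.0, 0.5] 0.75
```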
@@ -62,7 +62,17 @@ def test_run_with_complex_data():
             ],
         ],
     )
-    assert result == {"individual_scores": [1.0, 0.8333333333333333, 1.0, 0.5, 0.0, 1.0], "score": 0.7222222222222222}
+    assert result == {
+        "individual_scores": [
+            1.0,
+            pytest.approx(0.8333333333333333),
+            1.0,
+            pytest.approx(0.5833333333333333),
+            0.0,
+            pytest.approx(0.8055555555555555),
+        ],
+        "score": pytest.approx(0.7037037037037037),
+    }


 def test_run_with_different_lengths():
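As a quick sanity check on the updated expectations (the test inputs themselves are outside this excerpt), the new overall score is the mean of the new individual scores, and pytest.approx absorbs the floating-point rounding:

```python
import pytest

individual_scores = [1.0, 0.8333333333333333, 1.0, 0.5833333333333333, 0.0, 0.8055555555555555]

# (sum of per-query scores) / (number of queries) matches the expected overall score
assert sum(individual_scores) / len(individual_scores) == pytest.approx(0.7037037037037037)
```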