bug: fix MRR and MAP calculations (#7841)

* bug: fix MRR and MAP calculations
Author: Amna Mubashar, 2024-06-25 12:07:11 +02:00 (committed by GitHub)
parent c51f8ffb86
commit fc011d7b04
4 changed files with 41 additions and 32 deletions


@@ -43,6 +43,7 @@ class DocumentMAPEvaluator:
     ```
     """
+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
@@ -69,25 +70,21 @@ class DocumentMAPEvaluator:
         individual_scores = []
         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-                average_precision = 0.0
-                relevant_documents = 0
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-                    if ground_document.content in retrieved_document.content:
-                        relevant_documents += 1
-                        average_precision += relevant_documents / (rank + 1)
-                if relevant_documents > 0:
-                    score = average_precision / relevant_documents
-            individual_scores.append(score)
-        score = sum(individual_scores) / len(retrieved_documents)
+            average_precision = 0.0
+            average_precision_numerator = 0.0
+            relevant_documents = 0
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+                if retrieved_document.content in ground_truth_contents:
+                    relevant_documents += 1
+                    average_precision_numerator += relevant_documents / (rank + 1)
+            if relevant_documents > 0:
+                average_precision = average_precision_numerator / relevant_documents
+            individual_scores.append(average_precision)
+        score = sum(individual_scores) / len(ground_truth_documents)
         return {"score": score, "individual_scores": individual_scores}


@@ -41,6 +41,7 @@ class DocumentMRREvaluator:
     ```
     """
+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
@@ -67,20 +68,17 @@ class DocumentMRREvaluator:
         individual_scores = []
         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-                    if ground_document.content in retrieved_document.content:
-                        score = 1 / (rank + 1)
-                        break
-            individual_scores.append(score)
-        score = sum(individual_scores) / len(retrieved_documents)
+            reciprocal_rank = 0.0
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+                if retrieved_document.content in ground_truth_contents:
+                    reciprocal_rank = 1 / (rank + 1)
+                    break
+            individual_scores.append(reciprocal_rank)
+        score = sum(individual_scores) / len(ground_truth_documents)
         return {"score": score, "individual_scores": individual_scores}


@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fixed the calculation for MRR and MAP scores.
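To make the fixed calculation concrete, here is a small standalone sketch (not part of the commit) that mirrors the corrected per-query logic from both diffs, using hypothetical contents; note that matching is now exact membership in the ground-truth contents rather than substring containment:

# Hypothetical per-query data.
ground_truth_contents = ["Paris", "Berlin"]
retrieved_contents = ["Rome", "Paris", "Madrid", "Berlin"]

# Average precision: precision at each relevant hit, averaged over the hits.
relevant_documents = 0
average_precision_numerator = 0.0
for rank, content in enumerate(retrieved_contents):
    if content in ground_truth_contents:
        relevant_documents += 1
        average_precision_numerator += relevant_documents / (rank + 1)
average_precision = average_precision_numerator / relevant_documents if relevant_documents else 0.0

# Reciprocal rank: 1 / rank of the first relevant hit.
reciprocal_rank = 0.0
for rank, content in enumerate(retrieved_contents):
    if content in ground_truth_contents:
        reciprocal_rank = 1 / (rank + 1)
        break

print(average_precision)  # (1/2 + 2/4) / 2 = 0.5
print(reciprocal_rank)    # first hit at position 2 -> 1/2 = 0.5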


@@ -62,7 +62,17 @@ def test_run_with_complex_data():
             ],
         ],
     )
-    assert result == {"individual_scores": [1.0, 0.8333333333333333, 1.0, 0.5, 0.0, 1.0], "score": 0.7222222222222222}
+    assert result == {
+        "individual_scores": [
+            1.0,
+            pytest.approx(0.8333333333333333),
+            1.0,
+            pytest.approx(0.5833333333333333),
+            0.0,
+            pytest.approx(0.8055555555555555),
+        ],
+        "score": pytest.approx(0.7037037037037037),
+    }

 def test_run_with_different_lengths():