diff --git a/haystack/schema.py b/haystack/schema.py
index 7fee73724..3b179a03d 100644
--- a/haystack/schema.py
+++ b/haystack/schema.py
@@ -1216,7 +1216,7 @@ class EvaluationResult:
             answer_scope=answer_scope,
         )
         num_examples_for_eval = len(answers["multilabel_id"].unique())
-        result = {metric: metrics_df[metric].mean() for metric in metrics_df.columns}
+        result = {metric: metrics_df[metric].mean().tolist() for metric in metrics_df.columns}
         result["num_examples_for_eval"] = float(num_examples_for_eval)  # formatter requires float
         return result
 
@@ -1314,7 +1314,7 @@ class EvaluationResult:
             document_relevance_criterion=document_relevance_criterion,
         )
 
-        return {metric: metrics_df[metric].mean() for metric in metrics_df.columns}
+        return {metric: metrics_df[metric].mean().tolist() for metric in metrics_df.columns}
 
     def _build_document_metrics_df(
         self,
diff --git a/test/pipelines/test_eval.py b/test/pipelines/test_eval.py
index 9ff14dc50..e5dc9efb1 100644
--- a/test/pipelines/test_eval.py
+++ b/test/pipelines/test_eval.py
@@ -336,6 +336,11 @@
     assert metrics["Retriever"]["map"] == 1.0
     assert metrics["Retriever"]["ndcg"] == 1.0
 
+    # assert metrics are floats
+    for node_metrics in metrics.values():
+        for value in node_metrics.values():
+            assert isinstance(value, float)
+
     eval_result.save(tmp_path)
     saved_eval_result = EvaluationResult.load(tmp_path)
     metrics = saved_eval_result.calculate_metrics(document_scope="document_id")
@@ -357,6 +362,11 @@
     assert metrics["Retriever"]["map"] == 1.0
     assert metrics["Retriever"]["ndcg"] == 1.0
 
+    # assert metrics are floats
+    for node_metrics in metrics.values():
+        for value in node_metrics.values():
+            assert isinstance(value, float)
+
 
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
@@ -525,6 +535,11 @@ def test_extractive_qa_eval_sas(reader, retriever_with_docs):
     assert "sas" in metrics["Reader"]
     assert metrics["Reader"]["sas"] == pytest.approx(1.0)
 
+    # assert metrics are floats
+    for node_metrics in metrics.values():
+        for value in node_metrics.values():
+            assert isinstance(value, float)
+
 
 @pytest.mark.parametrize("reader", ["farm"], indirect=True)
 def test_reader_eval_in_pipeline(reader):