diff --git a/docs/_src/tutorials/tutorials/5.md b/docs/_src/tutorials/tutorials/5.md
index 54fbad670..eb625d869 100644
--- a/docs/_src/tutorials/tutorials/5.md
+++ b/docs/_src/tutorials/tutorials/5.md
@@ -282,6 +282,24 @@ metrics = advanced_eval_result.calculate_metrics()
 print(metrics["Reader"]["sas"])
 ```
 
+## Isolated Evaluation Mode to Understand Upper Bounds of the Reader's Performance
+The isolated node evaluation uses labels as input to the reader node instead of the output of the preceding retriever node.
+This way, we can additionally calculate the upper bounds of the reader's evaluation metrics.
+
+
+```python
+eval_result_with_upper_bounds = pipeline.eval(
+    labels=eval_labels,
+    params={"Retriever": {"top_k": 1}},
+    add_isolated_node_eval=True
+)
+```
+
+
+```python
+pipeline.print_eval_report(eval_result_with_upper_bounds)
+```
+
 ## Evaluation of Individual Components: Retriever
 
 Here we evaluate only the retriever, based on whether the gold_label document is retrieved.
diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb
index fb36bfdbc..21727f457 100644
--- a/tutorials/Tutorial5_Evaluation.ipynb
+++ b/tutorials/Tutorial5_Evaluation.ipynb
@@ -15346,10 +15346,54 @@
     }
    }
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Isolated Evaluation Mode to Understand Upper Bounds of the Reader's Performance\n",
+    "The isolated node evaluation uses labels as input to the reader node instead of the output of the preceding retriever node.\n",
+    "This way, we can additionally calculate the upper bounds of the reader's evaluation metrics."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "eval_result_with_upper_bounds = pipeline.eval(\n",
+    "    labels=eval_labels,\n",
+    "    params={\"Retriever\": {\"top_k\": 1}},\n",
+    "    add_isolated_node_eval=True\n",
+    ")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "pipeline.print_eval_report(eval_result_with_upper_bounds)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
   {
    "cell_type": "markdown",
    "source": [
     "## Evaluation of Individual Components: Retriever\n",
+    "Sometimes you might want to evaluate individual components, for example, if you don't have a pipeline but only a retriever or a reader with a model that you trained yourself.\n",
     "Here we evaluate only the retriever, based on whether the gold_label document is retrieved."
    ],
    "metadata": {
diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py
index 9a67612dc..d6545debb 100644
--- a/tutorials/Tutorial5_Evaluation.py
+++ b/tutorials/Tutorial5_Evaluation.py
@@ -179,7 +179,18 @@ def tutorial5_evaluation():
     metrics = advanced_eval_result.calculate_metrics()
     print(metrics["Reader"]["sas"])
 
+    ## Isolated Evaluation Mode to Understand Upper Bounds of the Reader's Performance
+    # The isolated node evaluation uses labels as input to the reader node instead of the output of the preceding retriever node.
+    # This way, we can additionally calculate the upper bounds of the reader's evaluation metrics.
+    eval_result_with_upper_bounds = pipeline.eval(
+        labels=eval_labels,
+        params={"Retriever": {"top_k": 1}},
+        add_isolated_node_eval=True
+    )
+    pipeline.print_eval_report(eval_result_with_upper_bounds)
+
     ## Evaluation of Individual Components
+    # Sometimes you might want to evaluate individual components, for example, if you don't have a pipeline but only a retriever or a reader with a model that you trained yourself.
     # Evaluate Retriever on its own
     # Here we evaluate only the retriever, based on whether the gold_label document is retrieved.
     retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index)
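As a usage note for the snippets added above: besides `print_eval_report`, the returned `EvaluationResult` carries both the integrated and the isolated (upper-bound) numbers for the reader. A minimal sketch of how one might compare the two, assuming this Haystack version's `calculate_metrics` accepts an `eval_mode` parameter with the values `"integrated"` and `"isolated"`:

```python
# Sketch: compare the reader's integrated metrics against its isolated
# (upper-bound) metrics. Assumes calculate_metrics() accepts an eval_mode
# parameter taking the values "integrated" and "isolated".
integrated = eval_result_with_upper_bounds.calculate_metrics(eval_mode="integrated")
isolated = eval_result_with_upper_bounds.calculate_metrics(eval_mode="isolated")

# The isolated score is the reader's ceiling on these labels; the gap to
# the integrated score is the headroom a better retriever could unlock.
print(f"Reader f1:             {integrated['Reader']['f1']:.3f}")
print(f"Reader f1 upper bound: {isolated['Reader']['f1']:.3f}")
```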