Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-10-14 09:28:56 +00:00)
Evaluating a pipeline consisting only of a reader node (#2132)

* pass documents as extra param to eval
* pass documents via labels to eval
* rename param in docs
* Update Documentation & Code Style
* Revert "rename param in docs"
  This reverts commit 2f4c2ec79575e9dd33a8300785f789a327df36f4.
* Revert "pass documents via labels to eval"
  This reverts commit dcc51e41f2637d093d81c7d193b873c17c36b174.
* simplify iterating through labels and docs
* Update Documentation & Code Style

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

parent 1e3edef803
commit 7fab027bf0
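In short, this change adds an optional `documents` argument to `Pipeline.eval()` so that a pipeline whose first node is a reader can be evaluated without a retriever supplying documents. A minimal usage sketch under assumptions not in the diff: the `FARMReader` model name is a placeholder, and `my_labels` stands for a prepared list of `MultiLabel` objects.

```python
from haystack import Pipeline
from haystack.nodes import FARMReader

# Placeholder model; any extractive QA reader works the same way.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")

# Reader-only pipeline: no retriever in front of the reader.
pipeline = Pipeline()
pipeline.add_node(component=reader, name="Reader", inputs=["Query"])

# Feed each query's candidate documents directly, e.g. the labels' own documents.
eval_result = pipeline.eval(
    labels=my_labels,
    documents=[[label.document for label in multilabel.labels] for multilabel in my_labels],
)
metrics = eval_result.calculate_metrics()
```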
@@ -242,7 +242,7 @@ then be found in the dict returned by this method under the key "_debug"
 #### eval
 
 ```python
-def eval(labels: List[MultiLabel], params: Optional[dict] = None, sas_model_name_or_path: str = None, add_isolated_node_eval: bool = False) -> EvaluationResult
+def eval(labels: List[MultiLabel], documents: Optional[List[List[Document]]] = None, params: Optional[dict] = None, sas_model_name_or_path: str = None, add_isolated_node_eval: bool = False) -> EvaluationResult
 ```
 
 Evaluates the pipeline by running the pipeline once per query in debug mode
@@ -252,6 +252,7 @@ and putting together all data that is needed for evaluation, e.g. calculating metrics.
 **Arguments**:
 
 - `labels`: The labels to evaluate on
+- `documents`: List of List of Document that the first node in the pipeline should get as input per multilabel. Can be used to evaluate a pipeline that consists of a reader without a retriever.
 - `params`: Dictionary of parameters to be dispatched to the nodes.
 If you want to pass a param to all nodes, you can just use: {"top_k":10}
 If you want to pass it to targeted nodes, you can do:
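To make the shape of the new argument concrete: the outer list is aligned with `labels`, one inner list of candidate documents per query. A minimal sketch, assuming Haystack's `Document` class and two made-up queries:

```python
from haystack import Document

# Hypothetical two-query example: the outer list has one entry per MultiLabel;
# each inner list holds the documents handed to the first node (the reader).
documents = [
    [Document(content="Berlin is the capital of Germany.")],  # docs for query 1
    [Document(content="Paris is the capital of France.")],    # docs for query 2
]
```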
@@ -480,6 +480,7 @@ class Pipeline(BasePipeline):
     def eval(
         self,
         labels: List[MultiLabel],
+        documents: Optional[List[List[Document]]] = None,
         params: Optional[dict] = None,
         sas_model_name_or_path: str = None,
         add_isolated_node_eval: bool = False,
@@ -489,6 +490,7 @@ class Pipeline(BasePipeline):
         and putting together all data that is needed for evaluation, e.g. calculating metrics.
 
         :param labels: The labels to evaluate on
+        :param documents: List of List of Document that the first node in the pipeline should get as input per multilabel. Can be used to evaluate a pipeline that consists of a reader without a retriever.
         :param params: Dictionary of parameters to be dispatched to the nodes.
             If you want to pass a param to all nodes, you can just use: {"top_k":10}
             If you want to pass it to targeted nodes, you can do:
@@ -518,7 +520,9 @@ class Pipeline(BasePipeline):
         if params is None:
             params = {}
         params["add_isolated_node_eval"] = True
-        for label in labels:
+
+        # if documents is None, set docs_per_label to None for each label
+        for docs_per_label, label in zip(documents or [None] * len(labels), labels):
             params_per_label = copy.deepcopy(params)
             if label.filters is not None:
                 if params_per_label is None:
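The fallback idiom in the new loop header is worth spelling out: when `documents` is `None`, `documents or [None] * len(labels)` substitutes one `None` per label, so `docs_per_label` is simply `None` and retriever-based pipelines behave exactly as before. A standalone sketch with placeholder values:

```python
labels = ["l1", "l2", "l3"]  # stand-ins for MultiLabel objects
documents = None

# With documents=None, the fallback yields one None per label,
# so every iteration sees docs_per_label = None.
for docs_per_label, label in zip(documents or [None] * len(labels), labels):
    print(docs_per_label, label)  # None l1 / None l2 / None l3
```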
@@ -526,7 +530,9 @@ class Pipeline(BasePipeline):
                 else:
                     # join both filters and overwrite filters in params with filters in labels
                     params_per_label["filters"] = {**params_per_label.get("filters", {}), **label.filters}
-            predictions = self.run(query=label.query, labels=label, params=params_per_label, debug=True)
+            predictions = self.run(
+                query=label.query, labels=label, documents=docs_per_label, params=params_per_label, debug=True
+            )
 
             for node_name in predictions["_debug"].keys():
                 node_output = predictions["_debug"][node_name]["output"]
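The unchanged context above also shows the filter-merging idiom: label filters overwrite parameter filters because later keys win in a dict merge. A quick illustration with made-up filter values:

```python
params_filters = {"year": "2020", "company": "A"}
label_filters = {"company": "B"}

# Later keys win, so the label's filter replaces the param's on collision.
merged = {**params_filters, **label_filters}
print(merged)  # {'year': '2020', 'company': 'B'}
```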
@@ -519,6 +519,21 @@ def test_extractive_qa_eval_sas(reader, retriever_with_docs):
     assert metrics["Reader"]["sas"] == pytest.approx(1.0)
 
 
+def test_reader_eval_in_pipeline(reader):
+    pipeline = Pipeline()
+    pipeline.add_node(component=reader, name="Reader", inputs=["Query"])
+    eval_result: EvaluationResult = pipeline.eval(
+        labels=EVAL_LABELS,
+        documents=[[label.document for label in multilabel.labels] for multilabel in EVAL_LABELS],
+        params={},
+    )
+
+    metrics = eval_result.calculate_metrics()
+
+    assert metrics["Reader"]["exact_match"] == 1.0
+    assert metrics["Reader"]["f1"] == 1.0
+
+
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
 def test_extractive_qa_eval_doc_relevance_col(reader, retriever_with_docs):