Mirror of https://github.com/deepset-ai/haystack.git, synced 2026-01-04 11:07:52 +00:00
feat: Add Eval and EvaluationResult (#6505)
* Add initial implementation for Eval and EvaluationResult
* Add release notes
* Update files with suggestions from review
* Remove serialization
* Add eval e2e tests
* Update eval e2e tests
This commit is contained in:
parent 3e0e81b1e0
commit 46b395eec3
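
Before the diff itself, a minimal sketch of how the new API introduced by this commit is meant to be used. The pipeline, document, and query below are assembled from the e2e tests that follow and are only illustrative (in particular, the "documents" output shape for the BM25 retriever is an assumption based on the retriever outputs shown in those tests); they are not part of the commit.

from haystack import Pipeline
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.evaluation.eval import eval

# A one-component retrieval pipeline, mirroring the e2e tests in this commit.
document_store = InMemoryDocumentStore()
document_store.write_documents([Document(content="My name is Giorgio and I live in Rome.")])

pipeline = Pipeline()
pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="retriever")

inputs = [{"retriever": {"query": "Who lives in Rome?"}}]
# Expected outputs follow the pipeline's output shape; the exact Document (id, score) is illustrative.
expected_outputs = [{"retriever": {"documents": [Document(content="My name is Giorgio and I live in Rome.")]}}]

eval_result = eval(pipeline, inputs=inputs, expected_outputs=expected_outputs)
assert len(eval_result.outputs) == len(inputs)
assert eval_result.runnable.to_dict() == pipeline.to_dict()

Note that, as the eval.py implementation further down shows, eval only collects the actual outputs alongside the expected ones; it does not compare them or compute metrics.
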
e2e/pipelines/test_eval_dense_doc_search.py (new file, 85 lines)
@@ -0,0 +1,85 @@
from haystack import Pipeline
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.routers import DocumentJoiner, FileTypeRouter
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.evaluation.eval import eval


def test_dense_doc_search_pipeline(samples_path):
    # Create the indexing pipeline
    indexing_pipeline = Pipeline()
    indexing_pipeline.add_component(
        instance=FileTypeRouter(mime_types=["text/plain", "application/pdf"]), name="file_type_router"
    )
    indexing_pipeline.add_component(instance=TextFileToDocument(), name="text_file_converter")
    indexing_pipeline.add_component(instance=PyPDFToDocument(), name="pdf_file_converter")
    indexing_pipeline.add_component(instance=DocumentJoiner(), name="joiner")
    indexing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
    indexing_pipeline.add_component(
        instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter"
    )
    indexing_pipeline.add_component(
        instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
        name="embedder",
    )
    indexing_pipeline.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="writer")

    indexing_pipeline.connect("file_type_router.text/plain", "text_file_converter.sources")
    indexing_pipeline.connect("file_type_router.application/pdf", "pdf_file_converter.sources")
    indexing_pipeline.connect("text_file_converter.documents", "joiner.documents")
    indexing_pipeline.connect("pdf_file_converter.documents", "joiner.documents")
    indexing_pipeline.connect("joiner.documents", "cleaner.documents")
    indexing_pipeline.connect("cleaner.documents", "splitter.documents")
    indexing_pipeline.connect("splitter.documents", "embedder.documents")
    indexing_pipeline.connect("embedder.documents", "writer.documents")

    indexing_pipeline.run({"file_type_router": {"sources": list(samples_path.iterdir())}})
    filled_document_store = indexing_pipeline.get_component("writer").document_store

    # Create the querying pipeline
    query_pipeline = Pipeline()
    query_pipeline.add_component(
        instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
        name="text_embedder",
    )
    query_pipeline.add_component(
        instance=InMemoryEmbeddingRetriever(document_store=filled_document_store, top_k=20), name="embedding_retriever"
    )
    query_pipeline.connect("text_embedder", "embedding_retriever")

    inputs = [{"text_embedder": {"text": "Who lives in Rome?"}}]
    expected_outputs = [
        {
            "embedding_retriever": {
                "documents": [
                    Document(
                        id="d219162e5d0b8e5eab901e32ce0d9c12d24e5ea26a92780442fcfa560eb0b7d6",
                        content="My name is Giorgio and I live in Rome.",
                        meta={
                            "file_path": "/home/ashwin/data_science/0ashwin/opensource/haystack/e2e/samples/doc_1.txt",
                            "source_id": "0366ae1654f4573564e29184cd4a2232286a93f4f25d6790ce703ae7d4d7d63c",
                        },
                        score=0.627746287158654,
                    ),
                    Document(
                        id="2dcf2bc0307ba21fbb7e97a307d987a05297e577a44f170081acdbab9fc4b95f",
                        content="A sample PDF file History and standardizationFormat (PDF) Adobe Systems made the PDF specification ava...",
                        meta={"source_id": "ec1ac6c430ecd0cc74ae56f3e2d84f93fef3f5393de6901fe8aa01e494ebcdbe"},
                        score=-0.060180130727963355,
                    ),
                ]
            }
        }
    ]

    eval_result = eval(query_pipeline, inputs=inputs, expected_outputs=expected_outputs)

    assert eval_result.inputs == inputs
    assert eval_result.expected_outputs == expected_outputs
    assert len(eval_result.outputs) == len(expected_outputs) == len(inputs)
    assert eval_result.runnable.to_dict() == query_pipeline.to_dict()
e2e/pipelines/test_eval_extractive_qa_pipeline.py (new file, 125 lines)
@@ -0,0 +1,125 @@
from haystack import Pipeline
from haystack.components.readers import ExtractiveReader
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.dataclasses import Document, ExtractedAnswer
from haystack.document_stores import InMemoryDocumentStore
from haystack.evaluation.eval import eval


def test_extractive_qa_pipeline():
    # Create the pipeline
    qa_pipeline = Pipeline()
    qa_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever")
    qa_pipeline.add_component(instance=ExtractiveReader(model_name_or_path="deepset/tinyroberta-squad2"), name="reader")
    qa_pipeline.connect("retriever", "reader")

    # Populate the document store
    documents = [
        Document(content="My name is Jean and I live in Paris."),
        Document(content="My name is Mark and I live in Berlin."),
        Document(content="My name is Giorgio and I live in Rome."),
    ]
    qa_pipeline.get_component("retriever").document_store.write_documents(documents)

    # Query and assert
    questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
    inputs = [{"retriever": {"query": question}, "reader": {"query": question, "top_k": 1}} for question in questions]
    expected_outputs = [
        {
            "reader": {
                "answers": [
                    ExtractedAnswer(
                        query="Who lives in Paris?",
                        score=0.7713339924812317,
                        data="Jean and I",
                        document=Document(
                            id="6c90b78ad94e4e634e2a067b5fe2d26d4ce95405ec222cbaefaeb09ab4dce81e",
                            content="My name is Jean and I live in Paris.",
                            score=0.33144005810482535,
                        ),
                        context=None,
                        document_offset=ExtractedAnswer.Span(start=11, end=21),
                        context_offset=None,
                        meta={},
                    ),
                    ExtractedAnswer(
                        query="Who lives in Paris?",
                        score=0.2286660075187683,
                        data=None,
                        document=None,
                        context=None,
                        document_offset=None,
                        context_offset=None,
                        meta={},
                    ),
                ]
            }
        },
        {
            "reader": {
                "answers": [
                    ExtractedAnswer(
                        query="Who lives in Berlin?",
                        score=0.7047999501228333,
                        data="Mark and I",
                        document=Document(
                            id="10a183e965c2e107e20507c717f16559c58a8ba4bc7c577ea8dc32a8d6ca7a20",
                            content="My name is Mark and I live in Berlin.",
                            score=0.33144005810482535,
                        ),
                        context=None,
                        document_offset=ExtractedAnswer.Span(start=11, end=21),
                        context_offset=None,
                        meta={},
                    ),
                    ExtractedAnswer(
                        query="Who lives in Berlin?",
                        score=0.29520004987716675,
                        data=None,
                        document=None,
                        context=None,
                        document_offset=None,
                        context_offset=None,
                        meta={},
                    ),
                ]
            }
        },
        {
            "reader": {
                "answers": [
                    ExtractedAnswer(
                        query="Who lives in Rome?",
                        score=0.7661304473876953,
                        data="Giorgio and I",
                        document=Document(
                            id="fb0f1efe94b3c78aa1c4e5a17a5ef8270f70e89d36a3665c8362675e8a769a27",
                            content="My name is Giorgio and I live in Rome.",
                            score=0.33144005810482535,
                        ),
                        context=None,
                        document_offset=ExtractedAnswer.Span(start=11, end=24),
                        context_offset=None,
                        meta={},
                    ),
                    ExtractedAnswer(
                        query="Who lives in Rome?",
                        score=0.2338695526123047,
                        data=None,
                        document=None,
                        context=None,
                        document_offset=None,
                        context_offset=None,
                        meta={},
                    ),
                ]
            }
        },
    ]

    eval_result = eval(qa_pipeline, inputs=inputs, expected_outputs=expected_outputs)

    assert eval_result.inputs == inputs
    assert eval_result.expected_outputs == expected_outputs
    assert len(eval_result.outputs) == len(expected_outputs) == len(inputs)
    assert eval_result.runnable.to_dict() == qa_pipeline.to_dict()
e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py (new file, 100 lines)
@@ -0,0 +1,100 @@
from haystack import Document, Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.routers.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.evaluation.eval import eval


def test_hybrid_doc_search_pipeline():
    # Create the pipeline
    document_store = InMemoryDocumentStore()
    hybrid_pipeline = Pipeline()
    hybrid_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="bm25_retriever")
    hybrid_pipeline.add_component(
        instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
        name="text_embedder",
    )
    hybrid_pipeline.add_component(
        instance=InMemoryEmbeddingRetriever(document_store=document_store), name="embedding_retriever"
    )
    hybrid_pipeline.add_component(instance=DocumentJoiner(), name="joiner")
    hybrid_pipeline.add_component(instance=TransformersSimilarityRanker(top_k=2), name="ranker")

    hybrid_pipeline.connect("bm25_retriever", "joiner")
    hybrid_pipeline.connect("text_embedder", "embedding_retriever")
    hybrid_pipeline.connect("embedding_retriever", "joiner")
    hybrid_pipeline.connect("joiner", "ranker")

    # Populate the document store
    documents = [
        Document(content="My name is Jean and I live in Paris."),
        Document(content="My name is Mark and I live in Berlin."),
        Document(content="My name is Mario and I live in the capital of Italy."),
        Document(content="My name is Giorgio and I live in Rome."),
    ]
    hybrid_pipeline.get_component("bm25_retriever").document_store.write_documents(documents)

    questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
    inputs = [
        {"bm25_retriever": {"query": question}, "text_embedder": {"text": question}, "ranker": {"query": question}}
        for question in questions
    ]
    expected_outputs = [
        {
            "ranker": {
                "documents": [
                    Document(
                        id="6c90b78ad94e4e634e2a067b5fe2d26d4ce95405ec222cbaefaeb09ab4dce81e",
                        content="My name is Jean and I live in Paris.",
                        score=2.2277960777282715,
                    ),
                    Document(
                        id="10a183e965c2e107e20507c717f16559c58a8ba4bc7c577ea8dc32a8d6ca7a20",
                        content="My name is Mark and I live in Berlin.",
                        score=-7.304897308349609,
                    ),
                ]
            }
        },
        {
            "ranker": {
                "documents": [
                    Document(
                        id="10a183e965c2e107e20507c717f16559c58a8ba4bc7c577ea8dc32a8d6ca7a20",
                        content="My name is Mark and I live in Berlin.",
                        score=3.694173812866211,
                    ),
                    Document(
                        id="f7533b5c6c968680d0ef8e38f366d4e68b7ac0d7238f1b1b366d15cb9c33efd8",
                        content="My name is Mario and I live in the capital of Italy.",
                        score=-9.008655548095703,
                    ),
                ]
            }
        },
        {
            "ranker": {
                "documents": [
                    Document(
                        id="fb0f1efe94b3c78aa1c4e5a17a5ef8270f70e89d36a3665c8362675e8a769a27",
                        content="My name is Giorgio and I live in Rome.",
                        score=3.487802028656006,
                    ),
                    Document(
                        id="f7533b5c6c968680d0ef8e38f366d4e68b7ac0d7238f1b1b366d15cb9c33efd8",
                        content="My name is Mario and I live in the capital of Italy.",
                        score=-2.873128890991211,
                    ),
                ]
            }
        },
    ]

    eval_result = eval(hybrid_pipeline, inputs=inputs, expected_outputs=expected_outputs)

    assert eval_result.inputs == inputs
    assert eval_result.expected_outputs == expected_outputs
    assert len(eval_result.outputs) == len(expected_outputs) == len(inputs)
    assert eval_result.runnable.to_dict() == hybrid_pipeline.to_dict()
e2e/pipelines/test_eval_rag_pipelines.py (new file, 145 lines)
@@ -0,0 +1,145 @@
from haystack import Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.evaluation.eval import eval


def test_bm25_rag_pipeline():
    prompt_template = """
    Given these documents, answer the question.\nDocuments:
    {% for doc in documents %}
        {{ doc.content }}
    {% endfor %}

    \nQuestion: {{question}}
    \nAnswer:
    """
    rag_pipeline = Pipeline()
    rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever")
    rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
    rag_pipeline.add_component(
        instance=HuggingFaceLocalGenerator(
            model_name_or_path="google/flan-t5-small",
            task="text2text-generation",
            generation_kwargs={"max_new_tokens": 100, "temperature": 0.5, "do_sample": True},
        ),
        name="llm",
    )
    rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
    rag_pipeline.connect("retriever", "prompt_builder.documents")
    rag_pipeline.connect("prompt_builder", "llm")
    rag_pipeline.connect("llm.replies", "answer_builder.replies")
    rag_pipeline.connect("retriever", "answer_builder.documents")

    # Populate the document store
    documents = [
        Document(content="My name is Jean and I live in Paris."),
        Document(content="My name is Mark and I live in Berlin."),
        Document(content="My name is Giorgio and I live in Rome."),
    ]
    rag_pipeline.get_component("retriever").document_store.write_documents(documents)

    questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
    inputs = [
        {
            "retriever": {"query": question},
            "prompt_builder": {"question": question},
            "answer_builder": {"query": question},
        }
        for question in questions
    ]

    expected_outputs = [
        {"llm": {"replies": ["Jean"]}},
        {"llm": {"replies": ["Mark"]}},
        {"llm": {"replies": ["Giorgio"]}},
    ]

    eval_result = eval(rag_pipeline, inputs=inputs, expected_outputs=expected_outputs)

    assert eval_result.inputs == inputs
    assert eval_result.expected_outputs == expected_outputs
    assert len(eval_result.outputs) == len(expected_outputs) == len(inputs)
    assert eval_result.runnable.to_dict() == rag_pipeline.to_dict()


def test_embedding_retrieval_rag_pipeline():
    # Create the RAG pipeline
    prompt_template = """
    Given these documents, answer the question.\nDocuments:
    {% for doc in documents %}
        {{ doc.content }}
    {% endfor %}

    \nQuestion: {{question}}
    \nAnswer:
    """
    rag_pipeline = Pipeline()
    rag_pipeline.add_component(
        instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
        name="text_embedder",
    )
    rag_pipeline.add_component(
        instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever"
    )
    rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
    rag_pipeline.add_component(
        instance=HuggingFaceLocalGenerator(
            model_name_or_path="google/flan-t5-small",
            task="text2text-generation",
            generation_kwargs={"max_new_tokens": 100, "temperature": 0.5, "do_sample": True},
        ),
        name="llm",
    )
    rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
    rag_pipeline.connect("text_embedder", "retriever")
    rag_pipeline.connect("retriever", "prompt_builder.documents")
    rag_pipeline.connect("prompt_builder", "llm")
    rag_pipeline.connect("llm.replies", "answer_builder.replies")
    rag_pipeline.connect("retriever", "answer_builder.documents")

    # Populate the document store
    documents = [
        Document(content="My name is Jean and I live in Paris."),
        Document(content="My name is Mark and I live in Berlin."),
        Document(content="My name is Giorgio and I live in Rome."),
    ]
    document_store = rag_pipeline.get_component("retriever").document_store
    indexing_pipeline = Pipeline()
    indexing_pipeline.add_component(
        instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
        name="document_embedder",
    )
    indexing_pipeline.add_component(instance=DocumentWriter(document_store=document_store), name="document_writer")
    indexing_pipeline.connect("document_embedder", "document_writer")
    indexing_pipeline.run({"document_embedder": {"documents": documents}})

    # Query and assert
    questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
    inputs = [
        {
            "prompt_builder": {"question": question},
            "text_embedder": {"text": question},
            "answer_builder": {"query": question},
        }
        for question in questions
    ]

    expected_outputs = [
        {"llm": {"replies": ["Jean"]}},
        {"llm": {"replies": ["Mark"]}},
        {"llm": {"replies": ["Giorgio"]}},
    ]

    eval_result = eval(rag_pipeline, inputs=inputs, expected_outputs=expected_outputs)

    assert eval_result.inputs == inputs
    assert eval_result.expected_outputs == expected_outputs
    assert len(eval_result.outputs) == len(expected_outputs) == len(inputs)
    assert eval_result.runnable.to_dict() == rag_pipeline.to_dict()
haystack/evaluation/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from haystack.evaluation.eval import EvaluationResult, eval

__all__ = ["eval", "EvaluationResult"]
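
Because __init__.py re-exports both names, callers can import them from the package rather than the eval module; a one-line sketch:

# Equivalent to `from haystack.evaluation.eval import EvaluationResult, eval`
from haystack.evaluation import EvaluationResult, eval
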
haystack/evaluation/eval.py (new file, 60 lines)
@@ -0,0 +1,60 @@
from typing import Any, Dict, List, Union

from haystack import Pipeline
from haystack.core.component import Component


class EvaluationResult:
    """
    EvaluationResult keeps track of all the information related to evaluation, namely the runnable (Pipeline or
    component), inputs, outputs, and expected outputs.

    The EvaluationResult keeps track of all the information stored by eval.

    :param runnable: The runnable (Pipeline or component) used for evaluation.
    :param inputs: List of inputs used for evaluation.
    :param outputs: List of outputs generated by the runnable.
    :param expected_outputs: List of expected outputs used for evaluation.
    """

    def __init__(
        self,
        runnable: Union[Pipeline, Component],
        inputs: List[Dict[str, Any]],
        outputs: List[Dict[str, Any]],
        expected_outputs: List[Dict[str, Any]],
    ) -> None:
        self.runnable = runnable
        self.inputs = inputs
        self.outputs = outputs
        self.expected_outputs = expected_outputs


def eval(
    runnable: Union[Pipeline, Component], inputs: List[Dict[str, Any]], expected_outputs: List[Dict[str, Any]]
) -> EvaluationResult:
    """
    Evaluates the provided Pipeline or component based on the given inputs and expected outputs.

    This function facilitates the evaluation of a given runnable (either a Pipeline or a component) using the
    provided inputs and corresponding expected outputs.

    :param runnable: The runnable (Pipeline or component) used for evaluation.
    :param inputs: List of inputs used for evaluation.
    :param expected_outputs: List of expected outputs used for evaluation.

    :return: An instance of EvaluationResult containing information about the evaluation, including the runnable,
        inputs, outputs, and expected outputs.
    """

    outputs = []

    # Check that the expected outputs have the correct shape
    if len(inputs) != len(expected_outputs):
        raise ValueError(
            f"The number of inputs ({len(inputs)}) does not match the number of expected outputs "
            f"({len(expected_outputs)}). Please ensure that each input has a corresponding expected output."
        )

    for input_ in inputs:
        output = runnable.run(input_)
        outputs.append(output)

    return EvaluationResult(runnable, inputs, outputs, expected_outputs)
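
One behavior worth noting from eval above: the input/expected-output length check runs before anything is executed. A small sketch of that path (the empty Pipeline here is only a placeholder to illustrate; it is never run because the mismatch is caught first):

from haystack import Pipeline
from haystack.evaluation.eval import eval

pipeline = Pipeline()

try:
    # Two inputs but only one expected output: eval raises before calling pipeline.run.
    eval(pipeline, inputs=[{}, {}], expected_outputs=[{}])
except ValueError as err:
    print(err)  # "The number of inputs (2) does not match the number of expected outputs (1). ..."
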
Release note (new file, 4 lines)
@@ -0,0 +1,4 @@
preview:
  - |
    Add eval function for evaluation of components and Pipelines.
    Adds EvaluationResult to store results of evaluation.