# SPDX-FileCopyrightText: 2022-present deepset GmbH
#
# SPDX-License-Identifier: Apache-2.0
# mypy: ignore-errors
# pylint: skip-file
###
### This is an example script showing how to use the eval function to evaluate a RAG Pipeline.
### For more information see the related proposal.
###
import os

from haystack import Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators.openai.gpt35 import GPT35Generator
from haystack.components.retrievers.memory import MemoryBM25Retriever
from haystack.dataclasses.document import Document
from haystack.document_stores.memory import MemoryDocumentStore

docstore = MemoryDocumentStore()

# Write some fake documents
docstore.write_documents(
    [
        Document(content="This is not the answer you are looking for.", meta={"name": "Obi-Wan Kenobi"}),
        Document(content="This is the way.", meta={"name": "Mandalorian"}),
        Document(content="The answer to life, the universe and everything is 42.", meta={"name": "Deep Thought"}),
        Document(content="When you play the game of thrones, you win or you die.", meta={"name": "Cersei Lannister"}),
        Document(content="Winter is coming.", meta={"name": "Ned Stark"}),
    ]
)

# Create our retriever; we set top_k to 3 to get only the best 3 documents, otherwise by default we would get 10
retriever = MemoryBM25Retriever(document_store=docstore, top_k=3)

# Create our prompt template
template = """Given the context, please answer the question.
Context:
{# We're receiving a list of lists, so we handle it like this #}
{% for list in documents %}
    {% for doc in list %}
        {{- doc -}};
    {% endfor %}
{% endfor %}
Question: {{ question }};
Answer:
"""
prompt_builder = PromptBuilder(template)

# We're using OpenAI gpt-3.5
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
generator = GPT35Generator(api_key=OPENAI_API_KEY)

# Build the pipeline
pipe = Pipeline()
pipe.add_component("docs_retriever", retriever)
pipe.add_component("builder", prompt_builder)
pipe.add_component("gpt35", generator)
pipe.connect("docs_retriever.documents", "builder.documents")
pipe.connect("builder.prompt", "gpt35.prompt")

# Run the pipeline
query = "What is the answer to life, the universe and everything?"
result = pipe.run({"docs_retriever": {"queries": [query]}, "builder": {"question": query}})
print(result["gpt35"]["replies"])
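# --- Illustrative aside (not part of the proposal) --------------------------
# A minimal sketch of inspecting the retriever on its own, assuming its run()
# method accepts the same "queries" input and returns the same "documents"
# output (a list of lists, one inner list per query) used in the pipeline
# calls above. This is handy for sanity-checking what the prompt template sees.
retrieved = retriever.run(queries=[query])
for docs_for_query in retrieved["documents"]:
    for doc in docs_for_query:
        print(doc.meta["name"], "->", doc.content)
# -----------------------------------------------------------------------------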
# These are the inputs that will be passed to the Pipeline when running eval, much like we did a couple of lines above
inputs = [
    {"docs_retriever": {"queries": ["What is the answer?"]}, "builder": {"question": "What is the answer?"}},
    {
        "docs_retriever": {"queries": ["Take a deep breath and think. What is the answer?"]},
        "builder": {"question": "Take a deep breath and think. What is the answer?"},
    },
    {
        "docs_retriever": {"queries": ["What is the answer to life, the universe and everything?"]},
        "builder": {"question": "What is the answer to life, the universe and everything?"},
    },
]

# These are the expected outputs that will be compared to the actual output of the Pipeline.
# We have a dictionary for each input so that len(inputs) == len(expected_output).
# This makes it possible to have a different expected output for each input.
# NOTE: I omitted the gpt35 metadata output because it's too long.
expected_output = [
    {
        # This is the output that we expect from the docs_retriever component
        "docs_retriever": {
            "documents": [
                [
                    Document(
                        content="The answer to life, the universe and everything is 42.", meta={"name": "Deep Thought"}
                    ),
                    Document(content="This is not the answer you are looking for.", meta={"name": "Obi-Wan Kenobi"}),
                    Document(content="This is the way.", meta={"name": "Mandalorian"}),
                ]
            ]
        },
        # This is the output that we expect from the builder component
        "builder": {"prompt": "I should write the actual template here but I'm lazy so I won't."},
        # This is the output that we expect from the gpt35 component
        "gpt35": {"replies": ["The answer to life, the universe and everything is 42."], "metadata": {}},
    },
    {
        "docs_retriever": {
            "documents": [
                [
                    Document(
                        content="The answer to life, the universe and everything is 42.", meta={"name": "Deep Thought"}
                    ),
                    Document(content="This is not the answer you are looking for.", meta={"name": "Obi-Wan Kenobi"}),
                    Document(content="This is the way.", meta={"name": "Mandalorian"}),
                ]
            ]
        },
        "builder": {"prompt": "I should write the actual template here but I'm lazy so I won't."},
        "gpt35": {"replies": ["The answer to life, the universe and everything is 42."], "metadata": {}},
    },
    {
        "docs_retriever": {
            "documents": [
                [
                    Document(
                        content="The answer to life, the universe and everything is 42.", meta={"name": "Deep Thought"}
                    ),
                    Document(content="This is not the answer you are looking for.", meta={"name": "Obi-Wan Kenobi"}),
                    Document(content="This is the way.", meta={"name": "Mandalorian"}),
                ]
            ]
        },
        "builder": {"prompt": "I should write the actual template here but I'm lazy so I won't."},
        "gpt35": {"replies": ["The answer to life, the universe and everything is 42."], "metadata": {}},
    },
]

# NOTE: `eval` and `Metric` below refer to the evaluation API described in the proposal, not to Python's builtin eval.
eval_result = eval(pipe, inputs=inputs, expected_output=expected_output)
metrics = eval_result.calculate_metrics(Metric.SAS)  # noqa
metrics.save("path/to/file.csv")
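# --- Illustrative aside (not part of the proposal) --------------------------
# A hand-rolled sketch of the pairing between inputs and expected_output: run
# the Pipeline on each input and compare the generated replies with the
# expected ones. This only illustrates the idea; the eval function in the
# proposal works over the full expected_output (retriever and builder outputs
# included) and produces an object whose metrics, e.g. Metric.SAS, can be
# calculated and saved as shown above.
for i, (pipeline_input, expected) in enumerate(zip(inputs, expected_output)):
    actual = pipe.run(pipeline_input)
    exact_match = actual["gpt35"]["replies"] == expected["gpt35"]["replies"]
    print(f"Input {i}: exact match on replies = {exact_match}")
# -----------------------------------------------------------------------------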