haystack/examples/pipelines/rag_pipeline.py
Stefano Fiorucci 7cc6080dfa
chore: replace metadata w meta in tests/examples (#6612)
* replace metadata w meta in tests/examples

* do not touch already broken e2e tests

* Revert "do not touch already broken e2e tests"

This reverts commit 1f911920d98954b57daacfe8d8ed02fd77d136db.
2023-12-21 14:09:31 +01:00

52 lines
2.3 KiB
Python

import os
from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.generators import GPTGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
# We are model agnostic :) Here, we use OpenAI models and load an api key from environment variables.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# We support many different databases. Here we load a simple and lightweight in-memory document store.
document_store = InMemoryDocumentStore()
# Create some example documents and add them to the document store.
documents = [
Document(content="My name is Jean and I live in Paris."),
Document(content="My name is Mark and I live in Berlin."),
Document(content="My name is Giorgio and I live in Rome."),
]
document_store.write_documents(documents)
# Build a RAG pipeline with a Retriever to get relevant documents to the query and a GPTGenerator interacting with LLMs using a custom prompt.
prompt_template = """
Given these documents, answer the question.\nDocuments:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
\nQuestion: {{question}}
\nAnswer:
"""
rag_pipeline = Pipeline()
rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="retriever")
rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
rag_pipeline.add_component(instance=GPTGenerator(api_key=OPENAI_API_KEY), name="llm")
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm.replies", "answer_builder.replies")
rag_pipeline.connect("llm.meta", "answer_builder.meta")
rag_pipeline.connect("retriever", "answer_builder.documents")
# Ask a question on the data you just added.
question = "Where does Mark live?"
result = rag_pipeline.run(
{"retriever": {"query": question}, "prompt_builder": {"question": question}, "answer_builder": {"query": question}}
)
# For details, like which documents were used to generate the answer, look into the GeneratedAnswer object
print(result["answer_builder"]["answers"])