feat: Add RAG pipeline (#6461)

* add rag pipeline

* Update examples/getting_started/rag.py

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>

---------

Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com>
Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
ZanSara 2023-12-04 14:25:29 +00:00 committed by GitHub
parent 4912f7cb58
commit a38f871dbd
5 changed files with 224 additions and 0 deletions

examples/getting_started/rag.py

@@ -0,0 +1,22 @@
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipeline_utils import build_rag_pipeline

API_KEY = "SET YOUR OPENAI API KEY HERE"

# We support many different databases. Here we load a simple and lightweight in-memory document store.
document_store = InMemoryDocumentStore()

# Create some example documents and add them to the document store.
documents = [
    Document(content="My name is Jean and I live in Paris."),
    Document(content="My name is Mark and I live in Berlin."),
    Document(content="My name is Giorgio and I live in Rome."),
]
document_store.write_documents(documents)

# Let's now build a simple RAG pipeline that uses a generative model to answer questions.
rag_pipeline = build_rag_pipeline(llm_api_key=API_KEY, document_store=document_store)
answers = rag_pipeline.run(query="Who lives in Rome?")
print(answers.data)
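
The example above retrieves with BM25, so documents can be written as plain text. Passing an `embedding_model` switches `build_rag_pipeline` to embedding retrieval, which needs documents stored with embeddings first. A minimal sketch, assuming a fresh document store and the beta's `SentenceTransformersDocumentEmbedder` component (the model names are only examples):

```python
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.pipeline_utils import build_rag_pipeline

document_store = InMemoryDocumentStore()
documents = [Document(content="My name is Giorgio and I live in Rome.")]

# Embed the documents before writing them, so InMemoryEmbeddingRetriever can search them.
doc_embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-mpnet-base-v2")
doc_embedder.warm_up()
document_store.write_documents(doc_embedder.run(documents=documents)["documents"])

# Passing embedding_model switches the pipeline from BM25 to embedding retrieval.
rag_pipeline = build_rag_pipeline(
    llm_api_key="SET YOUR OPENAI API KEY HERE",
    document_store=document_store,
    embedding_model="sentence-transformers/all-mpnet-base-v2",
)
print(rag_pipeline.run(query="Who lives in Rome?").data)
```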

haystack/pipeline_utils/__init__.py

@@ -0,0 +1,3 @@
from haystack.pipeline_utils.rag import build_rag_pipeline

__all__ = ["build_rag_pipeline"]

haystack/pipeline_utils/rag.py

@@ -0,0 +1,134 @@
from typing import Optional

from haystack import Pipeline
from haystack.dataclasses import Answer
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators import GPTGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder

def build_rag_pipeline(
    document_store: InMemoryDocumentStore,
    generation_model: str = "gpt-3.5-turbo",
    prompt_template: Optional[str] = None,
    embedding_model: Optional[str] = None,
    llm_api_key: Optional[str] = None,
):
"""
Returns a prebuilt pipeline to perform retrieval augmented generation with or without an embedding model
(without embeddings, it performs retrieval using BM25).
Example usage:
```python
from haystack.utils import build_rag_pipeline
pipeline = build_rag_pipeline(document_store=your_document_store_instance)
pipeline.run(query="What's the capital of France?")
>>> Answer(data="The capital of France is Paris.")
```
:param document_store: An instance of a DocumentStore to read from.
:param generation_model: The name of the model to use for generation.
:param prompt_template: The template to use for the prompt. If not given, a default template is used.
:param embedding_model: The name of the model to use for embedding. If not given, BM25 is used.
:param llm_api_key: The API key to use for the OpenAI Language Model. If not given, the value of the
llm_api_key will be attempted to be read from the environment variable OPENAI_API_KEY.
"""
    return _RAGPipeline(
        document_store=document_store,
        generation_model=generation_model,
        prompt_template=prompt_template,
        embedding_model=embedding_model,
        llm_api_key=llm_api_key,
    )


class _RAGPipeline:
"""
A simple ready-made pipeline for RAG. It requires a populated document store.
If an embedding model is given, it uses embedding retrieval. Otherwise, it falls back to BM25 retrieval.
Example usage:
```python
rag_pipe = RAGPipeline(document_store=InMemoryDocumentStore())
answers = rag_pipe.run(query="Who lives in Rome?")
>>> Answer(data="Giorgio")
```
"""
def __init__(
self,
document_store: InMemoryDocumentStore,
generation_model: str = "gpt-3.5-turbo",
prompt_template: Optional[str] = None,
embedding_model: Optional[str] = None,
llm_api_key: Optional[str] = None,
):
"""
:param document_store: An instance of a DocumentStore to retrieve documents from.
:param generation_model: The name of the model to use for generation.
:param prompt_template: The template to use for the prompt. If not given, a default template is used.
:param embedding_model: The name of the model to use for embedding. If not given, BM25 is used.
:param llm_api_key: The API key to use for the OpenAI Language Model.
"""
prompt_template = (
prompt_template
or """
Given these documents, answer the question.
Documents:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""
)
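        # Illustrative rendering (not executed): with a single document
        # "My name is Giorgio and I live in Rome." and the question
        # "Who lives in Rome?", the default template above expands to:
        #
        #   Given these documents, answer the question.
        #   Documents:
        #   My name is Giorgio and I live in Rome.
        #   Question: Who lives in Rome?
        #   Answer: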
if not isinstance(document_store, InMemoryDocumentStore):
raise ValueError("RAGPipeline only works with an InMemoryDocumentStore.")
self.pipeline = Pipeline()
if embedding_model:
self.pipeline.add_component(
instance=SentenceTransformersTextEmbedder(model_name_or_path=embedding_model), name="text_embedder"
)
self.pipeline.add_component(
instance=InMemoryEmbeddingRetriever(document_store=document_store), name="retriever"
)
self.pipeline.connect("text_embedder", "retriever")
else:
self.pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="retriever")
self.pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
self.pipeline.add_component(instance=GPTGenerator(api_key=llm_api_key, model_name=generation_model), name="llm")
self.pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
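        # Wiring: the retriever's documents fill the prompt template, the rendered
        # prompt goes to the LLM, and the LLM's replies and metadata (together with
        # the retrieved documents) are assembled into Answer objects by the AnswerBuilder.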
self.pipeline.connect("retriever", "prompt_builder.documents")
self.pipeline.connect("prompt_builder.prompt", "llm.prompt")
self.pipeline.connect("llm.replies", "answer_builder.replies")
self.pipeline.connect("llm.metadata", "answer_builder.metadata")
self.pipeline.connect("retriever", "answer_builder.documents")

    def run(self, query: str) -> Answer:
        """
        Performs RAG using the given query.

        :param query: The question to answer.
        :return: An Answer object.
        """
run_values = {"prompt_builder": {"question": query}, "answer_builder": {"query": query}}
if self.pipeline.graph.nodes.get("text_embedder"):
run_values["text_embedder"] = {"text": query}
else:
run_values["retriever"] = {"query": query}
return self.pipeline.run(run_values)["answer_builder"]["answers"][0]
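
For reference, `build_rag_pipeline` is only wiring; the BM25 variant can be assembled by hand from the same components. A minimal sketch restating the connections made in `_RAGPipeline` (the API key placeholder and shortened template are illustrative):

```python
from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.generators import GPTGenerator

store = InMemoryDocumentStore()  # assumed to be populated elsewhere
template = "Given these documents, answer the question.\n{% for doc in documents %}{{ doc.content }}\n{% endfor %}Question: {{question}}\nAnswer:"

pipe = Pipeline()
pipe.add_component(instance=InMemoryBM25Retriever(document_store=store), name="retriever")
pipe.add_component(instance=PromptBuilder(template=template), name="prompt_builder")
pipe.add_component(instance=GPTGenerator(api_key="YOUR_API_KEY", model_name="gpt-3.5-turbo"), name="llm")
pipe.add_component(instance=AnswerBuilder(), name="answer_builder")
pipe.connect("retriever", "prompt_builder.documents")
pipe.connect("prompt_builder.prompt", "llm.prompt")
pipe.connect("llm.replies", "answer_builder.replies")
pipe.connect("llm.metadata", "answer_builder.metadata")
pipe.connect("retriever", "answer_builder.documents")

# Every component that needs the query receives it explicitly, mirroring _RAGPipeline.run().
question = "Who lives in Rome?"
result = pipe.run(
    {"retriever": {"query": question}, "prompt_builder": {"question": question}, "answer_builder": {"query": question}}
)
print(result["answer_builder"]["answers"][0].data)
```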


@@ -0,0 +1,4 @@
---
features:
  - |
    Add a `build_rag_pipeline` utility function to create ready-made RAG pipelines.


@@ -0,0 +1,61 @@
from unittest.mock import patch, Mock

import pytest

from haystack.dataclasses import Answer
from haystack.testing.factory import document_store_class
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipeline_utils.rag import build_rag_pipeline


@pytest.fixture
def mock_chat_completion():
    """
    Mock the OpenAI API completion response and reuse it across tests.
    """
    with patch("openai.ChatCompletion.create", autospec=True) as mock_chat_completion_create:
# mimic the response from the OpenAI API
mock_choice = Mock()
mock_choice.index = 0
mock_choice.finish_reason = "stop"
mock_message = Mock()
mock_message.content = "I'm fine, thanks. How are you?"
mock_message.role = "user"
mock_choice.message = mock_message
mock_response = Mock()
mock_response.model = "gpt-3.5-turbo"
mock_response.usage = Mock()
mock_response.usage.items.return_value = [
("prompt_tokens", 57),
("completion_tokens", 40),
("total_tokens", 97),
]
mock_response.choices = [mock_choice]
mock_chat_completion_create.return_value = mock_response
yield mock_chat_completion_create


def test_rag_pipeline(mock_chat_completion):
    rag_pipe = build_rag_pipeline(document_store=InMemoryDocumentStore())
    answer = rag_pipe.run(query="question")
    assert isinstance(answer, Answer)


def test_rag_pipeline_other_docstore():
    FakeStore = document_store_class("FakeStore")
    with pytest.raises(ValueError, match="InMemoryDocumentStore"):
        build_rag_pipeline(document_store=FakeStore())


def test_rag_pipeline_no_embedder_if_no_model():
    rag_pipe = build_rag_pipeline(document_store=InMemoryDocumentStore())
    assert "text_embedder" not in rag_pipe.pipeline.graph.nodes


def test_rag_pipeline_embedder_exists_if_model_is_given():
    rag_pipe = build_rag_pipeline(
        document_store=InMemoryDocumentStore(), embedding_model="sentence-transformers/all-mpnet-base-v2"
    )
    assert "text_embedder" in rag_pipe.pipeline.graph.nodes
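
A natural follow-up check (not part of this commit) is to render the default Jinja2 template directly. This sketch assumes `PromptBuilder.run` accepts the template variables as keyword arguments and returns a dict with a `prompt` key, as the pipeline wiring above implies:

```python
from haystack import Document
from haystack.components.builders.prompt_builder import PromptBuilder


def test_prompt_builder_renders_documents_and_question():
    # Hypothetical extra test: verify the template loop inserts document content.
    builder = PromptBuilder(template="{% for doc in documents %}{{ doc.content }}\n{% endfor %}Question: {{question}}")
    result = builder.run(documents=[Document(content="Giorgio lives in Rome.")], question="Who lives in Rome?")
    assert "Giorgio lives in Rome." in result["prompt"]
    assert "Who lives in Rome?" in result["prompt"]
```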