haystack/examples/retrievers/in_memory_bm25_rag.py
Massimiliano Pippi e1ec4e5e4d
refact!: Remove symbols under the haystack.document_stores namespace (#6714)
* remove symbols under the haystack.document_stores namespace

* Update haystack/document_stores/types/protocol.py

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>

* fix

* same for retrievers

* leftovers

* more leftovers

* add relnote

* leftovers

* one more

* fix examples

---------

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
2024-01-10 21:20:42 +01:00

55 lines
2.2 KiB
Python

import os
from pathlib import Path
from haystack import Document
from haystack import Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
# Create a RAG query pipeline
prompt_template = """
Given these documents, answer the question.\nDocuments:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
\nQuestion: {{question}}
\nAnswer:
"""
rag_pipeline = Pipeline()
rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever")
rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
rag_pipeline.add_component(instance=OpenAIGenerator(api_key=os.environ.get("OPENAI_API_KEY")), name="llm")
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm.replies", "answer_builder.replies")
rag_pipeline.connect("llm.meta", "answer_builder.meta")
rag_pipeline.connect("retriever", "answer_builder.documents")
# Draw the pipeline
rag_pipeline.draw(Path("./rag_pipeline.png"))
# Add Documents
documents = [
Document(content="There are over 7,000 languages spoken around the world today."),
Document(
content="Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."
),
Document(
content="In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves."
),
]
rag_pipeline.get_component("retriever").document_store.write_documents(documents) # type: ignore
# Run the pipeline
question = "How many languages are there?"
result = rag_pipeline.run(
{"retriever": {"query": question}, "prompt_builder": {"question": question}, "answer_builder": {"query": question}}
)
print(result["answer_builder"]["answers"][0])