From b34c35d982dad6a2bcc9f470d75b09ea2c37e738 Mon Sep 17 00:00:00 2001
From: Timo Moeller
Date: Thu, 23 Nov 2023 10:32:54 +0100
Subject: [PATCH] initial (#6355)

---
Reviewer notes (free-form text in the area after the three-dash line; not
part of the commit message or of the diff):

This patch adds two new, self-contained example scripts under
examples/preview/retrievers/. Both index the same three short Documents in
an InMemoryDocumentStore and ask "How many languages are there?".

* in_memory_bm25_documentsearch.py
  Builds a Pipeline with a single component, an InMemoryBM25Retriever
  registered as "retriever", writes the documents to the store the
  retriever was constructed with, runs the query and prints the first
  entry of result["retriever"]["documents"].

* in_memory_bm25_rag.py
  Builds a four-component Pipeline: "retriever" -> "prompt_builder" ->
  "llm" -> "answer_builder". The retriever output is connected to both
  prompt_builder.documents and answer_builder.documents; llm.replies and
  llm.metadata are connected to the matching answer_builder inputs.
  The script writes a drawing of the pipeline to ./rag_pipeline.png,
  reads the generator's API key with os.environ.get("OPENAI_API_KEY")
  (which yields None when the variable is unset), writes the documents
  through rag_pipeline.get_component("retriever").document_store, and
  prints the first entry of result["answer_builder"]["answers"].
  The same question string is passed to the retriever ("query"), the
  prompt builder ("question") and the answer builder ("query").

NOTE(review): presumably the RAG example needs a valid OpenAI key and
network access to run - confirm before using it in automated checks.
NOTE(review): line breaks and indentation below were restored from a
whitespace-collapsed copy of this patch. Line counts match the hunk
headers (28 and 53), but the leading whitespace inside the
prompt_template string literal is a best guess - confirm against the
commit before applying.

 .../in_memory_bm25_documentsearch.py          | 28 ++++++++++
 .../preview/retrievers/in_memory_bm25_rag.py  | 53 +++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 examples/preview/retrievers/in_memory_bm25_documentsearch.py
 create mode 100644 examples/preview/retrievers/in_memory_bm25_rag.py

diff --git a/examples/preview/retrievers/in_memory_bm25_documentsearch.py b/examples/preview/retrievers/in_memory_bm25_documentsearch.py
new file mode 100644
index 000000000..e153bbefa
--- /dev/null
+++ b/examples/preview/retrievers/in_memory_bm25_documentsearch.py
@@ -0,0 +1,28 @@
+from haystack.preview import Document
+from haystack.preview.components.retrievers import InMemoryBM25Retriever
+from haystack.preview.document_stores import InMemoryDocumentStore
+from haystack.preview.pipeline import Pipeline
+
+# Create components and a query pipeline
+document_store = InMemoryDocumentStore()
+retriever = InMemoryBM25Retriever(document_store=document_store)
+
+pipeline = Pipeline()
+pipeline.add_component(instance=retriever, name="retriever")
+
+# Add Documents
+documents = [
+    Document(content="There are over 7,000 languages spoken around the world today."),
+    Document(
+        content="Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."
+    ),
+    Document(
+        content="In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves."
+    ),
+]
+document_store.write_documents(documents)
+
+# Run the pipeline
+result = pipeline.run(data={"retriever": {"query": "How many languages are there?"}})
+
+print(result["retriever"]["documents"][0])
diff --git a/examples/preview/retrievers/in_memory_bm25_rag.py b/examples/preview/retrievers/in_memory_bm25_rag.py
new file mode 100644
index 000000000..ebb9ec5b0
--- /dev/null
+++ b/examples/preview/retrievers/in_memory_bm25_rag.py
@@ -0,0 +1,53 @@
+import os
+
+from haystack.preview import Document
+from haystack.preview import Pipeline
+from haystack.preview.components.builders.answer_builder import AnswerBuilder
+from haystack.preview.components.builders.prompt_builder import PromptBuilder
+from haystack.preview.components.generators import GPTGenerator
+from haystack.preview.components.retrievers import InMemoryBM25Retriever
+from haystack.preview.document_stores import InMemoryDocumentStore
+
+# Create a RAG query pipeline
+prompt_template = """
+    Given these documents, answer the question.\nDocuments:
+    {% for doc in documents %}
+        {{ doc.content }}
+    {% endfor %}
+
+    \nQuestion: {{question}}
+    \nAnswer:
+    """
+
+rag_pipeline = Pipeline()
+rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever")
+rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
+rag_pipeline.add_component(instance=GPTGenerator(api_key=os.environ.get("OPENAI_API_KEY")), name="llm")
+rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
+rag_pipeline.connect("retriever", "prompt_builder.documents")
+rag_pipeline.connect("prompt_builder", "llm")
+rag_pipeline.connect("llm.replies", "answer_builder.replies")
+rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
+rag_pipeline.connect("retriever", "answer_builder.documents")
+
+# Draw the pipeline
+rag_pipeline.draw("./rag_pipeline.png")
+
+# Add Documents
+documents = [
+    Document(content="There are over 7,000 languages spoken around the world today."),
+    Document(
+        content="Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."
+    ),
+    Document(
+        content="In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves."
+    ),
+]
+rag_pipeline.get_component("retriever").document_store.write_documents(documents)
+
+# Run the pipeline
+question = "How many languages are there?"
+result = rag_pipeline.run(
+    {"retriever": {"query": question}, "prompt_builder": {"question": question}, "answer_builder": {"query": question}}
+)
+print(result["answer_builder"]["answers"][0])