mirror of https://github.com/deepset-ai/haystack.git
synced 2026-01-08 04:56:45 +00:00

update and replace recipes (#10021)

This commit is contained in:
parent df3b13857e
commit 89f420843d
BIN docs-website/static/img/vizualised-rag-pipeline.png (new file)
Binary file not shown. Before: 56 KiB. After: 72 KiB.
@@ -59,9 +59,45 @@ Here, the custom component `WelcomeTextGenerator` accepts one input: `name` stri

 ## Extended Example

-Click on the Recipe below to see how to create two custom components and connect them in a Haystack pipeline.
+Check out the example below to see how to create two custom components and connect them in a Haystack pipeline.

-RECIPE MISSING
+```python
+# import necessary dependencies
+from typing import List
+
+from haystack import component, Pipeline
+
+
+# Create two custom components. Note the mandatory @component decorator and @component.output_types, as well as the mandatory run method.
+@component
+class WelcomeTextGenerator:
+    """
+    A component generating a personal welcome message and making it upper case
+    """
+    @component.output_types(welcome_text=str, note=str)
+    def run(self, name: str):
+        return {"welcome_text": ('Hello {name}, welcome to Haystack!'.format(name=name)).upper(), "note": "welcome message is ready"}
+
+
+@component
+class WhitespaceSplitter:
+    """
+    A component for splitting the text by whitespace
+    """
+    @component.output_types(split_text=List[str])
+    def run(self, text: str):
+        return {"split_text": text.split()}
+
+
+# create a pipeline and add the custom components to it
+text_pipeline = Pipeline()
+text_pipeline.add_component(name="welcome_text_generator", instance=WelcomeTextGenerator())
+text_pipeline.add_component(name="splitter", instance=WhitespaceSplitter())
+
+# connect the components
+text_pipeline.connect(sender="welcome_text_generator.welcome_text", receiver="splitter.text")
+
+# run the pipeline and store the result
+result = text_pipeline.run({"welcome_text_generator": {"name": "Bilge"}})
+
+print(result["splitter"]["split_text"])
+```

 ## Extending the Existing Components
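For reference, the final `print` in the added example produces the upper-cased greeting split on whitespace:

```python
print(result["splitter"]["split_text"])
# ['HELLO', 'BILGE,', 'WELCOME', 'TO', 'HAYSTACK!']
```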
@@ -153,14 +153,14 @@ pipeline.inputs()

 ## {'fetcher': {'urls': {'type': typing.List[str], 'is_mandatory': True}},
 ##  'converter': {'meta': {'type': typing.Union[typing.Dict[str, typing.Any], typing.List[typing.Dict[str, typing.Any]], NoneType],
 ##                         'is_mandatory': False,
 ##                         'default_value': None},
 ##                'extraction_kwargs': {'type': typing.Optional[typing.Dict[str, typing.Any]],
 ##                         'is_mandatory': False,
 ##                         'default_value': None}},
 ##  'writer': {'policy': {'type': typing.Optional[haystack.document_stores.types.policy.DuplicatePolicy],
 ##                         'is_mandatory': False,
 ##                         'default_value': None}}}
 ```

 From the above response, you can see that the `urls` input is mandatory for `LinkContentFetcher`. This is how you would then run this pipeline:
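For instance, a minimal run call that satisfies this mandatory input could look like the following sketch (the URL is only illustrative):

```python
# pass the mandatory `urls` input to the fetcher; downstream inputs are filled in by the pipeline connections
pipeline.run(data={"fetcher": {"urls": ["https://haystack.deepset.ai"]}})
```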
@@ -175,12 +175,73 @@ pipeline.run(data=

 ## Example

-This recipe walks you through creating a RAG pipeline, explaining the code:
+The following example walks you through creating a RAG pipeline.

-RECIPE MISSING
+```python
+# import necessary dependencies
+from haystack import Pipeline, Document
+from haystack.components.generators.chat import OpenAIChatGenerator
+from haystack.components.retrievers import InMemoryBM25Retriever
+from haystack.document_stores.in_memory import InMemoryDocumentStore
+from haystack.components.builders import ChatPromptBuilder
+from haystack.utils import Secret
+from haystack.dataclasses import ChatMessage
+
+# create a document store and write documents to it
+document_store = InMemoryDocumentStore()
+document_store.write_documents([
+    Document(content="My name is Jean and I live in Paris."),
+    Document(content="My name is Mark and I live in Berlin."),
+    Document(content="My name is Giorgio and I live in Rome.")
+])
+
+# A prompt corresponds to an NLP task and contains instructions for the model. Here, the pipeline will go through each Document to figure out the answer.
+prompt_template = [
+    ChatMessage.from_system(
+        """
+        Given these documents, answer the question.
+        Documents:
+        {% for doc in documents %}
+        {{ doc.content }}
+        {% endfor %}
+        Question:
+        """
+    ),
+    ChatMessage.from_user(
+        "{{question}}"
+    ),
+    ChatMessage.from_system("Answer:")
+]
+
+# create the components, adding the necessary parameters
+retriever = InMemoryBM25Retriever(document_store=document_store)
+prompt_builder = ChatPromptBuilder(template=prompt_template, required_variables="*")
+llm = OpenAIChatGenerator(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="gpt-4o-mini")
+
+# Create the pipeline and add the components to it. The order doesn't matter.
+# At this stage, the Pipeline validates the components without running them yet.
+rag_pipeline = Pipeline()
+rag_pipeline.add_component("retriever", retriever)
+rag_pipeline.add_component("prompt_builder", prompt_builder)
+rag_pipeline.add_component("llm", llm)
+
+# Arrange pipeline components in the order you need them. If a component has more than one input or output, indicate which output you want to connect to which input using the format ("component_name.output_name", "component_name.input_name").
+rag_pipeline.connect("retriever", "prompt_builder.documents")
+rag_pipeline.connect("prompt_builder", "llm")
+
+# Run the pipeline by specifying the first component in the pipeline and passing its mandatory inputs. Optionally, you can pass inputs to other components.
+question = "Who lives in Paris?"
+results = rag_pipeline.run(
+    {
+        "retriever": {"query": question},
+        "prompt_builder": {"question": question},
+    }
+)
+
+print(results["llm"]["replies"])
+```

 Here's what a [visualized Mermaid graph](visualizing-pipelines.mdx) of this pipeline would look like:

 <br />

-<ClickableImage src="/img/5ffb7db-doc.png" alt="Simple RAG pipeline flowchart showing sequential flow from query input through retriever, prompt builder, and language model to final output" size="large" />
+<ClickableImage src="/img/vizualised-rag-pipeline.png" alt="RAG pipeline diagram with three connected components: InMemoryBM25Retriever receives a query string and outputs documents, ChatPromptBuilder combines the documents with a question input to create prompt messages, and OpenAIChatGenerator processes the messages to produce replies. Each component box displays its class name and optional input parameters." size="large" />
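A note on the example above: `results["llm"]["replies"]` is a list of `ChatMessage` objects rather than plain strings. If you only want the answer text, something like this sketch should work:

```python
# each reply is a ChatMessage; its text content is exposed via the `.text` property
print(results["llm"]["replies"][0].text)  # e.g. "Jean lives in Paris."
```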
@@ -42,15 +42,16 @@ If you have any questions, please reach out to us on the [GitHub Discussion](htt

 In the example below, we show how to set an API key using a Haystack [Secret](../concepts/secret-management.mdx). However, for easier use, you can also set an OpenAI key as an `OPENAI_API_KEY` environment variable.

 ```python
+# import necessary dependencies
 from haystack import Pipeline, Document
-from haystack.utils import Secret
-from haystack.document_stores.in_memory import InMemoryDocumentStore
-from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
 from haystack.components.generators.chat import OpenAIChatGenerator
-from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
+from haystack.components.retrievers import InMemoryBM25Retriever
+from haystack.document_stores.in_memory import InMemoryDocumentStore
+from haystack.components.builders import ChatPromptBuilder
+from haystack.utils import Secret
 from haystack.dataclasses import ChatMessage

-## Write documents to InMemoryDocumentStore
+# create a document store and write documents to it
 document_store = InMemoryDocumentStore()
 document_store.write_documents([
     Document(content="My name is Jean and I live in Paris."),

@@ -58,31 +59,41 @@ document_store.write_documents([
     Document(content="My name is Giorgio and I live in Rome.")
 ])

-## Build a RAG pipeline
+# A prompt corresponds to an NLP task and contains instructions for the model. Here, the pipeline will go through each Document to figure out the answer.
 prompt_template = [
-    ChatMessage.from_system("You are a helpful assistant."),
+    ChatMessage.from_system(
+        """
+        Given these documents, answer the question.
+        Documents:
+        {% for doc in documents %}
+        {{ doc.content }}
+        {% endfor %}
+        Question:
+        """
+    ),
     ChatMessage.from_user(
-        "Given these documents, answer the question.\n"
-        "Documents:\n{% for doc in documents %}{{ doc.content }}{% endfor %}\n"
-        "Question: {{question}}\n"
-        "Answer:"
-    )
+        "{{question}}"
+    ),
+    ChatMessage.from_system("Answer:")
 ]

-## Define required variables explicitly
-prompt_builder = ChatPromptBuilder(template=prompt_template, required_variables={"question", "documents"})
-
+# create the components, adding the necessary parameters
 retriever = InMemoryBM25Retriever(document_store=document_store)
-llm = OpenAIChatGenerator(api_key=Secret.from_env_var("OPENAI_API_KEY"))
+prompt_builder = ChatPromptBuilder(template=prompt_template, required_variables="*")
+llm = OpenAIChatGenerator(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="gpt-4o-mini")

+# Create the pipeline and add the components to it. The order doesn't matter.
+# At this stage, the Pipeline validates the components without running them yet.
 rag_pipeline = Pipeline()
 rag_pipeline.add_component("retriever", retriever)
 rag_pipeline.add_component("prompt_builder", prompt_builder)
 rag_pipeline.add_component("llm", llm)
-rag_pipeline.connect("retriever", "prompt_builder.documents")
-rag_pipeline.connect("prompt_builder", "llm.messages")

-## Ask a question
+# Arrange pipeline components in the order you need them. If a component has more than one input or output, indicate which output you want to connect to which input using the format ("component_name.output_name", "component_name.input_name").
+rag_pipeline.connect("retriever", "prompt_builder.documents")
+rag_pipeline.connect("prompt_builder", "llm")

+# Run the pipeline by specifying the first component in the pipeline and passing its mandatory inputs. Optionally, you can pass inputs to other components.
 question = "Who lives in Paris?"
 results = rag_pipeline.run(
     {

@@ -95,10 +106,6 @@ print(results["llm"]["replies"])
 ```

-Are you curious about what each step does in this code example? Check out the recipe below for details:
-
-RECIPE MISSING
-
 ### Adding Your Data

 Instead of running the RAG pipeline on example data, learn how you can add your own custom data using [Document Stores](../concepts/document-store.mdx).
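As a concrete starting point for that last step, here is a minimal sketch of writing your own documents instead of the example sentences (the contents and `meta` values are placeholders):

```python
from haystack import Document
from haystack.document_stores.in_memory import InMemoryDocumentStore

document_store = InMemoryDocumentStore()
# write your own content; the optional `meta` dict can carry source information for later filtering
document_store.write_documents([
    Document(content="Haystack pipelines are graphs of components.", meta={"source": "notes.md"}),
    Document(content="Components are connected with Pipeline.connect().", meta={"source": "notes.md"}),
])
```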
@@ -47,6 +47,74 @@ print(docs)

 ### In a pipeline

-To best understand how can you use a `TopPSampler` and which components to pair it with, have a look at this recipe:
+To best understand how you can use a `TopPSampler` and which components to pair it with, explore the following example.

-RECIPE MISSING
+```python
+# import necessary dependencies
+from haystack import Pipeline
+from haystack.components.builders import ChatPromptBuilder
+from haystack.components.fetchers import LinkContentFetcher
+from haystack.components.converters import HTMLToDocument
+from haystack.components.generators.chat import OpenAIChatGenerator
+from haystack.components.preprocessors import DocumentSplitter
+from haystack.components.rankers import SentenceTransformersSimilarityRanker
+from haystack.components.routers.file_type_router import FileTypeRouter
+from haystack.components.samplers import TopPSampler
+from haystack.components.websearch import SerperDevWebSearch
+from haystack.utils import Secret
+from haystack.dataclasses import ChatMessage
+
+# initialize the components
+web_search = SerperDevWebSearch(
+    api_key=Secret.from_token("<your-api-key>"),
+    top_k=10
+)
+
+lcf = LinkContentFetcher()
+html_converter = HTMLToDocument()
+router = FileTypeRouter(["text/html", "application/pdf", "application/octet-stream"])
+
+# ChatPromptBuilder uses a different template format with ChatMessage
+template = [
+    ChatMessage.from_user("Given these paragraphs below: \n {% for doc in documents %}{{ doc.content }}{% endfor %}\n\nAnswer the question: {{ query }}")
+]
+# set required_variables to avoid warnings in multi-branch pipelines
+prompt_builder = ChatPromptBuilder(template=template, required_variables=["documents", "query"])
+
+# The Ranker plays an important role: it assigns scores to the top 10 found documents based on our query. The TopPSampler needs these scores to work.
+similarity_ranker = SentenceTransformersSimilarityRanker(top_k=10)
+splitter = DocumentSplitter()
+# We set the top_p parameter to 0.95, which helps identify the documents most relevant to our query.
+top_p_sampler = TopPSampler(top_p=0.95)
+
+llm = OpenAIChatGenerator(api_key=Secret.from_token("<your-api-key>"))
+
+# create the pipeline and add the components to it
+pipe = Pipeline()
+pipe.add_component("search", web_search)
+pipe.add_component("fetcher", lcf)
+pipe.add_component("router", router)
+pipe.add_component("converter", html_converter)
+pipe.add_component("splitter", splitter)
+pipe.add_component("ranker", similarity_ranker)
+pipe.add_component("sampler", top_p_sampler)
+pipe.add_component("prompt_builder", prompt_builder)
+pipe.add_component("llm", llm)
+
+# Arrange pipeline components in the order you need them. If a component has more than one input or output, indicate which output you want to connect to which input using the format ("component_name.output_name", "component_name.input_name").
+pipe.connect("search.links", "fetcher.urls")
+pipe.connect("fetcher.streams", "router.sources")
+pipe.connect("router.text/html", "converter.sources")
+pipe.connect("converter.documents", "splitter.documents")
+pipe.connect("splitter.documents", "ranker.documents")
+pipe.connect("ranker.documents", "sampler.documents")
+pipe.connect("sampler.documents", "prompt_builder.documents")
+pipe.connect("prompt_builder.prompt", "llm.messages")
+
+# run the pipeline
+question = "Why are cats afraid of cucumbers?"
+query_dict = {"query": question}
+
+result = pipe.run(data={"search": query_dict, "prompt_builder": query_dict, "ranker": query_dict})
+print(result)
+```
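If you want to inspect which documents survive the sampler, `Pipeline.run` can also return intermediate outputs; a sketch, assuming the `include_outputs_from` parameter is available in your Haystack 2.x version:

```python
# also return the sampler's output, not just the final components' outputs
result = pipe.run(
    data={"search": query_dict, "prompt_builder": query_dict, "ranker": query_dict},
    include_outputs_from={"sampler"},
)
print(result["sampler"]["documents"])  # the documents kept by the TopPSampler
```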