mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-05 11:38:20 +00:00
feat: Add from_template class method to Pipeline (#7240)
* move templating code under the core package * make from_predefined part of the Pipeline API * add tests * amend release notes * import under haystack package * Apply suggestions from code review Co-authored-by: David S. Batista <dsbatista@gmail.com> * from_predefined -> from_template * remove template inheritance for more readability --------- Co-authored-by: David S. Batista <dsbatista@gmail.com>
This commit is contained in:
parent
4766efbf19
commit
e7809b6fea
@ -1,6 +1,6 @@
|
||||
from haystack.core.component import component
|
||||
from haystack.core.errors import ComponentError, DeserializationError
|
||||
from haystack.core.pipeline import Pipeline
|
||||
from haystack.core.pipeline import Pipeline, PredefinedPipeline
|
||||
from haystack.core.serialization import default_from_dict, default_to_dict
|
||||
from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer
|
||||
import haystack.logging
|
||||
@ -16,6 +16,7 @@ __all__ = [
|
||||
"DeserializationError",
|
||||
"ComponentError",
|
||||
"Pipeline",
|
||||
"PredefinedPipeline",
|
||||
"Document",
|
||||
"Answer",
|
||||
"GeneratedAnswer",
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
from haystack.core.pipeline.pipeline import Pipeline
|
||||
from .pipeline import Pipeline
|
||||
from .template import PredefinedPipeline
|
||||
|
||||
__all__ = ["Pipeline"]
|
||||
__all__ = ["Pipeline", "PredefinedPipeline"]
|
||||
|
||||
@ -20,6 +20,7 @@ from haystack.core.errors import (
|
||||
PipelineMaxLoops,
|
||||
PipelineRuntimeError,
|
||||
PipelineValidationError,
|
||||
PipelineUnmarshalError,
|
||||
)
|
||||
from haystack.core.serialization import component_from_dict, component_to_dict
|
||||
from haystack.core.type_utils import _type_name, _types_are_compatible
|
||||
@ -30,6 +31,7 @@ from haystack import tracing
|
||||
|
||||
from .descriptions import find_pipeline_inputs, find_pipeline_outputs
|
||||
from .draw import _to_mermaid_image
|
||||
from .template import PipelineTemplate, PredefinedPipeline
|
||||
|
||||
DEFAULT_MARSHALLER = YamlMarshaller()
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -983,6 +985,30 @@ class Pipeline:
|
||||
|
||||
return pipeline_input_data, unresolved_kwargs
|
||||
|
||||
@classmethod
def from_template(
    cls, predefined_pipeline: PredefinedPipeline, template_params: Optional[Dict[str, Any]] = None
) -> "Pipeline":
    """
    Create a Pipeline from a predefined template. See `PredefinedPipeline` for available options.

    :param predefined_pipeline: The predefined pipeline to use.
    :param template_params: An optional dictionary of parameters to use when rendering the pipeline template.
    :returns: An instance of `Pipeline`.
    :raises PipelineUnmarshalError: If the rendered template cannot be loaded into a pipeline. The message
        includes the rendered source, and the original error is attached as the cause.
    """
    tpl = PipelineTemplate.from_predefined(predefined_pipeline)
    # Rendering happens outside the try block on purpose: if tpl.render() fails,
    # the original error bubbles up unchanged.
    rendered = tpl.render(template_params)

    # If loading the rendered template fails, include the rendered source in the
    # error message to make debugging easier.
    try:
        return cls.loads(rendered)
    except Exception as e:
        msg = f"Error unmarshalling pipeline: {e}\n"
        msg += f"Source:\n{rendered}"
        # Chain explicitly so the traceback shows the original failure as the direct cause.
        raise PipelineUnmarshalError(msg) from e
|
||||
|
||||
|
||||
def _connections_status(
|
||||
sender_node: str, receiver_node: str, sender_sockets: List[OutputSocket], receiver_sockets: List[InputSocket]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{% extends "base.yaml.jinja2" %}
|
||||
---
|
||||
|
||||
{% block components %}
|
||||
components:
|
||||
generator:
|
||||
init_parameters:
|
||||
api_key:
|
||||
@ -12,13 +12,13 @@
|
||||
|
||||
prompt_builder:
|
||||
init_parameters:
|
||||
template: {% raw %}"Answer the question {{question}}.\n\nAnswer:"{% endraw +%}
|
||||
template: {% raw %}"Answer the question {{question}}.\n\nAnswer:\n"{% endraw %}
|
||||
type: "haystack.components.builders.prompt_builder.PromptBuilder"
|
||||
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block connections %}
|
||||
connections:
|
||||
- receiver: generator.prompt
|
||||
sender: prompt_builder.prompt
|
||||
{% endblock %}
|
||||
|
||||
metadata:
|
||||
{}
|
||||
@ -1,6 +1,6 @@
|
||||
{% extends "base.yaml.jinja2" %}
|
||||
---
|
||||
|
||||
{% block components %}
|
||||
components:
|
||||
cleaner:
|
||||
init_parameters:
|
||||
remove_empty_lines: true
|
||||
@ -73,9 +73,8 @@
|
||||
type: "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore"
|
||||
policy: "FAIL"
|
||||
type: "haystack.components.writers.document_writer.DocumentWriter"
|
||||
{% endblock %}
|
||||
|
||||
{% block connections %}
|
||||
connections:
|
||||
- receiver: text_file_converter.sources
|
||||
sender: file_type_router.text/plain
|
||||
- receiver: doc_joiner.documents
|
||||
@ -94,4 +93,6 @@
|
||||
sender: splitter.documents
|
||||
- receiver: writer.documents
|
||||
sender: embedder.documents
|
||||
{% endblock %}
|
||||
|
||||
metadata:
|
||||
{}
|
||||
@ -1,6 +1,6 @@
|
||||
{% extends "base.yaml.jinja2" %}
|
||||
---
|
||||
|
||||
{% block components %}
|
||||
components:
|
||||
answer_builder:
|
||||
init_parameters: {}
|
||||
type: "haystack.components.builders.answer_builder.AnswerBuilder"
|
||||
@ -47,9 +47,8 @@
|
||||
template: |
|
||||
{% raw %}"\nGiven these documents, answer the question.\n\nDocuments:\n{% for doc in documents %}\n{{ doc.content }}\n {% endfor %}\n\nQuestion: {{question}}\n\nAnswer:\n"{% endraw %}
|
||||
type: "haystack.components.builders.prompt_builder.PromptBuilder"
|
||||
{% endblock %}
|
||||
|
||||
{% block connections %}
|
||||
connections:
|
||||
- receiver: retriever.query_embedding
|
||||
sender: text_embedder.embedding
|
||||
- receiver: prompt_builder.documents
|
||||
@ -62,4 +61,6 @@
|
||||
sender: generator.replies
|
||||
- receiver: answer_builder.meta
|
||||
sender: generator.meta
|
||||
{% endblock %}
|
||||
|
||||
metadata:
|
||||
{}
|
||||
@ -4,9 +4,6 @@ from typing import Dict, Any, Optional, Union
|
||||
|
||||
from jinja2 import meta, TemplateSyntaxError, Environment, PackageLoader
|
||||
|
||||
from haystack import Pipeline
|
||||
from haystack.core.errors import PipelineUnmarshalError
|
||||
|
||||
|
||||
TEMPLATE_FILE_EXTENSION = ".yaml.jinja2"
|
||||
TEMPLATE_HOME_DIR = Path(__file__).resolve().parent / "predefined"
|
||||
@ -17,9 +14,6 @@ class PredefinedPipeline(Enum):
|
||||
Enumeration of predefined pipeline templates that can be used to create a `PipelineTemplate`.
|
||||
"""
|
||||
|
||||
# When type is empty, the template source must be provided to the PipelineTemplate before calling build()
|
||||
EMPTY = "empty"
|
||||
|
||||
# Maintain 1-to-1 mapping between the enum name and the template file name in templates directory
|
||||
GENERATIVE_QA = "generative_qa"
|
||||
RAG = "rag"
|
||||
@ -73,7 +67,7 @@ class PipelineTemplate:
|
||||
:param template_content: The raw template source to use in the template.
|
||||
"""
|
||||
env = Environment(
|
||||
loader=PackageLoader("haystack.templates", "predefined"), trim_blocks=True, lstrip_blocks=True
|
||||
loader=PackageLoader("haystack.core.pipeline", "predefined"), trim_blocks=True, lstrip_blocks=True
|
||||
)
|
||||
try:
|
||||
self._template = env.from_string(template_content)
|
||||
@ -84,7 +78,7 @@ class PipelineTemplate:
|
||||
self.template_variables = meta.find_undeclared_variables(env.parse(template_content))
|
||||
self._template_content = template_content
|
||||
|
||||
def build(self, template_params: Optional[Dict[str, Any]] = None) -> Pipeline:
|
||||
def render(self, template_params: Optional[Dict[str, Any]] = None) -> str:
|
||||
"""
|
||||
Renders the template into its string representation using the given parameters.
|
||||
|
||||
@ -93,13 +87,7 @@ class PipelineTemplate:
|
||||
:return: The rendered template as a string, which can be loaded with `Pipeline.loads`.
|
||||
"""
|
||||
template_params = template_params or {}
|
||||
rendered = self._template.render(**template_params)
|
||||
try:
|
||||
return Pipeline.loads(rendered)
|
||||
except Exception as e:
|
||||
msg = f"Error unmarshalling pipeline: {e}\n"
|
||||
msg += f"Source:\n{rendered}"
|
||||
raise PipelineUnmarshalError(msg)
|
||||
return self._template.render(**template_params)
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, file_path: Union[Path, str]) -> "PipelineTemplate":
|
||||
@ -118,10 +106,6 @@ class PipelineTemplate:
|
||||
:param predefined_pipeline: The predefined pipeline to use.
|
||||
:return: An instance of `PipelineTemplate`.
|
||||
"""
|
||||
if predefined_pipeline == PredefinedPipeline.EMPTY:
|
||||
# This is temporary, to ease the refactoring
|
||||
raise ValueError("Please provide a PipelineType value")
|
||||
|
||||
template_path = f"{TEMPLATE_HOME_DIR}/{predefined_pipeline.value}{TEMPLATE_FILE_EXTENSION}"
|
||||
return cls.from_file(template_path)
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
from haystack.templates.pipeline import PipelineTemplate, PredefinedPipeline
|
||||
|
||||
__all__ = ["PipelineTemplate", "PredefinedPipeline"]
|
||||
@ -1,17 +0,0 @@
|
||||
---
|
||||
|
||||
components:
|
||||
{% block components %}
|
||||
{}
|
||||
{% endblock %}
|
||||
|
||||
connections:
|
||||
{% block connections %}
|
||||
[]
|
||||
{% endblock %}
|
||||
|
||||
|
||||
metadata:
|
||||
{% block metadata %}
|
||||
{}
|
||||
{% endblock %}
|
||||
@ -1,35 +1,23 @@
|
||||
---
|
||||
highlights: >
|
||||
Introducing a flexible and dynamic approach to creating NLP pipelines with Haystack's new PipelineTemplate class!
|
||||
This innovative feature utilizes Jinja templated YAML files, allowing users to effortlessly construct and customize
|
||||
complex data processing pipelines for various NLP tasks. From question answering and document indexing to custom
|
||||
pipeline requirements, the PipelineTemplate simplifies configuration and enhances adaptability. Users can now easily
|
||||
override default components or integrate custom settings with simple, straightforward code.
|
||||
Introducing a flexible and dynamic approach to creating NLP pipelines with predefined templates.
|
||||
This innovative feature allows users to easily construct and customize complex data processing pipelines for various
|
||||
NLP tasks.
|
||||
|
||||
For example, the following pipeline template can be used to create an indexing pipeline:
|
||||
```python
|
||||
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
|
||||
from haystack.templates import PipelineTemplate, PredefinedPipeline
|
||||
from haystack import Pipeline, PredefinedPipeline
|
||||
|
||||
pt = PipelineTemplate(PredefinedPipeline.INDEXING)
|
||||
pipe = pt.build(template_params={"use_pdf_file_converter": True})
|
||||
result = pipe.run(data={"sources": ["some_local_dir/and_text_file.txt", "some_other_local_dir/and_pdf_file.pdf"]})
|
||||
print(result)
|
||||
pipe = Pipeline.from_template(PredefinedPipeline.INDEXING)
|
||||
result = pipe.run(data={"sources": ["some_local_dir/and_text_file.txt"]})
|
||||
```
|
||||
|
||||
In the above example, a PredefinedPipeline.INDEXING enum is used to create a pipeline with the PDF file converter
|
||||
enabled. The pipeline is then run on a list of local files and the result is printed (number of indexed documents).
|
||||
The above example creates a PredefinedPipeline.INDEXING pipeline ready to be used. We can use the same template
|
||||
to create a slightly different indexing pipeline, adding a PDF to text converter:
|
||||
|
||||
We could have of course used the same PipelineTemplate class to create any other pre-defined pipeline or even a
|
||||
custom pipeline with custom components and settings.
|
||||
|
||||
On the other hand, the following pipeline template can be used to create a pre-defined RAG pipeline:
|
||||
```python
|
||||
from haystack.templates import PipelineTemplate, PredefinedPipeline
|
||||
from haystack import Pipeline, PredefinedPipeline
|
||||
|
||||
pipe = PipelineTemplate(PredefinedPipeline.RAG).build()
|
||||
pipe = Pipeline.from_template(PredefinedPipeline.INDEXING, template_params={"use_pdf_file_converter": True})
|
||||
result = pipe.run(query="What's the meaning of life?")
|
||||
print(result)
|
||||
```
|
||||
|
||||
PipelineTemplate can load templates from various inputs, including strings, files, and predefined templates.
|
||||
|
||||
@ -10,7 +10,7 @@ import pytest
|
||||
from haystack.core.component import component
|
||||
from haystack.core.component.types import InputSocket, OutputSocket
|
||||
from haystack.core.errors import PipelineDrawingError, PipelineError, PipelineMaxLoops, PipelineRuntimeError
|
||||
from haystack.core.pipeline import Pipeline
|
||||
from haystack.core.pipeline import Pipeline, PredefinedPipeline
|
||||
from haystack.testing.factory import component_class
|
||||
from haystack.testing.sample_components import AddFixedValue, Double
|
||||
|
||||
@ -654,3 +654,14 @@ def test_describe_no_outputs():
|
||||
p.connect("a.x", "c.x")
|
||||
p.connect("b.y", "c.y")
|
||||
assert p.outputs() == {}
|
||||
|
||||
|
||||
def test_from_template():
    """
    Check that `Pipeline.from_template` builds the predefined indexing pipeline,
    with and without the optional PDF converter enabled through `template_params`.
    """
    # Default rendering: the cleaner is present, the PDF converter is not.
    default_pipeline = Pipeline.from_template(PredefinedPipeline.INDEXING)
    assert default_pipeline.get_component("cleaner")
    with pytest.raises(ValueError):
        default_pipeline.get_component("pdf_file_converter")

    # Rendering with the flag set: both components must be present.
    params = {"use_pdf_file_converter": True}
    pdf_pipeline = Pipeline.from_template(PredefinedPipeline.INDEXING, template_params=params)
    for component_name in ("cleaner", "pdf_file_converter"):
        assert pdf_pipeline.get_component(component_name)
|
||||
|
||||
@ -3,7 +3,7 @@ import tempfile
|
||||
import pytest
|
||||
|
||||
from haystack import Pipeline
|
||||
from haystack.templates.pipeline import PipelineTemplate, PredefinedPipeline
|
||||
from haystack.core.pipeline.template import PipelineTemplate, PredefinedPipeline
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -45,8 +45,8 @@ class TestPipelineTemplate:
|
||||
|
||||
# Building a pipeline directly using all default components specified in a predefined or custom template.
|
||||
def test_build_pipeline_with_default_components(self):
|
||||
pipeline = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).build()
|
||||
assert isinstance(pipeline, Pipeline)
|
||||
rendered = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).render()
|
||||
pipeline = Pipeline.loads(rendered)
|
||||
|
||||
# pipeline has components
|
||||
assert pipeline.get_component("cleaner")
|
||||
Loading…
x
Reference in New Issue
Block a user