diff --git a/haystack/__init__.py b/haystack/__init__.py index dd4e910b8..c62e89a20 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -1,6 +1,6 @@ from haystack.core.component import component from haystack.core.errors import ComponentError, DeserializationError -from haystack.core.pipeline import Pipeline +from haystack.core.pipeline import Pipeline, PredefinedPipeline from haystack.core.serialization import default_from_dict, default_to_dict from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer import haystack.logging @@ -16,6 +16,7 @@ __all__ = [ "DeserializationError", "ComponentError", "Pipeline", + "PredefinedPipeline", "Document", "Answer", "GeneratedAnswer", diff --git a/haystack/core/pipeline/__init__.py b/haystack/core/pipeline/__init__.py index 6020b1bd0..534c24854 100644 --- a/haystack/core/pipeline/__init__.py +++ b/haystack/core/pipeline/__init__.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from haystack.core.pipeline.pipeline import Pipeline +from .pipeline import Pipeline +from .template import PredefinedPipeline -__all__ = ["Pipeline"] +__all__ = ["Pipeline", "PredefinedPipeline"] diff --git a/haystack/core/pipeline/pipeline.py b/haystack/core/pipeline/pipeline.py index 5495afbc2..8201fc96c 100644 --- a/haystack/core/pipeline/pipeline.py +++ b/haystack/core/pipeline/pipeline.py @@ -20,6 +20,7 @@ from haystack.core.errors import ( PipelineMaxLoops, PipelineRuntimeError, PipelineValidationError, + PipelineUnmarshalError, ) from haystack.core.serialization import component_from_dict, component_to_dict from haystack.core.type_utils import _type_name, _types_are_compatible @@ -30,6 +31,7 @@ from haystack import tracing from .descriptions import find_pipeline_inputs, find_pipeline_outputs from .draw import _to_mermaid_image +from .template import PipelineTemplate, PredefinedPipeline DEFAULT_MARSHALLER = YamlMarshaller() logger = 
logging.getLogger(__name__) @@ -983,6 +985,31 @@ class Pipeline: return pipeline_input_data, unresolved_kwargs + @classmethod + def from_template( + cls, predefined_pipeline: PredefinedPipeline, template_params: Optional[Dict[str, Any]] = None + ) -> "Pipeline": + """ + Create a Pipeline from a predefined template. See `PredefinedPipeline` for available options. + + :param predefined_pipeline: The predefined pipeline to use. + :param template_params: An optional dictionary of parameters to use when rendering the pipeline template. + :returns: An instance of `Pipeline`. + :raises PipelineUnmarshalError: If the rendered template cannot be loaded as a `Pipeline`. + """ + tpl = PipelineTemplate.from_predefined(predefined_pipeline) + # If tpl.render() fails, we let the original error bubble up + rendered = tpl.render(template_params) + + # If there was a problem with the rendered version of the + # template, we add it to the error stack for debugging + try: + return cls.loads(rendered) + except Exception as e: + msg = f"Error unmarshalling pipeline: {e}\n" + msg += f"Source:\n{rendered}" + raise PipelineUnmarshalError(msg) from e + def _connections_status( sender_node: str, receiver_node: str, sender_sockets: List[OutputSocket], receiver_sockets: List[InputSocket] diff --git a/haystack/templates/predefined/generative_qa.yaml.jinja2 b/haystack/core/pipeline/predefined/generative_qa.yaml.jinja2 similarity index 78% rename from haystack/templates/predefined/generative_qa.yaml.jinja2 rename to haystack/core/pipeline/predefined/generative_qa.yaml.jinja2 index 584792b1a..1382650e3 100644 --- a/haystack/templates/predefined/generative_qa.yaml.jinja2 +++ b/haystack/core/pipeline/predefined/generative_qa.yaml.jinja2 @@ -1,6 +1,6 @@ -{% extends "base.yaml.jinja2" %} +--- -{% block components %} +components: generator: init_parameters: api_key: @@ -12,13 +12,13 @@ prompt_builder: init_parameters: - template: {% raw %}"Answer the question {{question}}.\n\nAnswer:"{% endraw +%} + template: {% raw %}"Answer the question {{question}}.\n\nAnswer:\n"{% endraw %} type: 
"haystack.components.builders.prompt_builder.PromptBuilder" -{% endblock %} - -{% block connections %} +connections: - receiver: generator.prompt sender: prompt_builder.prompt -{% endblock %} + +metadata: + {} diff --git a/haystack/templates/predefined/indexing.yaml.jinja2 b/haystack/core/pipeline/predefined/indexing.yaml.jinja2 similarity index 96% rename from haystack/templates/predefined/indexing.yaml.jinja2 rename to haystack/core/pipeline/predefined/indexing.yaml.jinja2 index 5c4ae2548..822ffceef 100644 --- a/haystack/templates/predefined/indexing.yaml.jinja2 +++ b/haystack/core/pipeline/predefined/indexing.yaml.jinja2 @@ -1,6 +1,6 @@ -{% extends "base.yaml.jinja2" %} +--- -{% block components %} +components: cleaner: init_parameters: remove_empty_lines: true @@ -73,9 +73,8 @@ type: "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore" policy: "FAIL" type: "haystack.components.writers.document_writer.DocumentWriter" -{% endblock %} -{% block connections %} +connections: - receiver: text_file_converter.sources sender: file_type_router.text/plain - receiver: doc_joiner.documents @@ -94,4 +93,6 @@ sender: splitter.documents - receiver: writer.documents sender: embedder.documents -{% endblock %} + +metadata: + {} diff --git a/haystack/templates/predefined/rag.yaml.jinja2 b/haystack/core/pipeline/predefined/rag.yaml.jinja2 similarity index 94% rename from haystack/templates/predefined/rag.yaml.jinja2 rename to haystack/core/pipeline/predefined/rag.yaml.jinja2 index 23789fddd..982f561f9 100644 --- a/haystack/templates/predefined/rag.yaml.jinja2 +++ b/haystack/core/pipeline/predefined/rag.yaml.jinja2 @@ -1,6 +1,6 @@ -{% extends "base.yaml.jinja2" %} +--- -{% block components %} +components: answer_builder: init_parameters: {} type: "haystack.components.builders.answer_builder.AnswerBuilder" @@ -47,9 +47,8 @@ template: | {% raw %}"\nGiven these documents, answer the question.\n\nDocuments:\n{% for doc in documents %}\n{{ doc.content }}\n {% endfor 
%}\n\nQuestion: {{question}}\n\nAnswer:\n"{% endraw %} type: "haystack.components.builders.prompt_builder.PromptBuilder" -{% endblock %} -{% block connections %} +connections: - receiver: retriever.query_embedding sender: text_embedder.embedding - receiver: prompt_builder.documents @@ -62,4 +61,6 @@ sender: generator.replies - receiver: answer_builder.meta sender: generator.meta -{% endblock %} + +metadata: + {} diff --git a/haystack/templates/pipeline.py b/haystack/core/pipeline/template.py similarity index 85% rename from haystack/templates/pipeline.py rename to haystack/core/pipeline/template.py index 2ecd0b4e2..72b08bdaa 100644 --- a/haystack/templates/pipeline.py +++ b/haystack/core/pipeline/template.py @@ -4,9 +4,6 @@ from typing import Dict, Any, Optional, Union from jinja2 import meta, TemplateSyntaxError, Environment, PackageLoader -from haystack import Pipeline -from haystack.core.errors import PipelineUnmarshalError - TEMPLATE_FILE_EXTENSION = ".yaml.jinja2" TEMPLATE_HOME_DIR = Path(__file__).resolve().parent / "predefined" @@ -17,9 +14,6 @@ class PredefinedPipeline(Enum): Enumeration of predefined pipeline templates that can be used to create a `PipelineTemplate`. """ - # When type is empty, the template source must be provided to the PipelineTemplate before calling build() - EMPTY = "empty" - # Maintain 1-to-1 mapping between the enum name and the template file name in templates directory GENERATIVE_QA = "generative_qa" RAG = "rag" @@ -73,7 +67,7 @@ class PipelineTemplate: :param template_content: The raw template source to use in the template. 
""" env = Environment( - loader=PackageLoader("haystack.templates", "predefined"), trim_blocks=True, lstrip_blocks=True + loader=PackageLoader("haystack.core.pipeline", "predefined"), trim_blocks=True, lstrip_blocks=True ) try: self._template = env.from_string(template_content) @@ -84,7 +78,7 @@ class PipelineTemplate: self.template_variables = meta.find_undeclared_variables(env.parse(template_content)) self._template_content = template_content - def build(self, template_params: Optional[Dict[str, Any]] = None) -> Pipeline: + def render(self, template_params: Optional[Dict[str, Any]] = None) -> str: """ - Constructs a `Pipeline` instance based on the template. + Renders the template with the given parameters and returns the resulting string. @@ -93,13 +87,7 @@ class PipelineTemplate: - :return: An instance of `Pipeline` constructed from the rendered template and custom component configurations. + :return: The rendered template as a string. """ template_params = template_params or {} - rendered = self._template.render(**template_params) - try: - return Pipeline.loads(rendered) - except Exception as e: - msg = f"Error unmarshalling pipeline: {e}\n" - msg += f"Source:\n{rendered}" - raise PipelineUnmarshalError(msg) + return self._template.render(**template_params) @classmethod def from_file(cls, file_path: Union[Path, str]) -> "PipelineTemplate": @@ -118,10 +106,6 @@ class PipelineTemplate: :param predefined_pipeline: The predefined pipeline to use. - :return: An instance of `PipelineTemplate `. + :return: An instance of `PipelineTemplate`. 
""" - if predefined_pipeline == PredefinedPipeline.EMPTY: - # This is temporary, to ease the refactoring - raise ValueError("Please provide a PipelineType value") - template_path = f"{TEMPLATE_HOME_DIR}/{predefined_pipeline.value}{TEMPLATE_FILE_EXTENSION}" return cls.from_file(template_path) diff --git a/haystack/templates/__init__.py b/haystack/templates/__init__.py deleted file mode 100644 index 6a0c611f4..000000000 --- a/haystack/templates/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from haystack.templates.pipeline import PipelineTemplate, PredefinedPipeline - -__all__ = ["PipelineTemplate", "PredefinedPipeline"] diff --git a/haystack/templates/predefined/base.yaml.jinja2 b/haystack/templates/predefined/base.yaml.jinja2 deleted file mode 100644 index 8316ef8d9..000000000 --- a/haystack/templates/predefined/base.yaml.jinja2 +++ /dev/null @@ -1,17 +0,0 @@ ---- - -components: -{% block components %} - {} -{% endblock %} - -connections: -{% block connections %} - [] -{% endblock %} - - -metadata: -{% block metadata %} - {} -{% endblock %} diff --git a/releasenotes/notes/add-pipeline-templates-831f857c6387f8c3.yaml b/releasenotes/notes/add-pipeline-templates-831f857c6387f8c3.yaml index c805f907a..f204c57a8 100644 --- a/releasenotes/notes/add-pipeline-templates-831f857c6387f8c3.yaml +++ b/releasenotes/notes/add-pipeline-templates-831f857c6387f8c3.yaml @@ -1,35 +1,23 @@ --- highlights: > - Introducing a flexible and dynamic approach to creating NLP pipelines with Haystack's new PipelineTemplate class! - This innovative feature utilizes Jinja templated YAML files, allowing users to effortlessly construct and customize - complex data processing pipelines for various NLP tasks. From question answering and document indexing to custom - pipeline requirements, the PipelineTemplate simplifies configuration and enhances adaptability. Users can now easily - override default components or integrate custom settings with simple, straightforward code. 
+ Introducing a flexible and dynamic approach to creating NLP pipelines with predefined templates. + This innovative feature allows users to easily construct and customize complex data processing pipelines for various + NLP tasks. For example, the following pipeline template can be used to create an indexing pipeline: ```python - from haystack.components.embedders import SentenceTransformersDocumentEmbedder - from haystack.templates import PipelineTemplate, PredefinedPipeline + from haystack import Pipeline, PredefinedPipeline - pt = PipelineTemplate(PredefinedPipeline.INDEXING) - pipe = pt.build(template_params={"use_pdf_file_converter": True}) - result = pipe.run(data={"sources": ["some_local_dir/and_text_file.txt", "some_other_local_dir/and_pdf_file.pdf"]}) - print(result) + pipe = Pipeline.from_template(PredefinedPipeline.INDEXING) + result = pipe.run(data={"sources": ["some_local_dir/and_text_file.txt"]}) ``` - In the above example, a PredefinedPipeline.INDEXING enum is used to create a pipeline with the PDF file converter - enabled. The pipeline is then run on a list of local files and the result is printed (number of indexed documents). + The above example creates a PredefinedPipeline.INDEXING pipeline ready to be used. We can use the same template + to create a slightly different indexing pipeline, adding a PDF to text converter: - We could have of course used the same PipelineTemplate class to create any other pre-defined pipeline or even a - custom pipeline with custom components and settings. 
- - On the other hand, the following pipeline template can be used to create a pre-defined RAG pipeline: ```python - from haystack.templates import PipelineTemplate, PredefinedPipeline + from haystack import Pipeline, PredefinedPipeline - pipe = PipelineTemplate(PredefinedPipeline.RAG).build() + pipe = Pipeline.from_template(PredefinedPipeline.INDEXING, template_params={"use_pdf_file_converter": True}) - result = pipe.run(query="What's the meaning of life?") + result = pipe.run(data={"sources": ["some_local_dir/and_text_file.txt", "some_other_local_dir/and_pdf_file.pdf"]}) - print(result) ``` - - PipelineTemplate can load templates from various inputs, including strings, files, and predefined templates. diff --git a/test/core/pipeline/test_pipeline.py b/test/core/pipeline/test_pipeline.py index d3690855a..03a159df6 100644 --- a/test/core/pipeline/test_pipeline.py +++ b/test/core/pipeline/test_pipeline.py @@ -10,7 +10,7 @@ import pytest from haystack.core.component import component from haystack.core.component.types import InputSocket, OutputSocket from haystack.core.errors import PipelineDrawingError, PipelineError, PipelineMaxLoops, PipelineRuntimeError -from haystack.core.pipeline import Pipeline +from haystack.core.pipeline import Pipeline, PredefinedPipeline from haystack.testing.factory import component_class from haystack.testing.sample_components import AddFixedValue, Double @@ -654,3 +654,14 @@ def test_describe_no_outputs(): p.connect("a.x", "c.x") p.connect("b.y", "c.y") assert p.outputs() == {} + + +def test_from_template(): + pipe = Pipeline.from_template(PredefinedPipeline.INDEXING) + assert pipe.get_component("cleaner") + with pytest.raises(ValueError): + pipe.get_component("pdf_file_converter") + + pipe = Pipeline.from_template(PredefinedPipeline.INDEXING, template_params={"use_pdf_file_converter": True}) + assert pipe.get_component("cleaner") + assert pipe.get_component("pdf_file_converter") diff --git a/test/templates/test_templates.py b/test/core/pipeline/test_templates.py similarity index 88% rename from test/templates/test_templates.py rename to 
test/core/pipeline/test_templates.py index e8d38f9fa..46a2149f9 100644 --- a/test/templates/test_templates.py +++ b/test/core/pipeline/test_templates.py @@ -3,7 +3,7 @@ import tempfile import pytest from haystack import Pipeline -from haystack.templates.pipeline import PipelineTemplate, PredefinedPipeline +from haystack.core.pipeline.template import PipelineTemplate, PredefinedPipeline @pytest.fixture @@ -45,8 +45,8 @@ class TestPipelineTemplate: # Building a pipeline directly using all default components specified in a predefined or custom template. def test_build_pipeline_with_default_components(self): - pipeline = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).build() - assert isinstance(pipeline, Pipeline) + rendered = PipelineTemplate.from_predefined(PredefinedPipeline.INDEXING).render() + pipeline = Pipeline.loads(rendered) # pipeline has components assert pipeline.get_component("cleaner")