# pylint: disable=too-many-lines
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import itertools
from collections import defaultdict
from datetime import datetime
from enum import IntEnum
from pathlib import Path
from typing import (
    Any,
    ContextManager,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    TextIO,
    Tuple,
    Type,
    TypeVar,
    Union,
)

import networkx  # type:ignore

from haystack import logging, tracing
from haystack.core.component import Component, InputSocket, OutputSocket, component
from haystack.core.errors import (
    DeserializationError,
    PipelineComponentsBlockedError,
    PipelineConnectError,
    PipelineDrawingError,
    PipelineError,
    PipelineMaxComponentRuns,
    PipelineUnmarshalError,
    PipelineValidationError,
)
from haystack.core.pipeline.component_checks import (
    _NO_OUTPUT_PRODUCED,
    all_predecessors_executed,
    are_all_lazy_variadic_sockets_resolved,
    are_all_sockets_ready,
    can_component_run,
    is_any_greedy_socket_ready,
    is_socket_lazy_variadic,
)
from haystack.core.pipeline.utils import FIFOPriorityQueue, _deepcopy_with_exceptions, parse_connect_string
from haystack.core.serialization import DeserializationCallbacks, component_from_dict, component_to_dict
from haystack.core.type_utils import _type_name, _types_are_compatible
from haystack.marshal import Marshaller, YamlMarshaller
from haystack.utils import is_in_jupyter, type_serialization

from .descriptions import find_pipeline_inputs, find_pipeline_outputs
from .draw import _to_mermaid_image
from .template import PipelineTemplate, PredefinedPipeline

DEFAULT_MARSHALLER = YamlMarshaller()

# We use a generic type to annotate the return value of class methods,
# so that static analyzers won't be confused when derived classes
# use those methods.
T = TypeVar("T", bound="PipelineBase")

logger = logging.getLogger(__name__)

# Constants for tracing tags
_COMPONENT_INPUT = "haystack.component.input"
_COMPONENT_OUTPUT = "haystack.component.output"
_COMPONENT_VISITS = "haystack.component.visits"


class ComponentPriority(IntEnum):
    HIGHEST = 1
    READY = 2
    DEFER = 3
    DEFER_LAST = 4
    BLOCKED = 5
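

# Because ComponentPriority is an IntEnum, its members compare as plain integers,
# so a lower value means "run sooner" when used as a queue priority.
# A minimal ordering sketch (illustrative only; it assumes FIFOPriorityQueue pops
# the lowest priority first and breaks ties in insertion order):
#
#     assert ComponentPriority.HIGHEST < ComponentPriority.READY < ComponentPriority.BLOCKED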


class PipelineBase:  # noqa: PLW1641
    """
    Component orchestration engine.

    Builds a graph of components and orchestrates their execution according to the execution graph.
    """

    def __init__(
        self,
        metadata: Optional[Dict[str, Any]] = None,
        max_runs_per_component: int = 100,
        connection_type_validation: bool = True,
    ):
        """
        Creates the Pipeline.

        :param metadata:
            Arbitrary dictionary to store metadata about this `Pipeline`. Make sure all the values contained in
            this dictionary can be serialized and deserialized if you wish to save this `Pipeline` to file.
        :param max_runs_per_component:
            How many times the `Pipeline` can run the same Component.
            If this limit is reached, a `PipelineMaxComponentRuns` exception is raised.
            If not set, it defaults to 100 runs per Component.
        :param connection_type_validation: Whether the pipeline will validate the types of the connections.
            Defaults to True.
        """
        self._telemetry_runs = 0
        self._last_telemetry_sent: Optional[datetime] = None
        self.metadata = metadata or {}
        self.graph = networkx.MultiDiGraph()
        self._max_runs_per_component = max_runs_per_component
        self._connection_type_validation = connection_type_validation
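
    # A minimal construction sketch (illustrative only; `Pipeline` is the concrete
    # subclass exported by `haystack`, and the kwargs mirror __init__ above):
    #
    #     from haystack import Pipeline
    #
    #     pipe = Pipeline(metadata={"owner": "search-team"}, max_runs_per_component=10)
    #     print(pipe.metadata)  # {'owner': 'search-team'}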

    def __eq__(self, other: object) -> bool:
        """
        Pipeline equality is defined by their type and the equality of their serialized form.

        Pipelines of the same type share all metadata, nodes, and edges, but they're not required to use
        the same component instances: this allows a pipeline that was saved and then loaded back to be equal to itself.
        """
        if not isinstance(self, type(other)):
            return False
        assert isinstance(other, PipelineBase)
        return self.to_dict() == other.to_dict()

    def __repr__(self) -> str:
        """
        Returns a text representation of the Pipeline.
        """
        res = f"{object.__repr__(self)}\n"
        if self.metadata:
            res += "🧱 Metadata\n"
            for k, v in self.metadata.items():
                res += f" - {k}: {v}\n"

        res += "🚅 Components\n"
        for name, instance in self.graph.nodes(data="instance"):  # type: ignore # type wrongly defined in networkx
            res += f" - {name}: {instance.__class__.__name__}\n"

        res += "🛤️ Connections\n"
        for sender, receiver, edge_data in self.graph.edges(data=True):
            sender_socket = edge_data["from_socket"].name
            receiver_socket = edge_data["to_socket"].name
            res += f" - {sender}.{sender_socket} -> {receiver}.{receiver_socket} ({edge_data['conn_type']})\n"

        return res

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the pipeline to a dictionary.

        This is meant to be an intermediate representation, but it can also be used to save a pipeline to file.

        :returns:
            Dictionary with serialized data.
        """
        components = {}
        for name, instance in self.graph.nodes(data="instance"):  # type:ignore
            components[name] = component_to_dict(instance, name)

        connections = []
        for sender, receiver, edge_data in self.graph.edges.data():
            sender_socket = edge_data["from_socket"].name
            receiver_socket = edge_data["to_socket"].name
            connections.append({"sender": f"{sender}.{sender_socket}", "receiver": f"{receiver}.{receiver_socket}"})
        return {
            "metadata": self.metadata,
            "max_runs_per_component": self._max_runs_per_component,
            "components": components,
            "connections": connections,
            "connection_type_validation": self._connection_type_validation,
        }
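
    # Shape of the serialized dictionary (illustrative; the "components" entries come
    # from component_to_dict, and their exact fields depend on each component):
    #
    #     {
    #         "metadata": {},
    #         "max_runs_per_component": 100,
    #         "components": {"fetcher": {"type": "...", "init_parameters": {...}}},
    #         "connections": [{"sender": "fetcher.streams", "receiver": "converter.sources"}],
    #         "connection_type_validation": True,
    #     }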

    @classmethod
    def from_dict(
        cls: Type[T], data: Dict[str, Any], callbacks: Optional[DeserializationCallbacks] = None, **kwargs: Any
    ) -> T:
        """
        Deserializes the pipeline from a dictionary.

        :param data:
            Dictionary to deserialize from.
        :param callbacks:
            Callbacks to invoke during deserialization.
        :param kwargs:
            `components`: a dictionary of `{name: instance}` to reuse instances of components instead of creating new
            ones.
        :returns:
            Deserialized pipeline.
        """
        data_copy = _deepcopy_with_exceptions(data)  # to prevent modification of original data
        metadata = data_copy.get("metadata", {})
        max_runs_per_component = data_copy.get("max_runs_per_component", 100)
        connection_type_validation = data_copy.get("connection_type_validation", True)
        pipe = cls(
            metadata=metadata,
            max_runs_per_component=max_runs_per_component,
            connection_type_validation=connection_type_validation,
        )
        components_to_reuse = kwargs.get("components", {})
        for name, component_data in data_copy.get("components", {}).items():
            if name in components_to_reuse:
                # Reuse an instance
                instance = components_to_reuse[name]
            else:
                if "type" not in component_data:
                    raise PipelineError(f"Missing 'type' in component '{name}'")

                if component_data["type"] not in component.registry:
                    try:
                        # Import the module first...
                        module, _ = component_data["type"].rsplit(".", 1)
                        logger.debug("Trying to import module {module_name}", module_name=module)
                        type_serialization.thread_safe_import(module)
                        # ...then try again
                        if component_data["type"] not in component.registry:
                            raise PipelineError(
                                f"Successfully imported module '{module}' but couldn't find "
                                f"'{component_data['type']}' in the component registry.\n"
                                f"The component might be registered under a different path. "
                                f"Here are the registered components:\n {list(component.registry.keys())}\n"
                            )
                    except (ImportError, PipelineError, ValueError) as e:
                        raise PipelineError(
                            f"Component '{component_data['type']}' (name: '{name}') not imported. Please "
                            f"check that the package is installed and the component path is correct."
                        ) from e

                # Create a new one
                component_class = component.registry[component_data["type"]]

                try:
                    instance = component_from_dict(component_class, component_data, name, callbacks)
                except Exception as e:
                    msg = (
                        f"Couldn't deserialize component '{name}' of class '{component_class.__name__}' "
                        f"with the following data: {str(component_data)}. Possible reasons include "
                        "malformed serialized data, mismatch between the serialized component and the "
                        "loaded one (due to a breaking change, see "
                        "https://github.com/deepset-ai/haystack/releases), etc."
                    )
                    raise DeserializationError(msg) from e
            pipe.add_component(name=name, instance=instance)

        for connection in data.get("connections", []):
            if "sender" not in connection:
                raise PipelineError(f"Missing sender in connection: {connection}")
            if "receiver" not in connection:
                raise PipelineError(f"Missing receiver in connection: {connection}")
            pipe.connect(sender=connection["sender"], receiver=connection["receiver"])

        return pipe

    def dumps(self, marshaller: Marshaller = DEFAULT_MARSHALLER) -> str:
        """
        Returns the string representation of this pipeline according to the format dictated by the `Marshaller` in use.

        :param marshaller:
            The Marshaller used to create the string representation. Defaults to `YamlMarshaller`.
        :returns:
            A string representing the pipeline.
        """
        return marshaller.marshal(self.to_dict())

    def dump(self, fp: TextIO, marshaller: Marshaller = DEFAULT_MARSHALLER) -> None:
        """
        Writes the string representation of this pipeline to the file-like object passed in the `fp` argument.

        :param fp:
            A file-like object ready to be written to.
        :param marshaller:
            The Marshaller used to create the string representation. Defaults to `YamlMarshaller`.
        """
        fp.write(marshaller.marshal(self.to_dict()))

    @classmethod
    def loads(
        cls: Type[T],
        data: Union[str, bytes, bytearray],
        marshaller: Marshaller = DEFAULT_MARSHALLER,
        callbacks: Optional[DeserializationCallbacks] = None,
    ) -> T:
        """
        Creates a `Pipeline` object from the string representation passed in the `data` argument.

        :param data:
            The string representation of the pipeline, can be `str`, `bytes` or `bytearray`.
        :param marshaller:
            The Marshaller used to create the string representation. Defaults to `YamlMarshaller`.
        :param callbacks:
            Callbacks to invoke during deserialization.
        :raises DeserializationError:
            If an error occurs during deserialization.
        :returns:
            A `Pipeline` object.
        """
        try:
            deserialized_data = marshaller.unmarshal(data)
        except Exception as e:
            raise DeserializationError(
                "Error while unmarshalling serialized pipeline data. This is usually "
                "caused by malformed or invalid syntax in the serialized representation."
            ) from e

        return cls.from_dict(deserialized_data, callbacks)
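
    # A serialization round-trip sketch (illustrative; the default marshaller is YAML,
    # so `dumps` and `loads` behave like a YAML dump/load of `to_dict()`):
    #
    #     yaml_str = pipe.dumps()
    #     same_pipe = Pipeline.loads(yaml_str)
    #     assert same_pipe == pipe  # __eq__ compares the serialized forms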

    @classmethod
    def load(
        cls: Type[T],
        fp: TextIO,
        marshaller: Marshaller = DEFAULT_MARSHALLER,
        callbacks: Optional[DeserializationCallbacks] = None,
    ) -> T:
        """
        Creates a `Pipeline` object from a string representation.

        The string representation is read from the file-like object passed in the `fp` argument.

        :param fp:
            A file-like object ready to be read from.
        :param marshaller:
            The Marshaller used to create the string representation. Defaults to `YamlMarshaller`.
        :param callbacks:
            Callbacks to invoke during deserialization.
        :raises DeserializationError:
            If an error occurs during deserialization.
        :returns:
            A `Pipeline` object.
        """
        return cls.loads(fp.read(), marshaller, callbacks)
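
    # A file round-trip sketch (illustrative; "pipeline.yaml" is a placeholder path):
    #
    #     with open("pipeline.yaml", "w") as f:
    #         pipe.dump(f)
    #     with open("pipeline.yaml") as f:
    #         loaded = Pipeline.load(f)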

    def add_component(self, name: str, instance: Component) -> None:
        """
        Add the given component to the pipeline.

        Components are not connected to anything by default: use `Pipeline.connect()` to connect components together.
        Component names must be unique, but component instances can be reused if needed.

        :param name:
            The name of the component to add.
        :param instance:
            The component instance to add.

        :raises ValueError:
            If a component with the same name already exists.
        :raises PipelineValidationError:
            If the given instance is not a component.
        """
        # Component names are unique
        if name in self.graph.nodes:
            raise ValueError(f"A component named '{name}' already exists in this pipeline: choose another name.")

        # Components can't be named `_debug`
        if name == "_debug":
            raise ValueError("'_debug' is a reserved name for debug output. Choose another name.")

        # Component names can't have "."
        if "." in name:
            raise ValueError(f"'{name}' is an invalid component name: it cannot contain '.' (dot) characters.")

        # Component instances must be components
        if not isinstance(instance, Component):
            raise PipelineValidationError(
                f"'{type(instance)}' doesn't seem to be a component. Is this class decorated with @component?"
            )

        if getattr(instance, "__haystack_added_to_pipeline__", None):
            msg = (
                "Component has already been added in another Pipeline. Components can't be shared between Pipelines. "
                "Create a new instance instead."
            )
            raise PipelineError(msg)

        setattr(instance, "__haystack_added_to_pipeline__", self)
        setattr(instance, "__component_name__", name)

        # Add component to the graph, disconnected
        logger.debug("Adding component '{component_name}' ({component})", component_name=name, component=instance)
        # We're completely sure the fields exist so we ignore the type error
        self.graph.add_node(
            name,
            instance=instance,
            input_sockets=instance.__haystack_input__._sockets_dict,  # type: ignore[attr-defined]
            output_sockets=instance.__haystack_output__._sockets_dict,  # type: ignore[attr-defined]
            visits=0,
        )
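
    # An add_component sketch (illustrative; the component class and names below are
    # assumptions, not part of this module):
    #
    #     from haystack.components.builders import PromptBuilder
    #
    #     pipe = Pipeline()
    #     pipe.add_component("builder", PromptBuilder(template="Answer: {{question}}"))
    #
    # Reusing the same instance in a second pipeline raises PipelineError; create a
    # fresh instance per pipeline instead.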

    def remove_component(self, name: str) -> Component:
        """
        Removes and returns a component from the pipeline.

        Remove an existing component from the pipeline by providing its name.
        All edges that connect to the component will also be deleted.

        :param name:
            The name of the component to remove.
        :returns:
            The removed Component instance.

        :raises ValueError:
            If there is no component with that name already in the Pipeline.
        """

        # Check that a component with that name is in the Pipeline
        try:
            instance = self.get_component(name)
        except ValueError as exc:
            raise ValueError(
                f"There is no component named '{name}' in the pipeline. "
                f"The valid component names are: {', '.join(self.graph.nodes)}."
            ) from exc

        # Delete component from the graph, deleting all its connections
        self.graph.remove_node(name)

        # Reset the Component sockets' senders and receivers
        input_sockets = instance.__haystack_input__._sockets_dict  # type: ignore[attr-defined]
        for socket in input_sockets.values():
            socket.senders = []

        output_sockets = instance.__haystack_output__._sockets_dict  # type: ignore[attr-defined]
        for socket in output_sockets.values():
            socket.receivers = []

        # Reset the Component's pipeline reference
        setattr(instance, "__haystack_added_to_pipeline__", None)

        return instance

    def connect(self, sender: str, receiver: str) -> "PipelineBase":  # noqa: PLR0915 PLR0912
        """
        Connects two components together.

        All components to connect must exist in the pipeline.
        If connecting to a component that has several output connections, specify the inputs and output names as
        'component_name.connection_name'.

        :param sender:
            The component that delivers the value. This can be either just a component name or can be
            in the format `component_name.connection_name` if the component has multiple outputs.
        :param receiver:
            The component that receives the value. This can be either just a component name or can be
            in the format `component_name.connection_name` if the component has multiple inputs.

        :returns:
            The Pipeline instance.

        :raises PipelineConnectError:
            If the two components cannot be connected (for example if one of the components is
            not present in the pipeline, or the connections don't match by type, and so on).
        """
        # Edges may be named explicitly by passing 'node_name.edge_name' to connect().
        sender_component_name, sender_socket_name = parse_connect_string(sender)
        receiver_component_name, receiver_socket_name = parse_connect_string(receiver)

        if sender_component_name == receiver_component_name:
            raise PipelineConnectError("Connecting a Component to itself is not supported.")

        # Get the nodes data.
        try:
            sender_sockets = self.graph.nodes[sender_component_name]["output_sockets"]
        except KeyError as exc:
            raise ValueError(f"Component named {sender_component_name} not found in the pipeline.") from exc
        try:
            receiver_sockets = self.graph.nodes[receiver_component_name]["input_sockets"]
        except KeyError as exc:
            raise ValueError(f"Component named {receiver_component_name} not found in the pipeline.") from exc

        # If the name of either socket is given, get the socket
        sender_socket: Optional[OutputSocket] = None
        if sender_socket_name:
            sender_socket = sender_sockets.get(sender_socket_name)
            if not sender_socket:
                raise PipelineConnectError(
                    f"'{sender}' does not exist. "
                    f"Output connections of {sender_component_name} are: "
                    + ", ".join([f"{name} (type {_type_name(socket.type)})" for name, socket in sender_sockets.items()])
                )

        receiver_socket: Optional[InputSocket] = None
        if receiver_socket_name:
            receiver_socket = receiver_sockets.get(receiver_socket_name)
            if not receiver_socket:
                raise PipelineConnectError(
                    f"'{receiver}' does not exist. "
                    f"Input connections of {receiver_component_name} are: "
                    + ", ".join(
                        [f"{name} (type {_type_name(socket.type)})" for name, socket in receiver_sockets.items()]
                    )
                )

        # Look for a matching connection among the possible ones.
        # Note that if there is more than one possible connection but two sockets match by name, they're paired.
        sender_socket_candidates: List[OutputSocket] = (
            [sender_socket] if sender_socket else list(sender_sockets.values())
        )
        receiver_socket_candidates: List[InputSocket] = (
            [receiver_socket] if receiver_socket else list(receiver_sockets.values())
        )

        # Find all possible connections between these two components
        possible_connections = []
        for sender_sock, receiver_sock in itertools.product(sender_socket_candidates, receiver_socket_candidates):
            if _types_are_compatible(sender_sock.type, receiver_sock.type, self._connection_type_validation):
                possible_connections.append((sender_sock, receiver_sock))

        # We need this status for error messages, since we might need it in multiple places we calculate it here
        status = _connections_status(
            sender_node=sender_component_name,
            sender_sockets=sender_socket_candidates,
            receiver_node=receiver_component_name,
            receiver_sockets=receiver_socket_candidates,
        )

        if not possible_connections:
            # There's no possible connection between these two components
            if len(sender_socket_candidates) == len(receiver_socket_candidates) == 1:
                msg = (
                    f"Cannot connect '{sender_component_name}.{sender_socket_candidates[0].name}' with "
                    f"'{receiver_component_name}.{receiver_socket_candidates[0].name}': "
                    f"their declared input and output types do not match.\n{status}"
                )
            else:
                msg = (
                    f"Cannot connect '{sender_component_name}' with '{receiver_component_name}': "
                    f"no matching connections available.\n{status}"
                )
            raise PipelineConnectError(msg)

        if len(possible_connections) == 1:
            # There's only one possible connection, use it
            sender_socket = possible_connections[0][0]
            receiver_socket = possible_connections[0][1]

        if len(possible_connections) > 1:
            # There are multiple possible connections, let's try to match them by name
            name_matches = [
                (out_sock, in_sock) for out_sock, in_sock in possible_connections if in_sock.name == out_sock.name
            ]
            if len(name_matches) != 1:
                # There are either no matches or more than one; we can't pick one reliably
                msg = (
                    f"Cannot connect '{sender_component_name}' with "
                    f"'{receiver_component_name}': more than one connection is possible "
                    "between these components. Please specify the connection name, like: "
                    f"pipeline.connect('{sender_component_name}.{possible_connections[0][0].name}', "
                    f"'{receiver_component_name}.{possible_connections[0][1].name}').\n{status}"
                )
                raise PipelineConnectError(msg)

            # Get the only possible match
            sender_socket = name_matches[0][0]
            receiver_socket = name_matches[0][1]

        # Connection must be valid on both sender/receiver sides
        if not sender_socket or not receiver_socket or not sender_component_name or not receiver_component_name:
            if sender_component_name and sender_socket:
                sender_repr = f"{sender_component_name}.{sender_socket.name} ({_type_name(sender_socket.type)})"
            else:
                sender_repr = "input needed"

            if receiver_component_name and receiver_socket:
                receiver_repr = f"({_type_name(receiver_socket.type)}) {receiver_component_name}.{receiver_socket.name}"
            else:
                receiver_repr = "output"
            msg = f"Connection must have both sender and receiver: {sender_repr} -> {receiver_repr}"
            raise PipelineConnectError(msg)

        logger.debug(
            "Connecting '{sender_component}.{sender_socket_name}' to '{receiver_component}.{receiver_socket_name}'",
            sender_component=sender_component_name,
            sender_socket_name=sender_socket.name,
            receiver_component=receiver_component_name,
            receiver_socket_name=receiver_socket.name,
        )

        if receiver_component_name in sender_socket.receivers and sender_component_name in receiver_socket.senders:
            # This is already connected, nothing to do
            return self

        if receiver_socket.senders and not receiver_socket.is_variadic:
            # Only variadic input sockets can receive from multiple senders
            msg = (
                f"Cannot connect '{sender_component_name}.{sender_socket.name}' with "
                f"'{receiver_component_name}.{receiver_socket.name}': "
                f"{receiver_component_name}.{receiver_socket.name} is already connected to {receiver_socket.senders}.\n"
            )
            raise PipelineConnectError(msg)

        # Update the sockets with the new connection
        sender_socket.receivers.append(receiver_component_name)
        receiver_socket.senders.append(sender_component_name)

        # Create the new connection
        self.graph.add_edge(
            sender_component_name,
            receiver_component_name,
            key=f"{sender_socket.name}/{receiver_socket.name}",
            conn_type=_type_name(sender_socket.type),
            from_socket=sender_socket,
            to_socket=receiver_socket,
            mandatory=receiver_socket.is_mandatory,
        )
        return self
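
    # A connect() sketch (illustrative; component and socket names are assumptions):
    #
    #     pipe.connect("retriever", "builder")  # single compatible pair: sockets inferred
    #     pipe.connect("retriever.documents", "builder.documents")  # explicit socket names
    #
    # Since connect() returns self, calls can be chained:
    #
    #     pipe.connect("a", "b").connect("b", "c")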

    def get_component(self, name: str) -> Component:
        """
        Get the component with the specified name from the pipeline.

        :param name:
            The name of the component.
        :returns:
            The instance of that component.

        :raises ValueError:
            If a component with that name is not present in the pipeline.
        """
        try:
            return self.graph.nodes[name]["instance"]
        except KeyError as exc:
            raise ValueError(f"Component named {name} not found in the pipeline.") from exc

    def get_component_name(self, instance: Component) -> str:
        """
        Returns the name of the Component instance if it has been added to this Pipeline or an empty string otherwise.

        :param instance:
            The Component instance to look for.
        :returns:
            The name of the Component instance.
        """
        for name, inst in self.graph.nodes(data="instance"):  # type: ignore # type wrongly defined in networkx
            if inst == instance:
                return name
        return ""

    def inputs(self, include_components_with_connected_inputs: bool = False) -> Dict[str, Dict[str, Any]]:
        """
        Returns a dictionary containing the inputs of a pipeline.

        Each key in the dictionary corresponds to a component name, and its value is another dictionary that describes
        the input sockets of that component, including their types and whether they are optional.

        :param include_components_with_connected_inputs:
            If `False`, only components that have disconnected input edges are
            included in the output.
        :returns:
            A dictionary where each key is a pipeline component name and each value is a dictionary of
            input sockets of that component.
        """
        inputs: Dict[str, Dict[str, Any]] = {}
        for component_name, data in find_pipeline_inputs(self.graph, include_components_with_connected_inputs).items():
            sockets_description = {}
            for socket in data:
                sockets_description[socket.name] = {"type": socket.type, "is_mandatory": socket.is_mandatory}
                if not socket.is_mandatory:
                    sockets_description[socket.name]["default_value"] = socket.default_value

            if sockets_description:
                inputs[component_name] = sockets_description
        return inputs
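
    # Shape of the returned mapping (illustrative; socket names and types are
    # assumptions, and "type" holds the actual type object, not a string):
    #
    #     {
    #         "builder": {
    #             "question": {"type": str, "is_mandatory": True},
    #             "documents": {"type": list, "is_mandatory": False, "default_value": None},
    #         }
    #     }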

    def outputs(self, include_components_with_connected_outputs: bool = False) -> Dict[str, Dict[str, Any]]:
        """
        Returns a dictionary containing the outputs of a pipeline.

        Each key in the dictionary corresponds to a component name, and its value is another dictionary that describes
        the output sockets of that component.

        :param include_components_with_connected_outputs:
            If `False`, only components that have disconnected output edges are
            included in the output.
        :returns:
            A dictionary where each key is a pipeline component name and each value is a dictionary of
            output sockets of that component.
        """
        outputs = {
            comp: {socket.name: {"type": socket.type} for socket in data}
            for comp, data in find_pipeline_outputs(self.graph, include_components_with_connected_outputs).items()
            if data
        }
        return outputs

    def show(
        self,
        *,
        server_url: str = "https://mermaid.ink",
        params: Optional[dict] = None,
        timeout: int = 30,
        super_component_expansion: bool = False,
    ) -> None:
        """
        Display an image representing this `Pipeline` in a Jupyter notebook.

        This function generates a diagram of the `Pipeline` using a Mermaid server and displays it directly in
        the notebook.

        :param server_url:
            The base URL of the Mermaid server used for rendering (default: 'https://mermaid.ink').
            See https://github.com/jihchi/mermaid.ink and https://github.com/mermaid-js/mermaid-live-editor for more
            info on how to set up your own Mermaid server.

        :param params:
            Dictionary of customization parameters to modify the output. Refer to the Mermaid documentation for
            more details. Supported keys:
                - format: Output format ('img', 'svg', or 'pdf'). Default: 'img'.
                - type: Image type for /img endpoint ('jpeg', 'png', 'webp'). Default: 'png'.
                - theme: Mermaid theme ('default', 'neutral', 'dark', 'forest'). Default: 'neutral'.
                - bgColor: Background color in hexadecimal (e.g., 'FFFFFF') or named format (e.g., '!white').
                - width: Width of the output image (integer).
                - height: Height of the output image (integer).
                - scale: Scaling factor (1–3). Only applicable if 'width' or 'height' is specified.
                - fit: Whether to fit the diagram size to the page (PDF only, boolean).
                - paper: Paper size for PDFs (e.g., 'a4', 'a3'). Ignored if 'fit' is true.
                - landscape: Landscape orientation for PDFs (boolean). Ignored if 'fit' is true.

        :param timeout:
            Timeout in seconds for the request to the Mermaid server.

        :param super_component_expansion:
            If set to True and the pipeline contains SuperComponents, the diagram will show the internal structure
            of super-components as if they were components of the pipeline instead of a "black-box".
            Otherwise, only the super-component itself will be displayed.

        :raises PipelineDrawingError:
            If the function is called outside of a Jupyter notebook or if there is an issue with rendering.
        """

        if is_in_jupyter():
            from IPython.display import Image, display  # type: ignore

            if super_component_expansion:
                graph, super_component_mapping = self._merge_super_component_pipelines()
            else:
                graph = self.graph
                super_component_mapping = None

            image_data = _to_mermaid_image(
                graph,
                server_url=server_url,
                params=params,
                timeout=timeout,
                super_component_mapping=super_component_mapping,
            )
            display(Image(image_data))
        else:
            msg = "This method is only supported in Jupyter notebooks. Use Pipeline.draw() to save an image locally."
            raise PipelineDrawingError(msg)

    def draw(
        self,
        *,
        path: Path,
        server_url: str = "https://mermaid.ink",
        params: Optional[dict] = None,
        timeout: int = 30,
        super_component_expansion: bool = False,
    ) -> None:
        """
        Save an image representing this `Pipeline` to the specified file path.

        This function generates a diagram of the `Pipeline` using the Mermaid server and saves it to the provided path.

        :param path:
            The file path where the generated image will be saved.

        :param server_url:
            The base URL of the Mermaid server used for rendering (default: 'https://mermaid.ink').
            See https://github.com/jihchi/mermaid.ink and https://github.com/mermaid-js/mermaid-live-editor for more
            info on how to set up your own Mermaid server.

        :param params:
            Dictionary of customization parameters to modify the output. Refer to the Mermaid documentation for
            more details. Supported keys:
                - format: Output format ('img', 'svg', or 'pdf'). Default: 'img'.
                - type: Image type for /img endpoint ('jpeg', 'png', 'webp'). Default: 'png'.
                - theme: Mermaid theme ('default', 'neutral', 'dark', 'forest'). Default: 'neutral'.
                - bgColor: Background color in hexadecimal (e.g., 'FFFFFF') or named format (e.g., '!white').
                - width: Width of the output image (integer).
                - height: Height of the output image (integer).
                - scale: Scaling factor (1–3). Only applicable if 'width' or 'height' is specified.
                - fit: Whether to fit the diagram size to the page (PDF only, boolean).
                - paper: Paper size for PDFs (e.g., 'a4', 'a3'). Ignored if 'fit' is true.
                - landscape: Landscape orientation for PDFs (boolean). Ignored if 'fit' is true.

        :param timeout:
            Timeout in seconds for the request to the Mermaid server.

        :param super_component_expansion:
            If set to True and the pipeline contains SuperComponents, the diagram will show the internal structure
            of super-components as if they were components of the pipeline instead of a "black-box".
            Otherwise, only the super-component itself will be displayed.

        :raises PipelineDrawingError:
            If there is an issue with rendering or saving the image.
        """

        # Before drawing we edit the graph a bit; to avoid modifying the original that is
        # used for running the pipeline, we copy it.
        if super_component_expansion:
            graph, super_component_mapping = self._merge_super_component_pipelines()
        else:
            graph = self.graph
            super_component_mapping = None

        image_data = _to_mermaid_image(
            graph,
            server_url=server_url,
            params=params,
            timeout=timeout,
            super_component_mapping=super_component_mapping,
        )
        Path(path).write_bytes(image_data)

    def walk(self) -> Iterator[Tuple[str, Component]]:
        """
        Visits each component in the pipeline exactly once and yields its name and instance.

        No guarantees are provided on the visiting order.

        :returns:
            An iterator of tuples of component name and component instance.
        """
        for component_name, instance in self.graph.nodes(data="instance"):  # type: ignore # type is wrong in networkx
            yield component_name, instance
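
    # An iteration sketch (illustrative):
    #
    #     for name, instance in pipe.walk():
    #         print(name, type(instance).__name__)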

    def warm_up(self) -> None:
        """
        Make sure all nodes are warm.

        It's the node's responsibility to make sure this method can be called at every `Pipeline.run()`
        without re-initializing everything.
        """
        for node in self.graph.nodes:
            if hasattr(self.graph.nodes[node]["instance"], "warm_up"):
                logger.info("Warming up component {node}...", node=node)
                self.graph.nodes[node]["instance"].warm_up()

    @staticmethod
    def _create_component_span(
        component_name: str, instance: Component, inputs: Dict[str, Any], parent_span: Optional[tracing.Span] = None
    ) -> ContextManager[tracing.Span]:
        return tracing.tracer.trace(
            "haystack.component.run",
            tags={
                "haystack.component.name": component_name,
                "haystack.component.type": instance.__class__.__name__,
                "haystack.component.input_types": {k: type(v).__name__ for k, v in inputs.items()},
                "haystack.component.input_spec": {
                    key: {
                        "type": (value.type.__name__ if isinstance(value.type, type) else str(value.type)),
                        "senders": value.senders,
                    }
                    for key, value in instance.__haystack_input__._sockets_dict.items()  # type: ignore
                },
                "haystack.component.output_spec": {
                    key: {
                        "type": (value.type.__name__ if isinstance(value.type, type) else str(value.type)),
                        "receivers": value.receivers,
                    }
                    for key, value in instance.__haystack_output__._sockets_dict.items()  # type: ignore
                },
            },
            parent_span=parent_span,
        )

    def validate_input(self, data: Dict[str, Any]) -> None:
        """
        Validates pipeline input data.

        Validates that data:
        * Each Component name actually exists in the Pipeline
        * Each Component is not missing any input
        * Each Component has only one input per input socket, if not variadic
        * Each Component doesn't receive inputs that are already sent by another Component

        :param data:
            A dictionary of inputs for the pipeline's components. Each key is a component name.

        :raises ValueError:
            If inputs are invalid according to the above.
        """
        for component_name, component_inputs in data.items():
            if component_name not in self.graph.nodes:
                raise ValueError(f"Component named {component_name} not found in the pipeline.")
            instance = self.graph.nodes[component_name]["instance"]
            for socket_name, socket in instance.__haystack_input__._sockets_dict.items():
                if socket.senders == [] and socket.is_mandatory and socket_name not in component_inputs:
                    raise ValueError(f"Missing input for component {component_name}: {socket_name}")
            for input_name in component_inputs.keys():
                if input_name not in instance.__haystack_input__._sockets_dict:
                    raise ValueError(f"Input {input_name} not found in component {component_name}.")

        for component_name in self.graph.nodes:
            instance = self.graph.nodes[component_name]["instance"]
            for socket_name, socket in instance.__haystack_input__._sockets_dict.items():
                component_inputs = data.get(component_name, {})
                if socket.senders == [] and socket.is_mandatory and socket_name not in component_inputs:
                    raise ValueError(f"Missing input for component {component_name}: {socket_name}")
                if socket.senders and socket_name in component_inputs and not socket.is_variadic:
                    raise ValueError(
                        f"Input {socket_name} for component {component_name} is already sent by {socket.senders}."
                    )

    def _prepare_component_input_data(self, data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
        """
        Prepares input data for pipeline components.

        Organizes input data for pipeline components and identifies any inputs that are not matched to any
        component's input slots. Deep-copies data items to avoid sharing mutables across multiple components.

        This method processes a flat dictionary of input data, where each key-value pair represents an input name
        and its corresponding value. It distributes these inputs to the appropriate pipeline components based on
        their input requirements. Inputs that don't match any component's input slots are classified as unresolved.

        :param data:
            A dictionary potentially having input names as keys and input values as values.

        :returns:
            A dictionary mapping component names to their respective matched inputs.
        """
        # check whether the data is a nested dictionary of component inputs where each key is a component name
        # and each value is a dictionary of input parameters for that component
        is_nested_component_input = all(isinstance(value, dict) for value in data.values())
        if not is_nested_component_input:
            # flat input, a dict where keys are input names and values are the corresponding values
            # we need to convert it to a nested dictionary of component inputs and then run the pipeline
            # just like in the previous case
            pipeline_input_data: Dict[str, Dict[str, Any]] = defaultdict(dict)
            unresolved_kwargs = {}

            # Retrieve the input slots for each component in the pipeline
            available_inputs: Dict[str, Dict[str, Any]] = self.inputs()

            # Go through all provided inputs to distribute them to the appropriate component inputs
            for input_name, input_value in data.items():
                resolved_at_least_once = False

                # Check each component to see if it has a slot for the current kwarg
                for component_name, component_inputs in available_inputs.items():
                    if input_name in component_inputs:
                        # If a match is found, add the kwarg to the component's input data
                        pipeline_input_data[component_name][input_name] = input_value
                        resolved_at_least_once = True

                if not resolved_at_least_once:
                    unresolved_kwargs[input_name] = input_value

            if unresolved_kwargs:
                logger.warning(
                    "Inputs {input_keys} were not matched to any component inputs, please check your run parameters.",
                    input_keys=list(unresolved_kwargs.keys()),
                )

            data = dict(pipeline_input_data)

        # deepcopying the inputs prevents the Pipeline run logic from being altered unexpectedly
        # when the same input reference is passed to multiple components.
        for component_name, component_inputs in data.items():
            data[component_name] = {k: _deepcopy_with_exceptions(v) for k, v in component_inputs.items()}

        return data
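
    # A flat-to-nested conversion sketch (illustrative; "builder" is an assumed
    # component name exposing a "question" input socket):
    #
    #     pipe._prepare_component_input_data({"question": "Who lives in Paris?"})
    #     # -> {"builder": {"question": "Who lives in Paris?"}}
    #
    # A flat key that matches several components is copied to each of them; unmatched
    # keys only produce a warning and are dropped from the result.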

    @classmethod
    def from_template(
        cls, predefined_pipeline: PredefinedPipeline, template_params: Optional[Dict[str, Any]] = None
    ) -> "PipelineBase":
        """
        Create a Pipeline from a predefined template. See `PredefinedPipeline` for available options.

        :param predefined_pipeline:
            The predefined pipeline to use.
        :param template_params:
            An optional dictionary of parameters to use when rendering the pipeline template.
        :returns:
            An instance of `Pipeline`.
        """
        tpl = PipelineTemplate.from_predefined(predefined_pipeline)
        # If tpl.render() fails, we let the original error bubble up
        rendered = tpl.render(template_params)

        # If there was a problem with the rendered version of the
        # template, we add it to the error stack for debugging
        try:
            return cls.loads(rendered)
        except Exception as e:
            msg = f"Error unmarshalling pipeline: {e}\n"
            msg += f"Source:\n{rendered}"
            raise PipelineUnmarshalError(msg) from e

    def _find_receivers_from(self, component_name: str) -> List[Tuple[str, OutputSocket, InputSocket]]:
        """
        Utility function to find all Components that receive input from `component_name`.

        :param component_name:
            Name of the sender Component

        :returns:
            List of tuples containing the name of the receiver Component and the sender OutputSocket
            and receiver InputSocket instances
        """
        res = []
        for _, receiver_name, connection in self.graph.edges(nbunch=component_name, data=True):
            sender_socket: OutputSocket = connection["from_socket"]
            receiver_socket: InputSocket = connection["to_socket"]
            res.append((receiver_name, sender_socket, receiver_socket))
        return res

    @staticmethod
    def _convert_to_internal_format(pipeline_inputs: Dict[str, Any]) -> Dict[str, Dict[str, List]]:
        """
        Converts the inputs to the pipeline to the format that is needed for the internal `Pipeline.run` logic.

        Example Input:
        {'prompt_builder': {'question': 'Who lives in Paris?'}, 'retriever': {'query': 'Who lives in Paris?'}}
        Example Output:
        {'prompt_builder': {'question': [{'sender': None, 'value': 'Who lives in Paris?'}]},
        'retriever': {'query': [{'sender': None, 'value': 'Who lives in Paris?'}]}}

        :param pipeline_inputs: Inputs to the pipeline.
        :returns: Converted inputs that can be used by the internal `Pipeline.run` logic.
        """
        inputs: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
        for component_name, socket_dict in pipeline_inputs.items():
            inputs[component_name] = {}
            for socket_name, value in socket_dict.items():
                inputs[component_name][socket_name] = [{"sender": None, "value": value}]

        return inputs

    @staticmethod
    def _consume_component_inputs(
        component_name: str, component: Dict, inputs: Dict, is_resume: bool = False
    ) -> Dict[str, Any]:
        """
        Extracts the inputs needed to run the component and removes them from the global inputs state.

        :param component_name: The name of a component.
        :param component: Component with component metadata.
        :param inputs: Global inputs state.
        :param is_resume: Whether the component is being resumed, in which case its inputs were already consumed.
        :returns: The inputs for the component.
        """
        component_inputs = inputs.get(component_name, {})
        consumed_inputs = {}
        greedy_inputs_to_remove = set()
        for socket_name, socket in component["input_sockets"].items():
            socket_inputs = component_inputs.get(socket_name, [])
            socket_inputs = [sock["value"] for sock in socket_inputs if sock["value"] is not _NO_OUTPUT_PRODUCED]

            # if we are resuming a component, the inputs are already consumed, so we just return the first input
            if is_resume:
                consumed_inputs[socket_name] = socket_inputs[0]
                continue
            if socket_inputs:
                if not socket.is_variadic:
                    # We only care about the first input provided to the socket.
                    consumed_inputs[socket_name] = socket_inputs[0]
                elif socket.is_greedy:
                    # We need to keep track of greedy inputs because we always remove them, even if they come from
                    # outside the pipeline. Otherwise, a greedy input from the user would trigger a pipeline to run
                    # indefinitely.
                    greedy_inputs_to_remove.add(socket_name)
                    consumed_inputs[socket_name] = [socket_inputs[0]]
                elif is_socket_lazy_variadic(socket):
                    # We use all inputs provided to the socket on a lazy variadic socket.
                    consumed_inputs[socket_name] = socket_inputs

        # We prune all inputs except for those that were provided from outside the pipeline (e.g. user inputs).
        pruned_inputs = {
            socket_name: [
                sock for sock in socket if sock["sender"] is None and socket_name not in greedy_inputs_to_remove
            ]
            for socket_name, socket in component_inputs.items()
        }
        pruned_inputs = {socket_name: socket for socket_name, socket in pruned_inputs.items() if len(socket) > 0}

        inputs[component_name] = pruned_inputs

        return consumed_inputs
    def _fill_queue(
        self, component_names: List[str], inputs: Dict[str, Any], component_visits: Dict[str, int]
    ) -> FIFOPriorityQueue:
        """
        Calculates the execution priority for each component and inserts it into the priority queue.

        :param component_names: Names of the components to put into the queue.
        :param inputs: Inputs to the components.
        :param component_visits: Current state of component visits.
        :returns: A prioritized queue of component names.
        """
        priority_queue = FIFOPriorityQueue()
        for component_name in component_names:
            component = self._get_component_with_graph_metadata_and_visits(
                component_name, component_visits[component_name]
            )
            priority = self._calculate_priority(component, inputs.get(component_name, {}))
            priority_queue.push(component_name, priority)

        return priority_queue

    @staticmethod
    def _calculate_priority(component: Dict, inputs: Dict) -> ComponentPriority:
        """
        Calculates the execution priority for a component depending on the component's inputs.
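
        The first matching rule determines the priority:
        - BLOCKED: the component cannot run with the inputs it has received so far.
        - HIGHEST: a greedy socket is ready and all other sockets are ready as well.
        - READY: all predecessors of the component have executed.
        - DEFER: all lazy variadic sockets of the component are resolved.
        - DEFER_LAST: none of the above; the component should run as late as possible.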

        :param component: Component metadata and component instance.
        :param inputs: Inputs to the component.
        :returns: Priority value for the component.
        """
        if not can_component_run(component, inputs):
            return ComponentPriority.BLOCKED
        elif is_any_greedy_socket_ready(component, inputs) and are_all_sockets_ready(component, inputs):
            return ComponentPriority.HIGHEST
        elif all_predecessors_executed(component, inputs):
            return ComponentPriority.READY
        elif are_all_lazy_variadic_sockets_resolved(component, inputs):
            return ComponentPriority.DEFER
        else:
            return ComponentPriority.DEFER_LAST

    def _get_component_with_graph_metadata_and_visits(self, component_name: str, visits: int) -> Dict[str, Any]:
        """
        Returns the component instance alongside input/output-socket metadata from the graph and adds current visits.

        We can't store visits in the pipeline graph because this would prevent reentrance / thread-safe execution.

        :param component_name: The name of the component.
        :param visits: Number of visits for the component.
        :returns: Dict including component instance, input/output-sockets, and visits.
        """
        comp_dict = self.graph.nodes[component_name]
        comp_dict = {**comp_dict, "visits": visits}
        return comp_dict

    def _get_next_runnable_component(
        self, priority_queue: FIFOPriorityQueue, component_visits: Dict[str, int]
    ) -> Union[Tuple[ComponentPriority, str, Dict[str, Any]], None]:
        """
        Returns the next runnable component alongside its metadata from the priority queue.

        :param priority_queue: Priority queue of component names.
        :param component_visits: Current state of component visits.
        :returns: The priority, the component name, and the component metadata of the next runnable component,
            or None if no component in the queue can run.
        :raises PipelineMaxComponentRuns: If the next runnable component has exceeded the maximum number of runs.
        """
        priority_and_component_name: Union[Tuple[ComponentPriority, str], None] = (
            None if (item := priority_queue.get()) is None else (ComponentPriority(item[0]), str(item[1]))
        )

        if priority_and_component_name is None:
            return None

        priority, component_name = priority_and_component_name
        comp = self._get_component_with_graph_metadata_and_visits(component_name, component_visits[component_name])
        if comp["visits"] > self._max_runs_per_component:
            msg = f"Maximum run count {self._max_runs_per_component} reached for component '{component_name}'"
            raise PipelineMaxComponentRuns(msg)
        return priority, component_name, comp

    @staticmethod
    def _add_missing_input_defaults(
        component_inputs: Dict[str, Any], component_input_sockets: Dict[str, InputSocket]
    ) -> Dict[str, Any]:
        """
        Updates the inputs with the default values for the inputs that are missing.
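
        Example (illustrative; assumes `top_k` is an optional, non-variadic socket with default value 10):
        component_inputs {'query': 'Who lives in Paris?'} becomes
        {'query': 'Who lives in Paris?', 'top_k': 10}. For variadic sockets, the default value is
        wrapped in a list, e.g. [10], since variadic sockets hold one value per sender.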

        :param component_inputs: Inputs for the component.
        :param component_input_sockets: Input sockets of the component.
        :returns: The updated inputs.
        """
        for name, socket in component_input_sockets.items():
            if not socket.is_mandatory and name not in component_inputs:
                if socket.is_variadic:
                    component_inputs[name] = [socket.default_value]
                else:
                    component_inputs[name] = socket.default_value

        return component_inputs

    def _tiebreak_waiting_components(
        self,
        component_name: str,
        priority: ComponentPriority,
        priority_queue: FIFOPriorityQueue,
        topological_sort: Union[Dict[str, int], None],
    ) -> Tuple[str, Union[Dict[str, int], None]]:
        """
        Decides which component to run when multiple components are waiting for inputs with the same priority.
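
        Ties are broken deterministically: components are ordered by their position in a cached
        topological sort of the pipeline graph and then alphabetically by name. For cyclic
        pipelines, the order is computed on the condensation of the graph, where each strongly
        connected component collapses into a single node.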

        :param component_name: The name of the component.
        :param priority: Priority of the component.
        :param priority_queue: Priority queue of component names.
        :param topological_sort: Cached topological sort of all components in the pipeline.
        :returns: The name of the component to run next and the (possibly newly computed) topological sort.
        """
        components_with_same_priority = [component_name]

        while len(priority_queue) > 0:
            next_priority, next_component_name = priority_queue.peek()
            if next_priority == priority:
                priority_queue.pop()  # actually remove the component
                components_with_same_priority.append(next_component_name)
            else:
                break

        if len(components_with_same_priority) > 1:
            if topological_sort is None:
                if networkx.is_directed_acyclic_graph(self.graph):
                    topological_sort = networkx.lexicographical_topological_sort(self.graph)
                    topological_sort = {node: idx for idx, node in enumerate(topological_sort)}
                else:
                    condensed = networkx.condensation(self.graph)
                    condensed_sorted = {node: idx for idx, node in enumerate(networkx.topological_sort(condensed))}
                    topological_sort = {
                        component_name: condensed_sorted[node]
                        for component_name, node in condensed.graph["mapping"].items()
                    }

            components_with_same_priority = sorted(
                components_with_same_priority, key=lambda comp_name: (topological_sort[comp_name], comp_name.lower())
            )

            component_name = components_with_same_priority[0]

        return component_name, topological_sort

    @staticmethod
    def _write_component_outputs(
        component_name: str,
        component_outputs: Mapping[str, Any],
        inputs: Dict[str, Any],
        receivers: List[Tuple],
        include_outputs_from: Set[str],
    ) -> Mapping[str, Any]:
        """
        Distributes the outputs of a component to the input sockets that it is connected to.
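
        Example (illustrative; assumes an `llm.replies` output connected to an `answer_builder.replies` input):
        if `llm` produces {'replies': ['Berlin']}, the value ['Berlin'] is written to the `replies`
        input socket of `answer_builder` in the global inputs state. Outputs consumed by a
        connection are pruned from the returned outputs unless the component is listed in
        `include_outputs_from`.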

        :param component_name: The name of the component.
        :param component_outputs: The outputs of the component.
        :param inputs: The current global input state.
        :param receivers: List of components that receive inputs from the component.
        :param include_outputs_from: Set of component names that should always return an output from the pipeline.
        :returns: The component's outputs, pruned of any values consumed by receiving sockets unless the
            component is listed in `include_outputs_from`.
        """
        for receiver_name, sender_socket, receiver_socket in receivers:
            # We either get the value that was produced by the actor or we use the _NO_OUTPUT_PRODUCED class to
            # indicate that the sender did not produce an output for this socket.
            # This allows us to track if a predecessor already ran but did not produce an output.
            value = component_outputs.get(sender_socket.name, _NO_OUTPUT_PRODUCED)

            if receiver_name not in inputs:
                inputs[receiver_name] = {}

            if is_socket_lazy_variadic(receiver_socket):
                # If the receiver socket is lazy variadic, we append the new input.
                # Lazy variadic sockets can collect multiple inputs.
                _write_to_lazy_variadic_socket(
                    inputs=inputs,
                    receiver_name=receiver_name,
                    receiver_socket_name=receiver_socket.name,
                    component_name=component_name,
                    value=value,
                )
            else:
                # If the receiver socket is not lazy variadic, it is greedy variadic or non-variadic.
                # We overwrite with the new input if it's not _NO_OUTPUT_PRODUCED or if the current value is None.
                _write_to_standard_socket(
                    inputs=inputs,
                    receiver_name=receiver_name,
                    receiver_socket_name=receiver_socket.name,
                    component_name=component_name,
                    value=value,
                )

        # If we want to include all outputs from this actor in the final outputs, we don't need to prune any
        # consumed outputs.
        if component_name in include_outputs_from:
            return component_outputs

        # We prune outputs that were consumed by any receiving sockets.
        # All remaining outputs will be added to the final outputs of the pipeline.
        consumed_outputs = {sender_socket.name for _, sender_socket, __ in receivers}
        pruned_outputs = {key: value for key, value in component_outputs.items() if key not in consumed_outputs}

        return pruned_outputs

    @staticmethod
    def _is_queue_stale(priority_queue: FIFOPriorityQueue) -> bool:
        """
        Checks if the priority queue needs to be recomputed because the priorities might have changed.

        :param priority_queue: Priority queue of component names.
        :returns: True if the queue is empty or if its highest-priority component is not ready to run.
        """
        return len(priority_queue) == 0 or priority_queue.peek()[0] > ComponentPriority.READY

    @staticmethod
    def validate_pipeline(priority_queue: FIFOPriorityQueue) -> None:
        """
        Validate the pipeline to check if it is blocked or has no valid entry point.

        :param priority_queue: Priority queue of component names.
        :raises PipelineComponentsBlockedError:
            If the pipeline is blocked or has no valid entry point.
        """
        if len(priority_queue) == 0:
            return

        candidate = priority_queue.peek()
        if candidate is not None and candidate[0] == ComponentPriority.BLOCKED:
            raise PipelineComponentsBlockedError()

    def _find_super_components(self) -> list[tuple[str, Component]]:
        """
        Find all SuperComponents in the pipeline.

        :returns:
            List of tuples containing (component_name, component_instance) representing a SuperComponent.
        """
        super_components = []
        for comp_name, comp in self.walk():
            # A SuperComponent has a "pipeline" attribute that is itself a Pipeline instance.
            # We don't test against SuperComponent directly because doing so would lead to circular imports.
            if hasattr(comp, "pipeline") and isinstance(comp.pipeline, self.__class__):
                super_components.append((comp_name, comp))
        return super_components

    def _merge_super_component_pipelines(self) -> Tuple["networkx.MultiDiGraph", Dict[str, str]]:
        """
        Merge the internal pipelines of SuperComponents into the main pipeline graph structure.

        This creates a new networkx.MultiDiGraph containing all the components from both the main pipeline
        and all the internal SuperComponents' pipelines. The SuperComponents are removed and their internal
        components are connected to corresponding input and output sockets of the main pipeline.

        :returns:
            A tuple containing:
            - A networkx.MultiDiGraph with the expanded structure of the main pipeline and all its SuperComponents
            - A dictionary mapping each internal component name to the name of the SuperComponent it belongs to
        """
        merged_graph = self.graph.copy()
        super_component_mapping: Dict[str, str] = {}

        for super_name, super_component in self._find_super_components():
            internal_pipeline = super_component.pipeline  # type: ignore
            internal_graph = internal_pipeline.graph.copy()

            # Mark all components in the internal pipeline as being part of a SuperComponent
            for node in internal_graph.nodes():
                super_component_mapping[node] = super_name

            # Edges connected to the SuperComponent
            incoming_edges = list(merged_graph.in_edges(super_name, data=True))
            outgoing_edges = list(merged_graph.out_edges(super_name, data=True))

            # Merge the SuperComponent graph into the main graph and remove the SuperComponent node,
            # since its components are now part of the main graph.
            merged_graph = networkx.compose(merged_graph, internal_graph)
            merged_graph.remove_node(super_name)

            # Get the entry and exit points of the SuperComponent's internal pipeline.
            entry_points = [n for n in internal_graph.nodes() if internal_graph.in_degree(n) == 0]
            exit_points = [n for n in internal_graph.nodes() if internal_graph.out_degree(n) == 0]

            # Connect the incoming edges to entry points.
            for sender, _, edge_data in incoming_edges:
                sender_socket = edge_data["from_socket"]
                for entry_point in entry_points:
                    # Find a matching input socket in the entry point.
                    entry_point_sockets = internal_graph.nodes[entry_point]["input_sockets"]
                    for socket_name, socket in entry_point_sockets.items():
                        if _types_are_compatible(sender_socket.type, socket.type, self._connection_type_validation):
                            merged_graph.add_edge(
                                sender,
                                entry_point,
                                key=f"{sender_socket.name}/{socket_name}",
                                conn_type=_type_name(sender_socket.type),
                                from_socket=sender_socket,
                                to_socket=socket,
                                mandatory=socket.is_mandatory,
                            )

            # Connect outgoing edges from exit points.
            for _, receiver, edge_data in outgoing_edges:
                receiver_socket = edge_data["to_socket"]
                for exit_point in exit_points:
                    # Find a matching output socket in the exit point.
                    exit_point_sockets = internal_graph.nodes[exit_point]["output_sockets"]
                    for socket_name, socket in exit_point_sockets.items():
                        if _types_are_compatible(socket.type, receiver_socket.type, self._connection_type_validation):
                            merged_graph.add_edge(
                                exit_point,
                                receiver,
                                key=f"{socket_name}/{receiver_socket.name}",
                                conn_type=_type_name(socket.type),
                                from_socket=socket,
                                to_socket=receiver_socket,
                                mandatory=receiver_socket.is_mandatory,
                            )

        return merged_graph, super_component_mapping

    def _is_pipeline_possibly_blocked(self, current_pipeline_outputs: Dict[str, Any]) -> bool:
        """
        Heuristically determines whether the pipeline is possibly blocked based on its current outputs.

        This method checks if the pipeline has produced any of the expected outputs.
        - If no outputs are expected (i.e., `self.outputs()` returns nothing), the method assumes the pipeline
          is not blocked.
        - If at least one expected output is present in `current_pipeline_outputs`, the pipeline is also assumed
          to not be blocked.
        - If none of the expected outputs are present, the pipeline is considered to be possibly blocked.

        Note: This check is not definitive. It is intended as a best-effort guess to detect a stalled or
        misconfigured pipeline when there are no more runnable components.

        :param current_pipeline_outputs: A dictionary of outputs currently produced by the pipeline.
        :returns:
            True if the pipeline is possibly blocked (i.e., expected outputs are missing), False otherwise.
        """
        expected_outputs = self.outputs()
        return bool(expected_outputs) and not any(k in current_pipeline_outputs for k in expected_outputs)


def _connections_status(
    sender_node: str, receiver_node: str, sender_sockets: List[OutputSocket], receiver_sockets: List[InputSocket]
) -> str:
    """
    Lists the status of the sockets, for error messages.
    """
    sender_sockets_entries = []
    for sender_socket in sender_sockets:
        sender_sockets_entries.append(f" - {sender_socket.name}: {_type_name(sender_socket.type)}")
    sender_sockets_list = "\n".join(sender_sockets_entries)

    receiver_sockets_entries = []
    for receiver_socket in receiver_sockets:
        if receiver_socket.senders:
            sender_status = f"sent by {','.join(receiver_socket.senders)}"
        else:
            sender_status = "available"
        receiver_sockets_entries.append(
            f" - {receiver_socket.name}: {_type_name(receiver_socket.type)} ({sender_status})"
        )
    receiver_sockets_list = "\n".join(receiver_sockets_entries)

    return f"'{sender_node}':\n{sender_sockets_list}\n'{receiver_node}':\n{receiver_sockets_list}"


# Utility functions for writing to sockets
def _write_to_lazy_variadic_socket(
    inputs: Dict[str, Any], receiver_name: str, receiver_socket_name: str, component_name: str, value: Any
) -> None:
    """
    Write to a lazy variadic socket.

    Mutates inputs in place.
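
    Example (illustrative): if the socket already holds [{"sender": "retriever_1", "value": docs_1}],
    writing a value from "retriever_2" appends a second entry instead of overwriting the first, so
    lazy variadic sockets accumulate one entry per sender.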
    """
    if not inputs[receiver_name].get(receiver_socket_name):
        inputs[receiver_name][receiver_socket_name] = []

    inputs[receiver_name][receiver_socket_name].append({"sender": component_name, "value": value})


def _write_to_standard_socket(
    inputs: Dict[str, Any], receiver_name: str, receiver_socket_name: str, component_name: str, value: Any
) -> None:
    """
    Write to a greedy variadic or non-variadic socket.

    Mutates inputs in place.
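
    Example (illustrative): a value of _NO_OUTPUT_PRODUCED does not overwrite a value that was
    already written to the socket, but it is recorded when the socket holds no value yet, so that
    downstream checks can tell that the sender ran without producing an output.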
    """
    current_value = inputs[receiver_name].get(receiver_socket_name)

    # Only overwrite if there's no existing value, or we have a new value to provide
    if current_value is None or value is not _NO_OUTPUT_PRODUCED:
        inputs[receiver_name][receiver_socket_name] = [{"sender": component_name, "value": value}]