feat: ComponentTool - preserve docstrings from underlying pipeline components (#9403)

* ComponentTool - preserve docstrings from underlying pipeline components * PR feedback, adjustments
2025-11-24 22:16:34 +00:00 · 2025-05-20 13:11:49 +02:00 · 2025-05-20 13:11:49 +02:00 · f253db3e14
commit f253db3e14
parent 6ad23f822f
4 changed files with 186 additions and 3 deletions
--- a/haystack/tools/component_tool.py
+++ b/haystack/tools/component_tool.py
@ -17,7 +17,7 @@ from haystack.core.serialization import (
 from haystack.tools import Tool
 from haystack.tools.errors import SchemaGenerationError
 from haystack.tools.from_function import _remove_title_from_schema
-from haystack.tools.parameters_schema_utils import _get_param_descriptions, _resolve_type
+from haystack.tools.parameters_schema_utils import _get_component_param_descriptions, _resolve_type
 from haystack.utils.callable_serialization import deserialize_callable, serialize_callable

 logger = logging.getLogger(__name__)
@ -270,7 +270,7 @@ class ComponentTool(Tool):
        :raises SchemaGenerationError: If schema generation fails
        :returns: OpenAI tools schema for the component's run method parameters.
        """
-        component_run_description, param_descriptions = _get_param_descriptions(component.run)
+        component_run_description, param_descriptions = _get_component_param_descriptions(component)

        # collect fields (types and defaults) and descriptions from function parameters
        fields: Dict[str, Any] = {}
--- a/haystack/tools/parameters_schema_utils.py
+++ b/haystack/tools/parameters_schema_utils.py
@ -47,6 +47,68 @@ def _get_param_descriptions(method: Callable) -> Tuple[str, Dict[str, str]]:
    return parsed_doc.short_description or "", param_descriptions


+def _get_component_param_descriptions(component: Any) -> Tuple[str, Dict[str, str]]:
+    """
+    Get parameter descriptions from a component, handling both regular Components and SuperComponents.
+
+    For regular components, this extracts descriptions from the run method's docstring.
+    For SuperComponents, this extracts descriptions from the underlying pipeline components.
+
+    :param component: The component to extract parameter descriptions from
+    :returns: A tuple of (short_description, param_descriptions)
+    """
+    from haystack.core.super_component.super_component import _SuperComponent
+
+    # Get descriptions from the component's run method
+    short_desc, param_descriptions = _get_param_descriptions(component.run)
+
+    # If it's a SuperComponent, enhance the descriptions from the original components
+    if isinstance(component, _SuperComponent):
+        # Collect descriptions from components in the pipeline
+        component_descriptions = []
+        processed_components = set()
+
+        # First gather descriptions from all components that have inputs mapped
+        for super_param_name, pipeline_paths in component.input_mapping.items():
+            # Collect descriptions from all mapped components
+            descriptions = []
+            for path in pipeline_paths:
+                try:
+                    # Get the component and socket this input is mapped from
+                    comp_name, socket_name = component._split_component_path(path)
+                    pipeline_component = component.pipeline.get_component(comp_name)
+
+                    # Get run method descriptions for this component
+                    run_desc, run_param_descriptions = _get_param_descriptions(pipeline_component.run)
+
+                    # Don't add the same component description multiple times
+                    if comp_name not in processed_components:
+                        processed_components.add(comp_name)
+                        if run_desc:
+                            component_descriptions.append(f"'{comp_name}': {run_desc}")
+
+                    # Add parameter description if available
+                    if input_param_mapping := run_param_descriptions.get(socket_name):
+                        descriptions.append(f"Provided to the '{comp_name}' component as: '{input_param_mapping}'")
+                except Exception as e:
+                    logger.debug(f"Error extracting description for {super_param_name} from {path}: {str(e)}")
+
+            # Input descriptions may map one-to-many, not just one-to-one: a single input can feed several sockets.
+            # For example, for a combined_input parameter description:
+            # super_comp = SuperComponent(
+            #   pipeline=pipeline,
+            #   input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]},
+            # )
+            if descriptions:
+                param_descriptions[super_param_name] = ", and ".join(descriptions) + "."
+
+        # We also create a combined description for the SuperComponent based on its components
+        if component_descriptions:
+            short_desc = f"A component that combines: {', '.join(component_descriptions)}"
+
+    return short_desc, param_descriptions
+
+
 def _dataclass_to_pydantic_model(dc_type: Any) -> type[BaseModel]:
    """
    Convert a Python dataclass to an equivalent Pydantic model.
--- a/releasenotes/notes/preserve-docstrings-super-component-tools-1fd9eb8a73b5c312.yaml
+++ b/releasenotes/notes/preserve-docstrings-super-component-tools-1fd9eb8a73b5c312.yaml
@ -0,0 +1,16 @@
+---
+enhancements:
+  - |
+    ComponentTool now preserves and combines docstrings from underlying pipeline components when wrapping a SuperComponent.
+    When a SuperComponent is used with ComponentTool, two key improvements are made:
+
+    1. Parameter descriptions are now extracted from the original components in the wrapped pipeline. When
+       a single input is mapped to multiple components, the parameter descriptions are combined from all
+       mapped components, providing comprehensive information about how the parameter is used throughout the pipeline.
+
+    2. The overall component description is now generated from the descriptions of all underlying components
+       instead of using the generic SuperComponent description. This helps LLMs understand what the component
+       actually does rather than just seeing "Runs the wrapped pipeline with the provided inputs."
+
+    These changes make SuperComponents much more useful with LLM function calling as the LLM will get detailed
+    information about both the component's purpose and its parameters.
--- a/test/tools/test_component_tool.py
+++ b/test/tools/test_component_tool.py
@ -14,7 +14,7 @@ import pytest
 from openai.types.chat import ChatCompletion, ChatCompletionMessage
 from openai.types.chat.chat_completion import Choice

-from haystack import Pipeline, component
+from haystack import Pipeline, component, SuperComponent
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators.chat import OpenAIChatGenerator
 from haystack.components.tools import ToolInvoker
@ -639,3 +639,108 @@ class TestToolComponentInPipelineWithOpenAI:
            result = pipeline.run({"llm": {"messages": [ChatMessage.from_user(text="Hello")], "tools": [tool]}})

        assert result["llm"]["replies"][0].text == "A response from the model"
+
+    def test_component_tool_with_super_component_docstrings(self, monkeypatch):
+        """Test that ComponentTool preserves docstrings from underlying pipeline components in SuperComponents."""
+
+        @component
+        class AnnotatedComponent:
+            """An annotated component with descriptive parameter docstrings."""
+
+            @component.output_types(result=str)
+            def run(self, text: str, number: int = 42):
+                """Process inputs and return result.
+                :param text: A detailed description of the text parameter that should be preserved
+                :param number: A detailed description of the number parameter that should be preserved
+                """
+                return {"result": f"Processed: {text} and {number}"}
+
+        # Create a pipeline with the annotated component
+        pipeline = Pipeline()
+        pipeline.add_component("processor", AnnotatedComponent())
+        # Create SuperComponent with mapping
+        super_comp = SuperComponent(
+            pipeline=pipeline,
+            input_mapping={"input_text": ["processor.text"], "input_number": ["processor.number"]},
+            output_mapping={"processor.result": "processed_result"},
+        )
+
+        # Create ComponentTool from SuperComponent
+        tool = ComponentTool(component=super_comp, name="text_processor")
+
+        # Verify that schema includes the docstrings from the original component
+        assert tool.parameters == {
+            "type": "object",
+            "description": "A component that combines: 'processor': Process inputs and return result.",
+            "properties": {
+                "input_text": {
+                    "type": "string",
+                    "description": "Provided to the 'processor' component as: 'A detailed description of the text parameter that should be preserved'.",
+                },
+                "input_number": {
+                    "type": "integer",
+                    "description": "Provided to the 'processor' component as: 'A detailed description of the number parameter that should be preserved'.",
+                },
+            },
+            "required": ["input_text"],
+        }
+
+        # Test the tool functionality works
+        result = tool.invoke(input_text="Hello", input_number=42)
+        assert result["processed_result"] == "Processed: Hello and 42"
+
+    def test_component_tool_with_multiple_mapped_docstrings(self):
+        """Test that ComponentTool combines docstrings from multiple components when a single input maps to multiple components."""
+
+        @component
+        class ComponentA:
+            """Component A with descriptive docstrings."""
+
+            @component.output_types(output_a=str)
+            def run(self, query: str):
+                """Process query in component A.
+                :param query: The query string for component A
+                """
+                return {"output_a": f"A processed: {query}"}
+
+        @component
+        class ComponentB:
+            """Component B with descriptive docstrings."""
+
+            @component.output_types(output_b=str)
+            def run(self, text: str):
+                """Process text in component B.
+                :param text: Text to process in component B
+                """
+                return {"output_b": f"B processed: {text}"}
+
+        # Create a pipeline with both components
+        pipeline = Pipeline()
+        pipeline.add_component("comp_a", ComponentA())
+        pipeline.add_component("comp_b", ComponentB())
+
+        # Create SuperComponent with a single input mapped to both components
+        super_comp = SuperComponent(
+            pipeline=pipeline, input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]}
+        )
+
+        # Create ComponentTool from SuperComponent
+        tool = ComponentTool(component=super_comp, name="combined_processor")
+
+        # Verify that schema includes combined docstrings from both components
+        assert tool.parameters == {
+            "type": "object",
+            "description": "A component that combines: 'comp_a': Process query in component A., 'comp_b': Process text in component B.",
+            "properties": {
+                "combined_input": {
+                    "type": "string",
+                    "description": "Provided to the 'comp_a' component as: 'The query string for component A', and Provided to the 'comp_b' component as: 'Text to process in component B'.",
+                }
+            },
+            "required": ["combined_input"],
+        }
+
+        # Test the tool functionality works
+        result = tool.invoke(combined_input="test input")
+        assert result["output_a"] == "A processed: test input"
+        assert result["output_b"] == "B processed: test input"