feat: ComponentTool - preserve docstrings from underlying pipeline components (#9403)

* ComponentTool - preserve docstrings from underlying pipeline components

* PR feedback, adjustments
This commit is contained in:
Vladimir Blagojevic 2025-05-20 13:11:49 +02:00 committed by GitHub
parent 6ad23f822f
commit f253db3e14
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 186 additions and 3 deletions

View File

@ -17,7 +17,7 @@ from haystack.core.serialization import (
from haystack.tools import Tool
from haystack.tools.errors import SchemaGenerationError
from haystack.tools.from_function import _remove_title_from_schema
from haystack.tools.parameters_schema_utils import _get_param_descriptions, _resolve_type
from haystack.tools.parameters_schema_utils import _get_component_param_descriptions, _resolve_type
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
logger = logging.getLogger(__name__)
@ -270,7 +270,7 @@ class ComponentTool(Tool):
:raises SchemaGenerationError: If schema generation fails
:returns: OpenAI tools schema for the component's run method parameters.
"""
component_run_description, param_descriptions = _get_param_descriptions(component.run)
component_run_description, param_descriptions = _get_component_param_descriptions(component)
# collect fields (types and defaults) and descriptions from function parameters
fields: Dict[str, Any] = {}

View File

@ -47,6 +47,68 @@ def _get_param_descriptions(method: Callable) -> Tuple[str, Dict[str, str]]:
return parsed_doc.short_description or "", param_descriptions
def _get_component_param_descriptions(component: Any) -> Tuple[str, Dict[str, str]]:
    """
    Build the short description and the per-parameter descriptions for a component.

    A regular component is described by the docstring of its `run` method. For a SuperComponent, every input
    listed in its `input_mapping` is additionally described through the `run` docstrings of the pipeline
    components it is mapped to, and the short description is replaced by a summary of those components
    (when at least one of them has a short description).

    A failure while resolving a single mapped path is logged at debug level and that path is skipped.

    :param component: The component to extract parameter descriptions from
    :returns: A tuple of (short_description, param_descriptions)
    """
    from haystack.core.super_component.super_component import _SuperComponent

    # Baseline: whatever the component's own run() docstring provides
    short_desc, param_descriptions = _get_param_descriptions(component.run)

    # Regular components need nothing more than their own docstring
    if not isinstance(component, _SuperComponent):
        return short_desc, param_descriptions

    # "'<component name>': <run summary>" entries, at most one per pipeline component
    component_summaries = []
    # Names of pipeline components already considered for `component_summaries`
    seen_components = set()

    for super_param_name, pipeline_paths in component.input_mapping.items():
        # One SuperComponent input can feed several sockets (one to many), e.g.
        #     input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]}
        # so we gather one note per mapped socket and join them afterwards.
        socket_notes = []

        for path in pipeline_paths:
            try:
                # Resolve the pipeline component and socket this input is mapped to
                comp_name, socket_name = component._split_component_path(path)
                pipeline_component = component.pipeline.get_component(comp_name)

                # Descriptions taken from that component's run() docstring
                run_desc, run_param_descriptions = _get_param_descriptions(pipeline_component.run)

                # Each component contributes to the combined summary only once
                first_visit = comp_name not in seen_components
                if first_visit:
                    seen_components.add(comp_name)
                if first_visit and run_desc:
                    component_summaries.append(f"'{comp_name}': {run_desc}")

                # Only sockets that actually have a non-empty description produce a note
                input_param_mapping = run_param_descriptions.get(socket_name)
                if input_param_mapping:
                    socket_notes.append(f"Provided to the '{comp_name}' component as: '{input_param_mapping}'")
            except Exception as e:
                logger.debug(f"Error extracting description for {super_param_name} from {path}: {str(e)}")

        if socket_notes:
            param_descriptions[super_param_name] = ", and ".join(socket_notes) + "."

    # Replace the generic description with one assembled from the wrapped components
    if component_summaries:
        short_desc = f"A component that combines: {', '.join(component_summaries)}"

    return short_desc, param_descriptions
def _dataclass_to_pydantic_model(dc_type: Any) -> type[BaseModel]:
"""
Convert a Python dataclass to an equivalent Pydantic model.

View File

@ -0,0 +1,16 @@
---
enhancements:
- |
ComponentTool now preserves and combines docstrings from underlying pipeline components when wrapping a SuperComponent.
When a SuperComponent is used with ComponentTool, two key improvements are made:
1. Parameter descriptions are now extracted from the original components in the wrapped pipeline. When
a single input is mapped to multiple components, the parameter descriptions are combined from all
mapped components, providing comprehensive information about how the parameter is used throughout the pipeline.
2. The overall component description is now generated from the descriptions of all underlying components
instead of using the generic SuperComponent description. This helps LLMs understand what the component
actually does rather than just seeing "Runs the wrapped pipeline with the provided inputs."
These changes make SuperComponents much more useful with LLM function calling as the LLM will get detailed
information about both the component's purpose and its parameters.

View File

@ -14,7 +14,7 @@ import pytest
from openai.types.chat import ChatCompletion, ChatCompletionMessage
from openai.types.chat.chat_completion import Choice
from haystack import Pipeline, component
from haystack import Pipeline, component, SuperComponent
from haystack.components.builders import PromptBuilder
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.tools import ToolInvoker
@ -639,3 +639,108 @@ class TestToolComponentInPipelineWithOpenAI:
result = pipeline.run({"llm": {"messages": [ChatMessage.from_user(text="Hello")], "tools": [tool]}})
assert result["llm"]["replies"][0].text == "A response from the model"
def test_component_tool_with_super_component_docstrings(self, monkeypatch):
    """Check that ComponentTool surfaces the run() docstrings of the component wrapped by a SuperComponent."""

    @component
    class AnnotatedComponent:
        """An annotated component with descriptive parameter docstrings."""

        @component.output_types(result=str)
        def run(self, text: str, number: int = 42):
            """Process inputs and return result.

            :param text: A detailed description of the text parameter that should be preserved
            :param number: A detailed description of the number parameter that should be preserved
            """
            return {"result": f"Processed: {text} and {number}"}

    # Single-component pipeline wrapped in a SuperComponent that renames its inputs and output
    wrapped_pipeline = Pipeline()
    wrapped_pipeline.add_component("processor", AnnotatedComponent())
    wrapper = SuperComponent(
        pipeline=wrapped_pipeline,
        input_mapping={"input_text": ["processor.text"], "input_number": ["processor.number"]},
        output_mapping={"processor.result": "processed_result"},
    )

    tool = ComponentTool(component=wrapper, name="text_processor")

    # The schema must carry the descriptions written on AnnotatedComponent.run
    expected_schema = {
        "type": "object",
        "description": "A component that combines: 'processor': Process inputs and return result.",
        "properties": {
            "input_text": {
                "type": "string",
                "description": "Provided to the 'processor' component as: 'A detailed description of the text parameter that should be preserved'.",
            },
            "input_number": {
                "type": "integer",
                "description": "Provided to the 'processor' component as: 'A detailed description of the number parameter that should be preserved'.",
            },
        },
        "required": ["input_text"],
    }
    assert tool.parameters == expected_schema

    # The tool must still be callable end to end
    outcome = tool.invoke(input_text="Hello", input_number=42)
    assert outcome["processed_result"] == "Processed: Hello and 42"
def test_component_tool_with_multiple_mapped_docstrings(self):
    """Check that one SuperComponent input mapped to several components gets a combined description."""

    @component
    class ComponentA:
        """Component A with descriptive docstrings."""

        @component.output_types(output_a=str)
        def run(self, query: str):
            """Process query in component A.

            :param query: The query string for component A
            """
            return {"output_a": f"A processed: {query}"}

    @component
    class ComponentB:
        """Component B with descriptive docstrings."""

        @component.output_types(output_b=str)
        def run(self, text: str):
            """Process text in component B.

            :param text: Text to process in component B
            """
            return {"output_b": f"B processed: {text}"}

    # Two independent components that both receive the same SuperComponent input
    wrapped_pipeline = Pipeline()
    wrapped_pipeline.add_component("comp_a", ComponentA())
    wrapped_pipeline.add_component("comp_b", ComponentB())
    wrapper = SuperComponent(
        pipeline=wrapped_pipeline, input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]}
    )

    tool = ComponentTool(component=wrapper, name="combined_processor")

    # Both the overall description and the parameter description must mention comp_a and comp_b
    expected_schema = {
        "type": "object",
        "description": "A component that combines: 'comp_a': Process query in component A., 'comp_b': Process text in component B.",
        "properties": {
            "combined_input": {
                "type": "string",
                "description": "Provided to the 'comp_a' component as: 'The query string for component A', and Provided to the 'comp_b' component as: 'Text to process in component B'.",
            }
        },
        "required": ["combined_input"],
    }
    assert tool.parameters == expected_schema

    # The single input must reach both components when the tool is invoked
    outcome = tool.invoke(combined_input="test input")
    assert outcome["output_a"] == "A processed: test input"
    assert outcome["output_b"] == "B processed: test input"