diff --git a/haystack/tools/component_tool.py b/haystack/tools/component_tool.py index 0e35dc618..7cac568d4 100644 --- a/haystack/tools/component_tool.py +++ b/haystack/tools/component_tool.py @@ -17,7 +17,7 @@ from haystack.core.serialization import ( from haystack.tools import Tool from haystack.tools.errors import SchemaGenerationError from haystack.tools.from_function import _remove_title_from_schema -from haystack.tools.parameters_schema_utils import _get_param_descriptions, _resolve_type +from haystack.tools.parameters_schema_utils import _get_component_param_descriptions, _resolve_type from haystack.utils.callable_serialization import deserialize_callable, serialize_callable logger = logging.getLogger(__name__) @@ -270,7 +270,7 @@ class ComponentTool(Tool): :raises SchemaGenerationError: If schema generation fails :returns: OpenAI tools schema for the component's run method parameters. """ - component_run_description, param_descriptions = _get_param_descriptions(component.run) + component_run_description, param_descriptions = _get_component_param_descriptions(component) # collect fields (types and defaults) and descriptions from function parameters fields: Dict[str, Any] = {} diff --git a/haystack/tools/parameters_schema_utils.py b/haystack/tools/parameters_schema_utils.py index c322785a7..6758d1ce6 100644 --- a/haystack/tools/parameters_schema_utils.py +++ b/haystack/tools/parameters_schema_utils.py @@ -47,6 +47,68 @@ def _get_param_descriptions(method: Callable) -> Tuple[str, Dict[str, str]]: return parsed_doc.short_description or "", param_descriptions +def _get_component_param_descriptions(component: Any) -> Tuple[str, Dict[str, str]]: + """ + Get parameter descriptions from a component, handling both regular Components and SuperComponents. + + For regular components, this extracts descriptions from the run method's docstring. + For SuperComponents, this extracts descriptions from the underlying pipeline components. 
+ + :param component: The component to extract parameter descriptions from + :returns: A tuple of (short_description, param_descriptions) + """ + from haystack.core.super_component.super_component import _SuperComponent + + # Get descriptions from the component's run method + short_desc, param_descriptions = _get_param_descriptions(component.run) + + # If it's a SuperComponent, enhance the descriptions from the original components + if isinstance(component, _SuperComponent): + # Collect descriptions from components in the pipeline + component_descriptions = [] + processed_components = set() + + # First gather descriptions from all components that have inputs mapped + for super_param_name, pipeline_paths in component.input_mapping.items(): + # Collect descriptions from all mapped components + descriptions = [] + for path in pipeline_paths: + try: + # Get the component and socket this input is mapped from + comp_name, socket_name = component._split_component_path(path) + pipeline_component = component.pipeline.get_component(comp_name) + + # Get run method descriptions for this component + run_desc, run_param_descriptions = _get_param_descriptions(pipeline_component.run) + + # Don't add the same component description multiple times + if comp_name not in processed_components: + processed_components.add(comp_name) + if run_desc: + component_descriptions.append(f"'{comp_name}': {run_desc}") + + # Add parameter description if available + if input_param_mapping := run_param_descriptions.get(socket_name): + descriptions.append(f"Provided to the '{comp_name}' component as: '{input_param_mapping}'") + except Exception as e: + logger.debug(f"Error extracting description for {super_param_name} from {path}: {str(e)}") + + # We handle not only a one-to-one mapping of input parameter descriptions, but also a one-to-many mapping. + # i.e. 
for a combined_input parameter description: + # super_comp = SuperComponent( + # pipeline=pipeline, + # input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]}, + # ) + if descriptions: + param_descriptions[super_param_name] = ", and ".join(descriptions) + "." + + # We also create a combined description for the SuperComponent based on its components + if component_descriptions: + short_desc = f"A component that combines: {', '.join(component_descriptions)}" + + return short_desc, param_descriptions + + def _dataclass_to_pydantic_model(dc_type: Any) -> type[BaseModel]: """ Convert a Python dataclass to an equivalent Pydantic model. diff --git a/releasenotes/notes/preserve-docstrings-super-component-tools-1fd9eb8a73b5c312.yaml b/releasenotes/notes/preserve-docstrings-super-component-tools-1fd9eb8a73b5c312.yaml new file mode 100644 index 000000000..854c8aa60 --- /dev/null +++ b/releasenotes/notes/preserve-docstrings-super-component-tools-1fd9eb8a73b5c312.yaml @@ -0,0 +1,16 @@ +--- +enhancements: + - | + ComponentTool now preserves and combines docstrings from underlying pipeline components when wrapping a SuperComponent. + When a SuperComponent is used with ComponentTool, two key improvements are made: + + 1. Parameter descriptions are now extracted from the original components in the wrapped pipeline. When + a single input is mapped to multiple components, the parameter descriptions are combined from all + mapped components, providing comprehensive information about how the parameter is used throughout the pipeline. + + 2. The overall component description is now generated from the descriptions of all underlying components + instead of using the generic SuperComponent description. This helps LLMs understand what the component + actually does rather than just seeing "Runs the wrapped pipeline with the provided inputs." 
+ + These changes make SuperComponents much more useful with LLM function calling as the LLM will get detailed + information about both the component's purpose and its parameters. diff --git a/test/tools/test_component_tool.py b/test/tools/test_component_tool.py index 549200a66..f6975992a 100644 --- a/test/tools/test_component_tool.py +++ b/test/tools/test_component_tool.py @@ -14,7 +14,7 @@ import pytest from openai.types.chat import ChatCompletion, ChatCompletionMessage from openai.types.chat.chat_completion import Choice -from haystack import Pipeline, component +from haystack import Pipeline, component, SuperComponent from haystack.components.builders import PromptBuilder from haystack.components.generators.chat import OpenAIChatGenerator from haystack.components.tools import ToolInvoker @@ -639,3 +639,108 @@ class TestToolComponentInPipelineWithOpenAI: result = pipeline.run({"llm": {"messages": [ChatMessage.from_user(text="Hello")], "tools": [tool]}}) assert result["llm"]["replies"][0].text == "A response from the model" + + def test_component_tool_with_super_component_docstrings(self, monkeypatch): + """Test that ComponentTool preserves docstrings from underlying pipeline components in SuperComponents.""" + + @component + class AnnotatedComponent: + """An annotated component with descriptive parameter docstrings.""" + + @component.output_types(result=str) + def run(self, text: str, number: int = 42): + """Process inputs and return result. 
+ :param text: A detailed description of the text parameter that should be preserved + :param number: A detailed description of the number parameter that should be preserved + """ + return {"result": f"Processed: {text} and {number}"} + + # Create a pipeline with the annotated component + pipeline = Pipeline() + pipeline.add_component("processor", AnnotatedComponent()) + # Create SuperComponent with mapping + super_comp = SuperComponent( + pipeline=pipeline, + input_mapping={"input_text": ["processor.text"], "input_number": ["processor.number"]}, + output_mapping={"processor.result": "processed_result"}, + ) + + # Create ComponentTool from SuperComponent + tool = ComponentTool(component=super_comp, name="text_processor") + + # Verify that schema includes the docstrings from the original component + assert tool.parameters == { + "type": "object", + "description": "A component that combines: 'processor': Process inputs and return result.", + "properties": { + "input_text": { + "type": "string", + "description": "Provided to the 'processor' component as: 'A detailed description of the text parameter that should be preserved'.", + }, + "input_number": { + "type": "integer", + "description": "Provided to the 'processor' component as: 'A detailed description of the number parameter that should be preserved'.", + }, + }, + "required": ["input_text"], + } + + # Test the tool functionality works + result = tool.invoke(input_text="Hello", input_number=42) + assert result["processed_result"] == "Processed: Hello and 42" + + def test_component_tool_with_multiple_mapped_docstrings(self): + """Test that ComponentTool combines docstrings from multiple components when a single input maps to multiple components.""" + + @component + class ComponentA: + """Component A with descriptive docstrings.""" + + @component.output_types(output_a=str) + def run(self, query: str): + """Process query in component A. 
+ :param query: The query string for component A + """ + return {"output_a": f"A processed: {query}"} + + @component + class ComponentB: + """Component B with descriptive docstrings.""" + + @component.output_types(output_b=str) + def run(self, text: str): + """Process text in component B. + :param text: Text to process in component B + """ + return {"output_b": f"B processed: {text}"} + + # Create a pipeline with both components + pipeline = Pipeline() + pipeline.add_component("comp_a", ComponentA()) + pipeline.add_component("comp_b", ComponentB()) + + # Create SuperComponent with a single input mapped to both components + super_comp = SuperComponent( + pipeline=pipeline, input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]} + ) + + # Create ComponentTool from SuperComponent + tool = ComponentTool(component=super_comp, name="combined_processor") + + # Verify that schema includes combined docstrings from both components + assert tool.parameters == { + "type": "object", + "description": "A component that combines: 'comp_a': Process query in component A., 'comp_b': Process text in component B.", + "properties": { + "combined_input": { + "type": "string", + "description": "Provided to the 'comp_a' component as: 'The query string for component A', and Provided to the 'comp_b' component as: 'Text to process in component B'.", + } + }, + "required": ["combined_input"], + } + + # Test the tool functionality works + result = tool.invoke(combined_input="test input") + assert result["output_a"] == "A processed: test input" + assert result["output_b"] == "B processed: test input"