mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-06-26 22:00:13 +00:00
feat: ComponentTool - preserve docstrings from underlying pipeline components (#9403)
* ComponentTool - preserve docstrings from underlying pipeline components * PR feedback, adjustments
This commit is contained in:
parent
6ad23f822f
commit
f253db3e14
@ -17,7 +17,7 @@ from haystack.core.serialization import (
|
||||
from haystack.tools import Tool
|
||||
from haystack.tools.errors import SchemaGenerationError
|
||||
from haystack.tools.from_function import _remove_title_from_schema
|
||||
from haystack.tools.parameters_schema_utils import _get_param_descriptions, _resolve_type
|
||||
from haystack.tools.parameters_schema_utils import _get_component_param_descriptions, _resolve_type
|
||||
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -270,7 +270,7 @@ class ComponentTool(Tool):
|
||||
:raises SchemaGenerationError: If schema generation fails
|
||||
:returns: OpenAI tools schema for the component's run method parameters.
|
||||
"""
|
||||
component_run_description, param_descriptions = _get_param_descriptions(component.run)
|
||||
component_run_description, param_descriptions = _get_component_param_descriptions(component)
|
||||
|
||||
# collect fields (types and defaults) and descriptions from function parameters
|
||||
fields: Dict[str, Any] = {}
|
||||
|
@ -47,6 +47,68 @@ def _get_param_descriptions(method: Callable) -> Tuple[str, Dict[str, str]]:
|
||||
return parsed_doc.short_description or "", param_descriptions
|
||||
|
||||
|
||||
def _get_component_param_descriptions(component: Any) -> Tuple[str, Dict[str, str]]:
    """
    Build the short description and the per-parameter descriptions for a component.

    A regular component is described by the docstring of its `run` method.
    A SuperComponent starts from its own `run` docstring as well, and is then enriched with the
    docstrings of the pipeline components that its `input_mapping` points to.

    :param component: The component to extract parameter descriptions from
    :returns: A tuple of (short_description, param_descriptions)
    """
    from haystack.core.super_component.super_component import _SuperComponent

    # Baseline: whatever the component's own run method documents.
    summary, param_docs = _get_param_descriptions(component.run)

    # Regular components need nothing more than their own docstring.
    if not isinstance(component, _SuperComponent):
        return summary, param_docs

    seen_components = set()
    component_summaries = []

    for super_param_name, pipeline_paths in component.input_mapping.items():
        # One SuperComponent input may fan out to several sockets, e.g.
        #   input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]}
        # so we gather one usage sentence per mapped socket and join them afterwards.
        usages = []
        for path in pipeline_paths:
            try:
                # Resolve the pipeline component and the socket this input is mapped to
                comp_name, socket_name = component._split_component_path(path)
                mapped_component = component.pipeline.get_component(comp_name)

                run_desc, run_param_docs = _get_param_descriptions(mapped_component.run)

                # Each pipeline component contributes to the overall summary at most once
                if comp_name not in seen_components:
                    seen_components.add(comp_name)
                    if run_desc:
                        component_summaries.append(f"'{comp_name}': {run_desc}")

                # Only sockets that are documented in the mapped run method yield a usage sentence
                socket_doc = run_param_docs.get(socket_name)
                if socket_doc:
                    usages.append(f"Provided to the '{comp_name}' component as: '{socket_doc}'")
            except Exception as e:
                # Best effort: a path that cannot be resolved is logged and skipped
                logger.debug(f"Error extracting description for {super_param_name} from {path}: {str(e)}")

        if usages:
            param_docs[super_param_name] = ", and ".join(usages) + "."

    # Replace the SuperComponent's own summary with one assembled from its components
    if component_summaries:
        summary = f"A component that combines: {', '.join(component_summaries)}"

    return summary, param_docs
|
||||
|
||||
|
||||
def _dataclass_to_pydantic_model(dc_type: Any) -> type[BaseModel]:
|
||||
"""
|
||||
Convert a Python dataclass to an equivalent Pydantic model.
|
||||
|
@ -0,0 +1,16 @@
|
||||
---
|
||||
enhancements:
|
||||
- |
|
||||
ComponentTool now preserves and combines docstrings from underlying pipeline components when wrapping a SuperComponent.
|
||||
When a SuperComponent is used with ComponentTool, two key improvements are made:
|
||||
|
||||
1. Parameter descriptions are now extracted from the original components in the wrapped pipeline. When
|
||||
a single input is mapped to multiple components, the parameter descriptions are combined from all
|
||||
mapped components, providing comprehensive information about how the parameter is used throughout the pipeline.
|
||||
|
||||
2. The overall component description is now generated from the descriptions of all underlying components
|
||||
instead of using the generic SuperComponent description. This helps LLMs understand what the component
|
||||
actually does rather than just seeing "Runs the wrapped pipeline with the provided inputs."
|
||||
|
||||
These changes make SuperComponents much more useful with LLM function calling as the LLM will get detailed
|
||||
information about both the component's purpose and its parameters.
|
@ -14,7 +14,7 @@ import pytest
|
||||
from openai.types.chat import ChatCompletion, ChatCompletionMessage
|
||||
from openai.types.chat.chat_completion import Choice
|
||||
|
||||
from haystack import Pipeline, component
|
||||
from haystack import Pipeline, component, SuperComponent
|
||||
from haystack.components.builders import PromptBuilder
|
||||
from haystack.components.generators.chat import OpenAIChatGenerator
|
||||
from haystack.components.tools import ToolInvoker
|
||||
@ -639,3 +639,108 @@ class TestToolComponentInPipelineWithOpenAI:
|
||||
result = pipeline.run({"llm": {"messages": [ChatMessage.from_user(text="Hello")], "tools": [tool]}})
|
||||
|
||||
assert result["llm"]["replies"][0].text == "A response from the model"
|
||||
|
||||
def test_component_tool_with_super_component_docstrings(self, monkeypatch):
    """Check that a ComponentTool wrapping a SuperComponent reuses the docstrings of the wrapped component."""

    @component
    class AnnotatedComponent:
        """An annotated component with descriptive parameter docstrings."""

        @component.output_types(result=str)
        def run(self, text: str, number: int = 42):
            """Process inputs and return result.
            :param text: A detailed description of the text parameter that should be preserved
            :param number: A detailed description of the number parameter that should be preserved
            """
            return {"result": f"Processed: {text} and {number}"}

    # Single-component pipeline exposed through a SuperComponent with renamed inputs and outputs
    wrapped_pipeline = Pipeline()
    wrapped_pipeline.add_component("processor", AnnotatedComponent())
    wrapper = SuperComponent(
        pipeline=wrapped_pipeline,
        input_mapping={"input_text": ["processor.text"], "input_number": ["processor.number"]},
        output_mapping={"processor.result": "processed_result"},
    )

    tool = ComponentTool(component=wrapper, name="text_processor")

    # The schema must carry the run/parameter docstrings of the underlying component
    expected_parameters = {
        "type": "object",
        "description": "A component that combines: 'processor': Process inputs and return result.",
        "properties": {
            "input_text": {
                "type": "string",
                "description": "Provided to the 'processor' component as: 'A detailed description of the text parameter that should be preserved'.",
            },
            "input_number": {
                "type": "integer",
                "description": "Provided to the 'processor' component as: 'A detailed description of the number parameter that should be preserved'.",
            },
        },
        "required": ["input_text"],
    }
    assert tool.parameters == expected_parameters

    # The tool must still be invocable end to end
    outcome = tool.invoke(input_text="Hello", input_number=42)
    assert outcome["processed_result"] == "Processed: Hello and 42"
|
||||
|
||||
def test_component_tool_with_multiple_mapped_docstrings(self):
    """Check that docstrings are merged when one SuperComponent input is mapped to several components."""

    @component
    class ComponentA:
        """Component A with descriptive docstrings."""

        @component.output_types(output_a=str)
        def run(self, query: str):
            """Process query in component A.
            :param query: The query string for component A
            """
            return {"output_a": f"A processed: {query}"}

    @component
    class ComponentB:
        """Component B with descriptive docstrings."""

        @component.output_types(output_b=str)
        def run(self, text: str):
            """Process text in component B.
            :param text: Text to process in component B
            """
            return {"output_b": f"B processed: {text}"}

    # Two independent components that both receive the same SuperComponent input
    wrapped_pipeline = Pipeline()
    wrapped_pipeline.add_component("comp_a", ComponentA())
    wrapped_pipeline.add_component("comp_b", ComponentB())
    wrapper = SuperComponent(
        pipeline=wrapped_pipeline, input_mapping={"combined_input": ["comp_a.query", "comp_b.text"]}
    )

    tool = ComponentTool(component=wrapper, name="combined_processor")

    # Both the overall description and the parameter description must mention each mapped component
    expected_parameters = {
        "type": "object",
        "description": "A component that combines: 'comp_a': Process query in component A., 'comp_b': Process text in component B.",
        "properties": {
            "combined_input": {
                "type": "string",
                "description": "Provided to the 'comp_a' component as: 'The query string for component A', and Provided to the 'comp_b' component as: 'Text to process in component B'.",
            }
        },
        "required": ["combined_input"],
    }
    assert tool.parameters == expected_parameters

    # The single input must reach both components when the tool is invoked
    outcome = tool.invoke(combined_input="test input")
    assert outcome["output_a"] == "A processed: test input"
    assert outcome["output_b"] == "B processed: test input"
|
||||
|
Loading…
x
Reference in New Issue
Block a user