feat: Adding StringJoiner (#8357)

* Adding StringJoiner

* Release notes

* Remove typing

* Remove unused import

* Try to fix header

* Fix one test

* Add to docs, move test to behavioral pipeline test

* Undo changes

* Fix test

* Update haystack/components/joiners/string_joiner.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/joiners/string_joiner.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Provide usage example

* Apply suggestions from code review

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
This commit is contained in:
Sebastian Husch Lee 2024-10-30 16:03:41 +01:00 committed by GitHub
parent 3e30b1db7e
commit 294a67e426
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 135 additions and 4 deletions

View File

@ -24,7 +24,7 @@ pip install haystack-ai
Install from the `main` branch to try the newest features:
```sh
pip install git+https://github.com/deepset-ai/haystack.git@main
pip install git+https://github.com/deepset-ai/haystack.git@main
```
Haystack supports multiple installation methods including Docker images. For a comprehensive guide please refer

View File

@ -1,7 +1,7 @@
loaders:
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
search_path: [../../../haystack/components/joiners]
modules: ["document_joiner", "branch", "answer_joiner"]
modules: ["document_joiner", "branch", "answer_joiner", "string_joiner"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter

View File

@ -5,5 +5,6 @@
from .answer_joiner import AnswerJoiner
from .branch import BranchJoiner
from .document_joiner import DocumentJoiner
from .string_joiner import StringJoiner
__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner"]
__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner", "StringJoiner"]

View File

@ -0,0 +1,59 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from typing import List
from haystack import component, logging
from haystack.core.component.types import Variadic
logger = logging.getLogger(__name__)
@component
class StringJoiner:
    """
    Component to join strings from different components to a list of strings.

    ### Usage example

    ```python
    from haystack.components.joiners import StringJoiner
    from haystack.components.builders import PromptBuilder
    from haystack.core.pipeline import Pipeline

    string_1 = "What's Natural Language Processing?"
    string_2 = "What is life?"

    pipeline = Pipeline()
    pipeline.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}"))
    pipeline.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}"))
    pipeline.add_component("string_joiner", StringJoiner())

    pipeline.connect("prompt_builder_1.prompt", "string_joiner.strings")
    pipeline.connect("prompt_builder_2.prompt", "string_joiner.strings")

    print(pipeline.run(data={"prompt_builder_1": {"query": string_1}, "prompt_builder_2": {"query": string_2}}))

    >> {"string_joiner": {"strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What is life?"]}}
    ```
    """

    @component.output_types(strings=List[str])
    def run(self, strings: Variadic[str]):
        """
        Joins strings into a list of strings.

        :param strings:
            Strings from different components. A single bare string is treated as one item
            rather than being split into its characters.

        :returns:
            A dictionary with the following keys:
            - `strings`: Merged list of strings
        """
        # A bare `str` is itself an iterable of one-character strings, so `list("hello")`
        # would yield ["h", "e", "l", "l", "o"]. Wrap it so a direct call behaves as expected.
        if isinstance(strings, str):
            return {"strings": [strings]}

        return {"strings": list(strings)}

View File

@ -0,0 +1,4 @@
---
features:
- |
Added the `StringJoiner` component, which collects strings coming from different components into a single list of strings.

View File

@ -0,0 +1,37 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from haystack.core.serialization import component_from_dict, component_to_dict
from haystack.components.joiners.string_joiner import StringJoiner
class TestStringJoiner:
    """Unit tests for `StringJoiner`: construction, (de)serialization and `run`."""

    def test_init(self):
        # The component is constructed without any arguments.
        assert isinstance(StringJoiner(), StringJoiner)

    def test_to_dict(self):
        # Serializing a default instance yields its import path and no init parameters.
        expected = {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}}
        serialized = component_to_dict(StringJoiner(), name="string_joiner")
        assert serialized == expected

    def test_from_dict(self):
        # Deserializing the same payload gives back a StringJoiner instance.
        serialized = {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}}
        restored = component_from_dict(StringJoiner, data=serialized, name="string_joiner")
        assert isinstance(restored, StringJoiner)

    def test_empty_list(self):
        # No incoming strings produce an empty output list.
        output = StringJoiner().run([])
        assert output == {"strings": []}

    def test_single_string(self):
        # A one-character string passed directly comes back as a single-element list.
        output = StringJoiner().run("a")
        assert output == {"strings": ["a"]}

    def test_two_strings(self):
        # Input order is preserved in the output list.
        output = StringJoiner().run(["a", "b"])
        assert output == {"strings": ["a", "b"]}

View File

@ -43,6 +43,7 @@ Feature: Pipeline running
| that is linear and a component in the middle receives optional input from other components and input from the user |
| that has a loop in the middle |
| that has variadic component that receives a conditional input |
| that has a string variadic component |
Scenario Outline: Running a bad Pipeline
Given a pipeline <kind>

View File

@ -13,7 +13,7 @@ from haystack.components.builders import PromptBuilder, AnswerBuilder, ChatPromp
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner
from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner, StringJoiner
from haystack.testing.sample_components import (
Accumulate,
AddFixedValue,
@ -2195,3 +2195,32 @@ def that_has_variadic_component_that_receives_a_conditional_input():
],
),
]
@given("a pipeline that has a string variadic component", target_fixture="pipeline_data")
def that_has_a_string_variadic_component():
    """
    Build a pipeline in which two `PromptBuilder`s feed the `strings` input of a `StringJoiner`.

    Returns the pipeline together with a single run case describing the inputs,
    the expected joined output and the expected component run order.
    """
    first_query = "What's Natural Language Processing?"
    second_query = "What's is life?"

    pipe = Pipeline()
    pipe.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}"))
    pipe.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}"))
    pipe.add_component("string_joiner", StringJoiner())

    # Both builders are wired, in order, into the joiner's variadic `strings` socket.
    for builder_name in ("prompt_builder_1", "prompt_builder_2"):
        pipe.connect(f"{builder_name}.prompt", "string_joiner.strings")

    run_inputs = {"prompt_builder_1": {"query": first_query}, "prompt_builder_2": {"query": second_query}}
    joined_output = {
        "string_joiner": {
            "strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What's is life?"]
        }
    }
    run_case = PipelineRunData(
        inputs=run_inputs,
        expected_outputs=joined_output,
        expected_run_order=["prompt_builder_1", "prompt_builder_2", "string_joiner"],
    )

    return (pipe, [run_case])