mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-25 18:00:28 +00:00
1221 lines
39 KiB
Python
1221 lines
39 KiB
Python
import pytest
|
|
import logging
|
|
|
|
import haystack
|
|
from haystack import Pipeline, Document, Answer
|
|
from haystack.document_stores.memory import InMemoryDocumentStore
|
|
from haystack.nodes.other.shaper import Shaper
|
|
from haystack.nodes.retriever.sparse import BM25Retriever
|
|
|
|
|
|
@pytest.fixture
def mock_function(monkeypatch):
    """Swap Shaper's function registry for one containing only ``test_function``.

    ``test_function(a, b)`` returns a one-element tuple whose item is ``a``
    repeated ``len(b)`` times.
    """
    registry = {"test_function": lambda a, b: ([a] * len(b),)}
    monkeypatch.setattr(haystack.nodes.other.shaper, "REGISTERED_FUNCTIONS", registry)
|
|
|
|
|
|
@pytest.fixture
def mock_function_two_outputs(monkeypatch):
    """Swap Shaper's function registry for one containing only ``two_output_test_function``.

    ``two_output_test_function(a)`` returns the pair ``(a, len(a))``.
    """
    registry = {"two_output_test_function": lambda a: (a, len(a))}
    monkeypatch.setattr(haystack.nodes.other.shaper, "REGISTERED_FUNCTIONS", registry)
|
|
|
|
|
|
@pytest.mark.unit
def test_basic_invocation_only_inputs(mock_function):
    """Both function arguments are taken from values passed to ``run``."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query", "b": "documents"},
        outputs=["c"],
    )
    output, _ = node.run(query="test query", documents=["doesn't", "really", "matter"])
    context = output["invocation_context"]
    assert context["c"] == ["test query", "test query", "test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_multiple_outputs(mock_function_two_outputs):
    """Each value returned by the function is stored under its own output name."""
    node = Shaper(
        func="two_output_test_function",
        inputs={"a": "query"},
        outputs=["c", "d"],
    )
    output, _ = node.run(query="test")
    context = output["invocation_context"]
    assert context["c"] == "test"
    assert context["d"] == 4
|
|
|
|
|
|
@pytest.mark.unit
def test_multiple_outputs_error(mock_function_two_outputs, caplog):
    """A warning is logged when the function yields two values but only one output is named."""
    node = Shaper(
        func="two_output_test_function",
        inputs={"a": "query"},
        outputs=["c"],
    )
    with caplog.at_level(logging.WARNING):
        # The result itself is irrelevant here; only the captured log is inspected.
        _output, _ = node.run(query="test")
    expected_warning = "Only 1 output(s) will be stored."
    assert expected_warning in caplog.text
|
|
|
|
|
|
@pytest.mark.unit
def test_basic_invocation_only_params(mock_function):
    """Both function arguments are taken from the node's static ``params``."""
    static_args = {"a": "A", "b": list(range(3))}
    node = Shaper(func="test_function", params=static_args, outputs=["c"])
    output, _ = node.run()
    context = output["invocation_context"]
    assert context["c"] == ["A", "A", "A"]
|
|
|
|
|
|
@pytest.mark.unit
def test_basic_invocation_inputs_and_params(mock_function):
    """One argument comes from ``inputs`` and the other from ``params``."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query"},
        params={"b": list(range(2))},
        outputs=["c"],
    )
    output, _ = node.run(query="test query")
    context = output["invocation_context"]
    assert context["c"] == ["test query", "test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_basic_invocation_inputs_and_params_colliding(mock_function):
    """When ``a`` is given in both ``inputs`` and ``params``, the run-time input value is used."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query"},
        params={"a": "default value", "b": list(range(2))},
        outputs=["c"],
    )
    output, _ = node.run(query="test query")
    context = output["invocation_context"]
    assert context["c"] == ["test query", "test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_basic_invocation_inputs_and_params_using_params_as_defaults(mock_function):
    """When no query is passed to ``run``, the ``params`` value for ``a`` is used instead."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query"},
        params={"a": "default", "b": list(range(2))},
        outputs=["c"],
    )
    output, _ = node.run()
    context = output["invocation_context"]
    assert context["c"] == ["default", "default"]
|
|
|
|
|
|
@pytest.mark.unit
def test_missing_argument(mock_function):
    """A ``ValueError`` is raised when argument ``a`` is mapped by neither ``inputs`` nor ``params``."""
    node = Shaper(func="test_function", inputs={"b": "documents"}, outputs=["c"])
    expected_error = "Shaper couldn't apply the function to your inputs and parameters."
    with pytest.raises(ValueError, match=expected_error):
        node.run(query="test query", documents=["doesn't", "really", "matter"])
|
|
|
|
|
|
@pytest.mark.unit
def test_excess_argument(mock_function):
    """A ``ValueError`` is raised when ``inputs`` maps an argument the function does not accept."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query", "b": "documents", "something_extra": "query"},
        outputs=["c"],
    )
    expected_error = "Shaper couldn't apply the function to your inputs and parameters."
    with pytest.raises(ValueError, match=expected_error):
        node.run(query="test query", documents=["doesn't", "really", "matter"])
|
|
|
|
|
|
@pytest.mark.unit
def test_value_not_in_invocation_context(mock_function):
    """A ``ValueError`` is raised when an input refers to a name that is not passed to ``run``."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query", "b": "something_that_does_not_exist"},
        outputs=["c"],
    )
    expected_error = "Shaper couldn't apply the function to your inputs and parameters."
    with pytest.raises(ValueError, match=expected_error):
        node.run(query="test query", documents=["doesn't", "really", "matter"])
|
|
|
|
|
|
@pytest.mark.unit
def test_value_only_in_invocation_context(mock_function):
    """An input can be resolved from a key that exists only in the ``invocation_context``."""
    node = Shaper(
        func="test_function",
        inputs={"a": "query", "b": "invocation_context_specific"},
        outputs=["c"],
    )
    incoming_context = {"invocation_context_specific": ["doesn't", "really", "matter"]}
    output, _ = node.run(query="test query", invocation_context=incoming_context)
    context = output["invocation_context"]
    assert context["c"] == ["test query", "test query", "test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_yaml(mock_function, tmp_path):
    """A Shaper declared in YAML runs ``test_function`` and leaves query and documents untouched."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: test_function
                  inputs:
                    a: query
                  params:
                    b: [1, 1]
                  outputs:
                    - c
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(
        query="test query",
        documents=[Document(content="first"), Document(content="second"), Document(content="third")],
    )
    assert result["invocation_context"]["c"] == ["test query", "test query"]
    assert result["query"] == "test query"
    assert result["documents"] == [Document(content="first"), Document(content="second"), Document(content="third")]
|
|
|
|
|
|
#
|
|
# rename
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_rename():
    """``rename`` stores the query value under the new name ``questions``."""
    node = Shaper(func="rename", inputs={"value": "query"}, outputs=["questions"])
    output, _ = node.run(query="test query")
    context = output["invocation_context"]
    assert context["questions"] == "test query"
|
|
|
|
|
|
@pytest.mark.unit
def test_rename_yaml(tmp_path):
    """``rename`` configured through YAML keeps ``query`` and adds ``questions`` to the context."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: rename
                  inputs:
                    value: query
                  outputs:
                    - questions
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(query="test query")
    assert result["invocation_context"]["query"] == "test query"
    assert result["invocation_context"]["questions"] == "test query"
|
|
|
|
|
|
#
|
|
# value_to_list
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_value_to_list():
    """``value_to_list`` repeats the query once per element of the target list."""
    node = Shaper(
        func="value_to_list",
        inputs={"value": "query", "target_list": "documents"},
        outputs=["questions"],
    )
    output, _ = node.run(query="test query", documents=["doesn't", "really", "matter"])
    context = output["invocation_context"]
    assert context["questions"] == ["test query", "test query", "test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_value_to_list_yaml(tmp_path):
    """``value_to_list`` configured through YAML expands the query without altering pipeline output."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                    target_list: documents
                  outputs:
                    - questions
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(
        query="test query",
        documents=[Document(content="first"), Document(content="second"), Document(content="third")],
    )
    assert result["invocation_context"]["questions"] == ["test query", "test query", "test query"]
    # Assert pipeline output is unaffected
    assert result["query"] == "test query"
    assert result["documents"] == [Document(content="first"), Document(content="second"), Document(content="third")]
|
|
|
|
|
|
#
|
|
# join_lists
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_join_lists():
    """``join_lists`` concatenates the given lists in order."""
    node = Shaper(
        func="join_lists",
        params={"lists": [[1, 2, 3], [4, 5]]},
        outputs=["list"],
    )
    output, _ = node.run()
    context = output["invocation_context"]
    assert context["list"] == [1, 2, 3, 4, 5]
|
|
|
|
|
|
@pytest.mark.unit
def test_join_lists_yaml(tmp_path):
    """``join_lists`` configured through YAML joins ``documents`` and ``file_paths`` into one list."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: join_lists
                  inputs:
                    lists:
                      - documents
                      - file_paths
                  outputs:
                    - single_list
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(documents=["first", "second", "third"], file_paths=["file1.txt", "file2.txt"])
    assert result["invocation_context"]["single_list"] == ["first", "second", "third", "file1.txt", "file2.txt"]
|
|
|
|
|
|
#
|
|
# join_strings
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_join_strings():
    """``join_strings`` joins the strings with the explicitly given delimiter."""
    node = Shaper(
        func="join_strings",
        params={"strings": ["first", "second"], "delimiter": " | "},
        outputs=["single_string"],
    )
    output, _ = node.run()
    context = output["invocation_context"]
    assert context["single_string"] == "first | second"
|
|
|
|
|
|
@pytest.mark.unit
def test_join_strings_default_delimiter():
    """Without a ``delimiter`` parameter the strings are joined with a single space."""
    node = Shaper(
        func="join_strings",
        params={"strings": ["first", "second"]},
        outputs=["single_string"],
    )
    output, _ = node.run()
    context = output["invocation_context"]
    assert context["single_string"] == "first second"
|
|
|
|
|
|
@pytest.mark.unit
def test_join_strings_yaml(tmp_path):
    """``join_strings`` configured through YAML joins the documents with the ``' - '`` delimiter."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: join_strings
                  inputs:
                    strings: documents
                  params:
                    delimiter: ' - '
                  outputs:
                    - single_string
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(documents=["first", "second", "third"])
    assert result["invocation_context"]["single_string"] == "first - second - third"
|
|
|
|
|
|
@pytest.mark.unit
def test_join_strings_default_delimiter_yaml(tmp_path):
    """``join_strings`` configured through YAML without a delimiter joins with a single space."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: join_strings
                  inputs:
                    strings: documents
                  outputs:
                    - single_string
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(documents=["first", "second", "third"])
    assert result["invocation_context"]["single_string"] == "first second third"
|
|
|
|
|
|
#
|
|
# join_documents
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_join_documents():
    """``join_documents`` merges all documents into one; the result is in the context and the output."""
    node = Shaper(
        func="join_documents",
        inputs={"documents": "documents"},
        params={"delimiter": " | "},
        outputs=["documents"],
    )
    source_docs = [Document(content="first"), Document(content="second"), Document(content="third")]
    output, _ = node.run(documents=source_docs)
    assert output["invocation_context"]["documents"] == [Document(content="first | second | third")]
    assert output["documents"] == [Document(content="first | second | third")]
|
|
|
|
|
|
@pytest.mark.unit
def test_join_documents_without_publish_outputs():
    """With ``publish_outputs=False`` the joined document stays in the context only.

    Marked ``unit`` like the sibling Shaper tests so that marker-based test
    selection does not skip it.
    """
    shaper = Shaper(
        func="join_documents",
        inputs={"documents": "documents"},
        params={"delimiter": " | "},
        outputs=["documents"],
        publish_outputs=False,
    )
    results, _ = shaper.run(
        documents=[Document(content="first"), Document(content="second"), Document(content="third")]
    )
    assert results["invocation_context"]["documents"] == [Document(content="first | second | third")]
    # Nothing must be promoted to the top-level node output.
    assert "documents" not in results
|
|
|
|
|
|
@pytest.mark.unit
def test_join_documents_with_publish_outputs_as_list():
    """With ``publish_outputs=["documents"]`` the joined document is also in the top-level output.

    Marked ``unit`` like the sibling Shaper tests so that marker-based test
    selection does not skip it.
    """
    shaper = Shaper(
        func="join_documents",
        inputs={"documents": "documents"},
        params={"delimiter": " | "},
        outputs=["documents"],
        publish_outputs=["documents"],
    )
    results, _ = shaper.run(
        documents=[Document(content="first"), Document(content="second"), Document(content="third")]
    )
    assert results["invocation_context"]["documents"] == [Document(content="first | second | third")]
    assert results["documents"] == [Document(content="first | second | third")]
|
|
|
|
|
|
@pytest.mark.unit
def test_join_documents_default_delimiter():
    """Without a ``delimiter`` parameter the document contents are joined with a single space."""
    node = Shaper(
        func="join_documents",
        inputs={"documents": "documents"},
        outputs=["documents"],
    )
    source_docs = [Document(content="first"), Document(content="second"), Document(content="third")]
    output, _ = node.run(documents=source_docs)
    context = output["invocation_context"]
    assert context["documents"] == [Document(content="first second third")]
|
|
|
|
|
|
@pytest.mark.unit
def test_join_documents_yaml(tmp_path):
    """``join_documents`` configured through YAML merges the documents with the ``' - '`` delimiter."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore

            components:
              - name: shaper
                type: Shaper
                params:
                  func: join_documents
                  inputs:
                    documents: documents
                  params:
                    delimiter: ' - '
                  outputs:
                    - documents

            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(
        query="test query",
        documents=[Document(content="first"), Document(content="second"), Document(content="third")],
    )
    assert result["invocation_context"]["documents"] == [Document(content="first - second - third")]
    assert result["documents"] == [Document(content="first - second - third")]
|
|
|
|
|
|
@pytest.mark.unit
def test_join_documents_default_delimiter_yaml(tmp_path):
    """``join_documents`` configured through YAML without a delimiter joins with a single space."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: join_documents
                  inputs:
                    documents: documents
                  outputs:
                    - documents
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(
        query="test query",
        documents=[Document(content="first"), Document(content="second"), Document(content="third")],
    )
    assert result["invocation_context"]["documents"] == [Document(content="first second third")]
|
|
|
|
|
|
#
|
|
# strings_to_answers
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_answers_no_meta_no_hashkeys():
    """``strings_to_answers`` wraps each string in a generative ``Answer``."""
    node = Shaper(func="strings_to_answers", inputs={"strings": "responses"}, outputs=["answers"])
    output, _ = node.run(invocation_context={"responses": ["first", "second", "third"]})
    expected_answers = [Answer(answer=text, type="generative") for text in ("first", "second", "third")]
    assert output["invocation_context"]["answers"] == expected_answers
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_answers_yaml(tmp_path):
    """``strings_to_answers`` configured through YAML yields answers in the context and the output."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: strings_to_answers
                  params:
                    strings: ['a', 'b', 'c']
                  outputs:
                    - answers
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run()
    assert result["invocation_context"]["answers"] == [
        Answer(answer="a", type="generative"),
        Answer(answer="b", type="generative"),
        Answer(answer="c", type="generative"),
    ]
    assert result["answers"] == [
        Answer(answer="a", type="generative"),
        Answer(answer="b", type="generative"),
        Answer(answer="c", type="generative"),
    ]
|
|
|
|
|
|
#
|
|
# answers_to_strings
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_answers_to_strings():
    """``answers_to_strings`` extracts the answer text from each ``Answer``."""
    node = Shaper(func="answers_to_strings", inputs={"answers": "documents"}, outputs=["strings"])
    # The answers are deliberately passed through the ``documents`` argument of ``run``.
    source_answers = [Answer(answer="first"), Answer(answer="second"), Answer(answer="third")]
    output, _ = node.run(documents=source_answers)
    context = output["invocation_context"]
    assert context["strings"] == ["first", "second", "third"]
|
|
|
|
|
|
@pytest.mark.unit
def test_answers_to_strings_yaml(tmp_path):
    """``answers_to_strings`` configured through YAML extracts the answer texts."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: answers_to_strings
                  inputs:
                    answers: documents
                  outputs:
                    - strings
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(documents=[Answer(answer="a"), Answer(answer="b"), Answer(answer="c")])
    assert result["invocation_context"]["strings"] == ["a", "b", "c"]
|
|
|
|
|
|
#
|
|
# strings_to_documents
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_no_meta_no_hashkeys():
    """``strings_to_documents`` wraps each string in a ``Document``."""
    node = Shaper(func="strings_to_documents", inputs={"strings": "responses"}, outputs=["documents"])
    output, _ = node.run(invocation_context={"responses": ["first", "second", "third"]})
    expected_docs = [Document(content=text) for text in ("first", "second", "third")]
    assert output["invocation_context"]["documents"] == expected_docs
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_single_meta_no_hashkeys():
    """A single ``meta`` dictionary is applied to every created document."""
    node = Shaper(
        func="strings_to_documents",
        inputs={"strings": "responses"},
        params={"meta": {"a": "A"}},
        outputs=["documents"],
    )
    output, _ = node.run(invocation_context={"responses": ["first", "second", "third"]})
    expected_docs = [Document(content=text, meta={"a": "A"}) for text in ("first", "second", "third")]
    assert output["invocation_context"]["documents"] == expected_docs
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_wrong_number_of_meta():
    """A ``ValueError`` is raised when one meta dictionary is listed for three strings."""
    node = Shaper(
        func="strings_to_documents",
        inputs={"strings": "responses"},
        params={"meta": [{"a": "A"}]},
        outputs=["documents"],
    )
    incoming_context = {"responses": ["first", "second", "third"]}

    with pytest.raises(ValueError, match="Not enough metadata dictionaries."):
        node.run(invocation_context=incoming_context)
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_many_meta_no_hashkeys():
    """A list of ``meta`` dictionaries is applied to the documents position by position."""
    per_document_meta = [{"a": number} for number in range(1, 4)]
    node = Shaper(
        func="strings_to_documents",
        inputs={"strings": "responses"},
        params={"meta": per_document_meta},
        outputs=["documents"],
    )
    output, _ = node.run(invocation_context={"responses": ["first", "second", "third"]})
    expected_docs = [
        Document(content="first", meta={"a": 1}),
        Document(content="second", meta={"a": 2}),
        Document(content="third", meta={"a": 3}),
    ]
    assert output["invocation_context"]["documents"] == expected_docs
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_single_meta_with_hashkeys():
    """``meta`` and ``id_hash_keys`` parameters are both applied to every created document."""
    hash_keys = ["content", "meta"]
    node = Shaper(
        func="strings_to_documents",
        inputs={"strings": "responses"},
        params={"meta": {"a": "A"}, "id_hash_keys": ["content", "meta"]},
        outputs=["documents"],
    )
    output, _ = node.run(invocation_context={"responses": ["first", "second", "third"]})
    expected_docs = [
        Document(content="first", meta={"a": "A"}, id_hash_keys=hash_keys),
        Document(content="second", meta={"a": "A"}, id_hash_keys=hash_keys),
        Document(content="third", meta={"a": "A"}, id_hash_keys=hash_keys),
    ]
    assert output["invocation_context"]["documents"] == expected_docs
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_no_meta_no_hashkeys_yaml(tmp_path):
    """``strings_to_documents`` configured through YAML turns static strings into documents."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: strings_to_documents
                  params:
                    strings: ['a', 'b', 'c']
                  outputs:
                    - documents
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run()
    assert result["invocation_context"]["documents"] == [
        Document(content="a"),
        Document(content="b"),
        Document(content="c"),
    ]
|
|
|
|
|
|
@pytest.mark.unit
def test_strings_to_documents_meta_and_hashkeys_yaml(tmp_path):
    """``strings_to_documents`` configured through YAML applies per-document meta and id hash keys."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: strings_to_documents
                  params:
                    strings: ['first', 'second', 'third']
                    id_hash_keys: ['content', 'meta']
                    meta:
                      - a: 1
                      - a: 2
                      - a: 3
                  outputs:
                    - documents
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run()
    assert result["invocation_context"]["documents"] == [
        Document(content="first", meta={"a": 1}, id_hash_keys=["content", "meta"]),
        Document(content="second", meta={"a": 2}, id_hash_keys=["content", "meta"]),
        Document(content="third", meta={"a": 3}, id_hash_keys=["content", "meta"]),
    ]
|
|
|
|
|
|
#
|
|
# documents_to_strings
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_documents_to_strings():
    """``documents_to_strings`` extracts the content of each document."""
    node = Shaper(func="documents_to_strings", inputs={"documents": "documents"}, outputs=["strings"])
    source_docs = [Document(content="first"), Document(content="second"), Document(content="third")]
    output, _ = node.run(documents=source_docs)
    context = output["invocation_context"]
    assert context["strings"] == ["first", "second", "third"]
|
|
|
|
|
|
@pytest.mark.unit
def test_documents_to_strings_yaml(tmp_path):
    """``documents_to_strings`` configured through YAML extracts the document contents."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: shaper
                type: Shaper
                params:
                  func: documents_to_strings
                  inputs:
                    documents: documents
                  outputs:
                    - strings
            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(documents=[Document(content="a"), Document(content="b"), Document(content="c")])
    assert result["invocation_context"]["strings"] == ["a", "b", "c"]
|
|
|
|
|
|
#
|
|
# Chaining and real-world usage
|
|
#
|
|
|
|
|
|
@pytest.mark.unit
def test_chain_shapers():
    """Two chained Shapers: the second sees the single joined document produced by the first."""
    joiner = Shaper(
        func="join_documents",
        inputs={"documents": "documents"},
        params={"delimiter": " - "},
        outputs=["documents"],
    )
    expander = Shaper(
        func="value_to_list",
        inputs={"value": "query", "target_list": "documents"},
        outputs=["questions"],
    )

    pipeline = Pipeline()
    pipeline.add_node(joiner, name="shaper_1", inputs=["Query"])
    pipeline.add_node(expander, name="shaper_2", inputs=["shaper_1"])

    source_docs = [Document(content="first"), Document(content="second"), Document(content="third")]
    output = pipeline.run(query="test query", documents=source_docs)

    context = output["invocation_context"]
    assert context["documents"] == [Document(content="first - second - third")]
    # Only one document is left after the join, so the query is repeated once.
    assert context["questions"] == ["test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_chain_shapers_yaml(tmp_path):
    """Two Shapers chained through YAML: join the documents, then expand the query to match."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:

              - name: shaper_1
                type: Shaper
                params:
                  func: join_documents
                  inputs:
                    documents: documents
                  params:
                    delimiter: ' - '
                  outputs:
                    - documents

              - name: shaper_2
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                    target_list: documents
                  outputs:
                    - questions

            pipelines:
              - name: query
                nodes:
                  - name: shaper_1
                    inputs:
                      - Query
                  - name: shaper_2
                    inputs:
                      - shaper_1
        """
    )
    pipe = Pipeline.load_from_yaml(path=config_path)

    results = pipe.run(
        query="test query",
        documents=[Document(content="first"), Document(content="second"), Document(content="third")],
    )

    assert results["invocation_context"]["documents"] == [Document(content="first - second - third")]
    assert results["invocation_context"]["questions"] == ["test query"]
|
|
|
|
|
|
@pytest.mark.unit
def test_chain_shapers_yaml_2(tmp_path):
    """Five Shapers chained through YAML end with one document holding the joined greetings."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:

              - name: shaper_1
                type: Shaper
                params:
                  func: strings_to_documents
                  params:
                    strings:
                      - first
                      - second
                      - third
                  outputs:
                    - string_documents

              - name: shaper_2
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    target_list: string_documents
                  params:
                    value: hello
                  outputs:
                    - greetings

              - name: shaper_3
                type: Shaper
                params:
                  func: join_strings
                  inputs:
                    strings: greetings
                  params:
                    delimiter: '. '
                  outputs:
                    - many_greetings

              - name: expander
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: many_greetings
                  params:
                    target_list: [1]
                  outputs:
                    - many_greetings

              - name: shaper_4
                type: Shaper
                params:
                  func: strings_to_documents
                  inputs:
                    strings: many_greetings
                  outputs:
                    - documents_with_greetings

            pipelines:
              - name: query
                nodes:
                  - name: shaper_1
                    inputs:
                      - Query
                  - name: shaper_2
                    inputs:
                      - shaper_1
                  - name: shaper_3
                    inputs:
                      - shaper_2
                  - name: expander
                    inputs:
                      - shaper_3
                  - name: shaper_4
                    inputs:
                      - expander
        """
    )
    pipe = Pipeline.load_from_yaml(path=config_path)
    results = pipe.run()
    assert results["invocation_context"]["documents_with_greetings"] == [Document(content="hello. hello. hello")]
|
|
|
|
|
|
@pytest.mark.integration
def test_with_prompt_node(tmp_path):
    """A Shaper feeding a PromptNode: one question per document produces two answers."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: prompt_model
                type: PromptModel

              - name: shaper
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                    target_list: documents
                  outputs:
                    - questions

              - name: prompt_node
                type: PromptNode
                params:
                  output_variable: answers
                  model_name_or_path: prompt_model
                  default_prompt_template: question-answering

            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
                  - name: prompt_node
                    inputs:
                      - shaper
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(
        query="What's Berlin like?",
        documents=[Document("Berlin is an amazing city."), Document("Berlin is a cool city in Germany.")],
    )
    assert len(result["answers"]) == 2
    # NOTE(review): this is exact membership in the answers list, not a substring
    # match inside each answer -- confirm that this is the intended check.
    expected_words = ["berlin", "germany", "cool", "city", "amazing"]
    assert any(word in result["answers"] for word in expected_words)

    assert len(result["invocation_context"]) > 0
    assert len(result["invocation_context"]["questions"]) == 2
|
|
|
|
|
|
@pytest.mark.integration
def test_with_multiple_prompt_nodes(tmp_path):
    """Shapers and three PromptNodes chained through YAML produce two answers mentioning Berlin."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: prompt_model
                type: PromptModel

              - name: shaper
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                    target_list: documents
                  outputs: [questions]

              - name: renamer
                type: Shaper
                params:
                  func: rename
                  inputs:
                    value: new-questions
                  outputs:
                    - questions

              - name: prompt_node
                type: PromptNode
                params:
                  model_name_or_path: prompt_model
                  default_prompt_template: question-answering

              - name: prompt_node_second
                type: PromptNode
                params:
                  model_name_or_path: prompt_model
                  default_prompt_template: question-generation
                  output_variable: new-questions

              - name: prompt_node_third
                type: PromptNode
                params:
                  output_variable: answers
                  model_name_or_path: google/flan-t5-small
                  default_prompt_template: question-answering

            pipelines:
              - name: query
                nodes:
                  - name: shaper
                    inputs:
                      - Query
                  - name: prompt_node
                    inputs:
                      - shaper
                  - name: prompt_node_second
                    inputs:
                      - prompt_node
                  - name: renamer
                    inputs:
                      - prompt_node_second
                  - name: prompt_node_third
                    inputs:
                      - renamer
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(
        query="What's Berlin like?",
        documents=[Document("Berlin is an amazing city."), Document("Berlin is a cool city in Germany.")],
    )
    results = result["answers"]
    assert len(results) == 2
    # Generator form: no throwaway list of ``True`` values is built.
    assert any("Berlin" in r for r in results)
|
|
|
|
|
|
@pytest.mark.unit
def test_join_query_and_documents_yaml(tmp_path):
    """The query is wrapped in a list and appended to the documents; the result replaces ``query``."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore

            components:
              - name: expander
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                  params:
                    target_list: [1]
                  outputs:
                    - query

              - name: joiner
                type: Shaper
                params:
                  func: join_lists
                  inputs:
                    lists:
                      - documents
                      - query
                  outputs:
                    - query

            pipelines:
              - name: query
                nodes:
                  - name: expander
                    inputs:
                      - Query
                  - name: joiner
                    inputs:
                      - expander
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(query="What is going on here?", documents=["first", "second", "third"])
    assert result["query"] == ["first", "second", "third", "What is going on here?"]
|
|
|
|
|
|
@pytest.mark.unit
def test_join_query_and_documents_into_single_string_yaml(tmp_path):
    """Documents and query are joined into one list and then concatenated into a single string."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: expander
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                  params:
                    target_list: [1]
                  outputs:
                    - query

              - name: joiner
                type: Shaper
                params:
                  func: join_lists
                  inputs:
                    lists:
                      - documents
                      - query
                  outputs:
                    - query

              - name: concatenator
                type: Shaper
                params:
                  func: join_strings
                  inputs:
                    strings: query
                  outputs:
                    - query

            pipelines:
              - name: query
                nodes:
                  - name: expander
                    inputs:
                      - Query
                  - name: joiner
                    inputs:
                      - expander
                  - name: concatenator
                    inputs:
                      - joiner
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(query="What is going on here?", documents=["first", "second", "third"])
    assert result["query"] == "first second third What is going on here?"
|
|
|
|
|
|
@pytest.mark.unit
def test_join_query_and_documents_convert_into_documents_yaml(tmp_path):
    """Documents and query are joined into one list of strings, then converted into ``Document`` objects."""
    config_path = tmp_path / "tmp_config.yml"
    # Plain string literal: the YAML has no placeholders, so an f-string prefix is not needed.
    config_path.write_text(
        """
            version: ignore
            components:
              - name: expander
                type: Shaper
                params:
                  func: value_to_list
                  inputs:
                    value: query
                  params:
                    target_list: [1]
                  outputs:
                    - query

              - name: joiner
                type: Shaper
                params:
                  func: join_lists
                  inputs:
                    lists:
                      - documents
                      - query
                  outputs:
                    - query_and_docs

              - name: converter
                type: Shaper
                params:
                  func: strings_to_documents
                  inputs:
                    strings: query_and_docs
                  outputs:
                    - query_and_docs

            pipelines:
              - name: query
                nodes:
                  - name: expander
                    inputs:
                      - Query
                  - name: joiner
                    inputs:
                      - expander
                  - name: converter
                    inputs:
                      - joiner
        """
    )
    pipeline = Pipeline.load_from_yaml(path=config_path)
    result = pipeline.run(query="What is going on here?", documents=["first", "second", "third"])
    assert result["invocation_context"]["query_and_docs"]
    assert len(result["invocation_context"]["query_and_docs"]) == 4
    assert isinstance(result["invocation_context"]["query_and_docs"][0], Document)
|
|
|
|
|
|
@pytest.mark.unit
def test_shaper_publishes_unknown_arg_does_not_break_pipeline():
    """A published output named ``unknown_by_retriever`` does not stop the downstream retriever."""
    store = InMemoryDocumentStore(use_bm25=True)
    store.write_documents([Document(content="test query")])
    bm25_retriever = BM25Retriever(document_store=store)
    renamer = Shaper(
        func="rename",
        inputs={"value": "query"},
        outputs=["unknown_by_retriever"],
        publish_outputs=True,
    )

    pipe = Pipeline()
    pipe.add_node(component=renamer, name="shaper", inputs=["Query"])
    pipe.add_node(component=bm25_retriever, name="retriever", inputs=["shaper"])

    output = pipe.run(query="test query")
    assert output["invocation_context"]["unknown_by_retriever"] == "test query"
    assert output["unknown_by_retriever"] == "test query"
    assert len(output["documents"]) == 1
|