haystack/test/prompt/test_handlers.py

from unittest.mock import patch

import pytest

from haystack.nodes.prompt.invocation_layer.handlers import (
    DefaultTokenStreamingHandler,
    DefaultPromptHandler,
    AnthropicTokenStreamingHandler,
)


@pytest.mark.unit
def test_prompt_handler_positive():
    """
    Check the handler's report for a prompt that fits into the model's context window.

    The mocked tokenizer yields 5 tokens; 5 + max_length (3) = 8 stays below
    model_max_length (10), so the test expects both reported lengths to be 5.
    """
    prompt_text = "I am a tokenized prompt"
    tokens = ["I", "am", "a", "tokenized", "prompt"]

    # Replace tokenizer loading with a mock so the test does not need a real model.
    tokenizer_target = "haystack.nodes.prompt.invocation_layer.handlers.AutoTokenizer.from_pretrained"
    with patch(tokenizer_target, autospec=True) as from_pretrained_mock:
        fake_tokenizer = from_pretrained_mock.return_value
        fake_tokenizer.tokenize.return_value = tokens
        fake_tokenizer.convert_tokens_to_string.return_value = prompt_text

        handler = DefaultPromptHandler("model_path", 10, 3)
        outcome = handler(prompt_text)

    expected = {
        "resized_prompt": prompt_text,
        "prompt_length": 5,
        "new_prompt_length": 5,
        "model_max_length": 10,
        "max_length": 3,
    }
    assert outcome == expected


@pytest.mark.unit
def test_prompt_handler_negative():
    """
    Check the handler's report for a prompt that does not fit into the model's context window.

    The mocked tokenizer yields 8 tokens; 8 + max_length (3) = 11 exceeds
    model_max_length (10), so the test expects a new prompt length of 7.
    """
    # The mocked detokenizer returns the text of the first seven tokens.
    truncated_text = "I am a tokenized prompt of length"
    tokens = ["I", "am", "a", "tokenized", "prompt", "of", "length", "eight"]

    # Replace tokenizer loading with a mock so the test does not need a real model.
    tokenizer_target = "haystack.nodes.prompt.invocation_layer.handlers.AutoTokenizer.from_pretrained"
    with patch(tokenizer_target, autospec=True) as from_pretrained_mock:
        fake_tokenizer = from_pretrained_mock.return_value
        fake_tokenizer.tokenize.return_value = tokens
        fake_tokenizer.convert_tokens_to_string.return_value = truncated_text

        handler = DefaultPromptHandler("model_path", 10, 3)
        outcome = handler(truncated_text)

    expected = {
        "resized_prompt": truncated_text,
        "prompt_length": 8,
        "new_prompt_length": 7,
        "model_max_length": 10,
        "max_length": 3,
    }
    assert outcome == expected


@pytest.mark.integration
def test_prompt_handler_basics():
    """Check that a handler instance is callable and that max_length is 100 when not passed."""
    explicit_handler = DefaultPromptHandler(model_name_or_path="gpt2", model_max_length=20, max_length=10)
    assert callable(explicit_handler)

    # No max_length given: the test expects the handler to report 100.
    defaulted_handler = DefaultPromptHandler(model_name_or_path="gpt2", model_max_length=20)
    assert defaulted_handler.max_length == 100


@pytest.mark.integration
def test_gpt2_prompt_handler():
    """
    Exercise DefaultPromptHandler with the BPE-based gpt2 tokenizer.

    With model_max_length=20 and max_length=10, the test expects a 4-token prompt to be
    reported unchanged and a 15-token prompt to be cut down to 10 tokens.
    """
    gpt2_handler = DefaultPromptHandler(model_name_or_path="gpt2", model_max_length=20, max_length=10)

    # Short prompt: no resize expected.
    short_report = gpt2_handler("This is a test")
    assert short_report == {
        "resized_prompt": "This is a test",
        "prompt_length": 4,
        "new_prompt_length": 4,
        "model_max_length": 20,
        "max_length": 10,
    }

    # Long prompt: resize expected.
    long_report = gpt2_handler("This is a prompt that will be resized because it is longer than allowed")
    assert long_report == {
        "resized_prompt": "This is a prompt that will be resized because",
        "prompt_length": 15,
        "new_prompt_length": 10,
        "model_max_length": 20,
        "max_length": 10,
    }


@pytest.mark.integration
def test_flan_prompt_handler_no_resize():
    """Check the report for a short prompt with the google/flan-t5-xxl tokenizer: 5 tokens, no resize."""
    flan_handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)

    report = flan_handler("This is a test")

    expected = {
        "resized_prompt": "This is a test",
        "prompt_length": 5,
        "new_prompt_length": 5,
        "model_max_length": 20,
        "max_length": 10,
    }
    assert report == expected


@pytest.mark.integration
def test_flan_prompt_handler_resize():
    """Check the report for a too-long prompt with the google/flan-t5-xxl tokenizer: 17 tokens cut to 10."""
    flan_handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)

    report = flan_handler("This is a prompt that will be resized because it is longer than allowed")

    # The expected text ends in "re": the test expects the cut to land inside the word "resized".
    expected = {
        "resized_prompt": "This is a prompt that will be re",
        "prompt_length": 17,
        "new_prompt_length": 10,
        "model_max_length": 20,
        "max_length": 10,
    }
    assert report == expected


@pytest.mark.integration
def test_flan_prompt_handler_empty_string():
    """Check that an empty prompt is reported with zero length and an empty resized prompt."""
    flan_handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)

    report = flan_handler("")

    expected = {
        "resized_prompt": "",
        "prompt_length": 0,
        "new_prompt_length": 0,
        "model_max_length": 20,
        "max_length": 10,
    }
    assert report == expected


@pytest.mark.integration
def test_flan_prompt_handler_none():
    """Check that a None prompt is reported with zero length and None as the resized prompt."""
    flan_handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)

    report = flan_handler(None)

    expected = {
        "resized_prompt": None,
        "prompt_length": 0,
        "new_prompt_length": 0,
        "model_max_length": 20,
        "max_length": 10,
    }
    assert report == expected


@pytest.mark.unit
@patch("builtins.print")
def test_anthropic_token_streaming_handler(mock_print):
    """
    Check which part of each streamed text the Anthropic streaming handler returns and prints.

    Each step feeds the handler one text and states the fragment the test expects back.
    When the text extends the previous one, only the added suffix is expected; when it
    does not ("And now" after " This is a new token"), the whole text is expected.
    """
    handler = AnthropicTokenStreamingHandler(DefaultTokenStreamingHandler())

    # (text passed to the handler, fragment expected to be returned and printed)
    steps = [
        (" This", " This"),
        (" This is a new", " is a new"),
        (" This is a new token", " token"),
        ("And now", "And now"),
        ("And now something completely different", " something completely different"),
    ]

    for streamed_text, expected_fragment in steps:
        fragment = handler(streamed_text)
        assert fragment == expected_fragment
        mock_print.assert_called_with(expected_fragment, flush=True, end="")
fix: Fix handling of streaming response in AnthropicClaudeInvocationLayer (#4993) * Fix handling of streaming response in AnthropicClaudeInvocationLayer --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-06-07 10:57:36 +02:00			`from unittest.mock import patch`

feat: Add Cohere PromptNode invocation layer (#4827) * Add CohereInvocationLayer --------- Co-authored-by: bogdankostic <bogdankostic@web.de> 2023-05-12 17:50:09 +02:00			`import pytest`

fix: Fix handling of streaming response in AnthropicClaudeInvocationLayer (#4993) * Fix handling of streaming response in AnthropicClaudeInvocationLayer --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-06-07 10:57:36 +02:00			`from haystack.nodes.prompt.invocation_layer.handlers import (`
			`DefaultTokenStreamingHandler,`
			`DefaultPromptHandler,`
			`AnthropicTokenStreamingHandler,`
			`)`
feat: Add Cohere PromptNode invocation layer (#4827) * Add CohereInvocationLayer --------- Co-authored-by: bogdankostic <bogdankostic@web.de> 2023-05-12 17:50:09 +02:00

chore: block all HTTP requests in CI (#5088) 2023-06-13 14:52:24 +02:00			`@pytest.mark.unit`
			`def test_prompt_handler_positive():`
			`# prompt of length 5 + max_length of 3 = 8, which is less than model_max_length of 10, so no resize`
			`mock_tokens = ["I", "am", "a", "tokenized", "prompt"]`
			`mock_prompt = "I am a tokenized prompt"`

			`with patch(`
			`"haystack.nodes.prompt.invocation_layer.handlers.AutoTokenizer.from_pretrained", autospec=True`
			`) as mock_tokenizer:`
			`tokenizer_instance = mock_tokenizer.return_value`
			`tokenizer_instance.tokenize.return_value = mock_tokens`
			`tokenizer_instance.convert_tokens_to_string.return_value = mock_prompt`

			`prompt_handler = DefaultPromptHandler("model_path", 10, 3)`

			`# Test with a prompt that does not exceed model_max_length when tokenized`
			`result = prompt_handler(mock_prompt)`

			`assert result == {`
			`"resized_prompt": mock_prompt,`
			`"prompt_length": 5,`
			`"new_prompt_length": 5,`
			`"model_max_length": 10,`
			`"max_length": 3,`
			`}`


			`@pytest.mark.unit`
			`def test_prompt_handler_negative():`
			`# prompt of length 8 + max_length of 3 = 11, which is more than model_max_length of 10, so we resize to 7`
			`mock_tokens = ["I", "am", "a", "tokenized", "prompt", "of", "length", "eight"]`
			`mock_prompt = "I am a tokenized prompt of length"`

			`with patch(`
			`"haystack.nodes.prompt.invocation_layer.handlers.AutoTokenizer.from_pretrained", autospec=True`
			`) as mock_tokenizer:`
			`tokenizer_instance = mock_tokenizer.return_value`
			`tokenizer_instance.tokenize.return_value = mock_tokens`
			`tokenizer_instance.convert_tokens_to_string.return_value = mock_prompt`

			`prompt_handler = DefaultPromptHandler("model_path", 10, 3)`
			`result = prompt_handler(mock_prompt)`

			`assert result == {`
			`"resized_prompt": mock_prompt,`
			`"prompt_length": 8,`
			`"new_prompt_length": 7,`
			`"model_max_length": 10,`
			`"max_length": 3,`
			`}`


feat: Add Cohere PromptNode invocation layer (#4827) * Add CohereInvocationLayer --------- Co-authored-by: bogdankostic <bogdankostic@web.de> 2023-05-12 17:50:09 +02:00			`@pytest.mark.integration`
			`def test_prompt_handler_basics():`
			`handler = DefaultPromptHandler(model_name_or_path="gpt2", model_max_length=20, max_length=10)`
			`assert callable(handler)`

			`handler = DefaultPromptHandler(model_name_or_path="gpt2", model_max_length=20)`
			`assert handler.max_length == 100`


			`@pytest.mark.integration`
			`def test_gpt2_prompt_handler():`
			`# test gpt2 BPE based tokenizer`
			`handler = DefaultPromptHandler(model_name_or_path="gpt2", model_max_length=20, max_length=10)`

			`# test no resize`
			`assert handler("This is a test") == {`
			`"prompt_length": 4,`
			`"resized_prompt": "This is a test",`
			`"max_length": 10,`
			`"model_max_length": 20,`
			`"new_prompt_length": 4,`
			`}`

			`# test resize`
			`assert handler("This is a prompt that will be resized because it is longer than allowed") == {`
			`"prompt_length": 15,`
			`"resized_prompt": "This is a prompt that will be resized because",`
			`"max_length": 10,`
			`"model_max_length": 20,`
			`"new_prompt_length": 10,`
			`}`


			`@pytest.mark.integration`
chore: Simplify DefaultPromptHandler logic and add tests (#4979) * Simplify DefaultPromptHandler logic and add tests Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> * Remove commented code * Split single unit test into multiple tests --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> 2023-05-29 12:13:32 +02:00			`def test_flan_prompt_handler_no_resize():`
feat: Add Cohere PromptNode invocation layer (#4827) * Add CohereInvocationLayer --------- Co-authored-by: bogdankostic <bogdankostic@web.de> 2023-05-12 17:50:09 +02:00			`handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)`
			`assert handler("This is a test") == {`
			`"prompt_length": 5,`
			`"resized_prompt": "This is a test",`
			`"max_length": 10,`
			`"model_max_length": 20,`
			`"new_prompt_length": 5,`
			`}`

chore: Simplify DefaultPromptHandler logic and add tests (#4979) * Simplify DefaultPromptHandler logic and add tests Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> * Remove commented code * Split single unit test into multiple tests --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> 2023-05-29 12:13:32 +02:00
			`@pytest.mark.integration`
			`def test_flan_prompt_handler_resize():`
			`handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)`
feat: Add Cohere PromptNode invocation layer (#4827) * Add CohereInvocationLayer --------- Co-authored-by: bogdankostic <bogdankostic@web.de> 2023-05-12 17:50:09 +02:00			`assert handler("This is a prompt that will be resized because it is longer than allowed") == {`
			`"prompt_length": 17,`
			`"resized_prompt": "This is a prompt that will be re",`
			`"max_length": 10,`
			`"model_max_length": 20,`
			`"new_prompt_length": 10,`
			`}`
feat: HFInferenceEndpointInvocationLayer streaming support (#4819) * HFInferenceEndpointInvocationLayer streaming support * Small fixes * Add unit test * PR feedback * Alphabetically sort params * Convert PromptNode tests to HFInferenceEndpointInvocationLayer invoke tests * Rewrite streaming with sseclient * More PR updates * Implement and test _ensure_token_limit * Further optimize DefaultPromptHandler * Fix CohereInvocationLayer mistypes * PR feedback * Break up unit tests, simplify * Simplify unit tests even further * PR feedback on unit test simplification * Proper code identation under patch context manager * More unit tests, slight adjustments * Remove unrelated CohereInvocationLayer change This reverts commit 82337151e8328d982f738e5da9129ff99350ea0c. * Revert "Further optimize DefaultPromptHandler" This reverts commit 606a761b6e3333f27df51a304cfbd1906c806e05. * lg update mostly full stops at the end of docstrings --------- Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Co-authored-by: Silvano Cerza <silvanocerza@gmail.com> Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-22 14:45:53 +02:00
chore: Simplify DefaultPromptHandler logic and add tests (#4979) * Simplify DefaultPromptHandler logic and add tests Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> * Remove commented code * Split single unit test into multiple tests --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> 2023-05-29 12:13:32 +02:00
			`@pytest.mark.integration`
			`def test_flan_prompt_handler_empty_string():`
			`handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)`
feat: HFInferenceEndpointInvocationLayer streaming support (#4819) * HFInferenceEndpointInvocationLayer streaming support * Small fixes * Add unit test * PR feedback * Alphabetically sort params * Convert PromptNode tests to HFInferenceEndpointInvocationLayer invoke tests * Rewrite streaming with sseclient * More PR updates * Implement and test _ensure_token_limit * Further optimize DefaultPromptHandler * Fix CohereInvocationLayer mistypes * PR feedback * Break up unit tests, simplify * Simplify unit tests even further * PR feedback on unit test simplification * Proper code identation under patch context manager * More unit tests, slight adjustments * Remove unrelated CohereInvocationLayer change This reverts commit 82337151e8328d982f738e5da9129ff99350ea0c. * Revert "Further optimize DefaultPromptHandler" This reverts commit 606a761b6e3333f27df51a304cfbd1906c806e05. * lg update mostly full stops at the end of docstrings --------- Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Co-authored-by: Silvano Cerza <silvanocerza@gmail.com> Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-22 14:45:53 +02:00			`assert handler("") == {`
			`"prompt_length": 0,`
			`"resized_prompt": "",`
			`"max_length": 10,`
			`"model_max_length": 20,`
			`"new_prompt_length": 0,`
			`}`

chore: Simplify DefaultPromptHandler logic and add tests (#4979) * Simplify DefaultPromptHandler logic and add tests Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> * Remove commented code * Split single unit test into multiple tests --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> 2023-05-29 12:13:32 +02:00
			`@pytest.mark.integration`
			`def test_flan_prompt_handler_none():`
			`handler = DefaultPromptHandler(model_name_or_path="google/flan-t5-xxl", model_max_length=20, max_length=10)`
feat: HFInferenceEndpointInvocationLayer streaming support (#4819) * HFInferenceEndpointInvocationLayer streaming support * Small fixes * Add unit test * PR feedback * Alphabetically sort params * Convert PromptNode tests to HFInferenceEndpointInvocationLayer invoke tests * Rewrite streaming with sseclient * More PR updates * Implement and test _ensure_token_limit * Further optimize DefaultPromptHandler * Fix CohereInvocationLayer mistypes * PR feedback * Break up unit tests, simplify * Simplify unit tests even further * PR feedback on unit test simplification * Proper code identation under patch context manager * More unit tests, slight adjustments * Remove unrelated CohereInvocationLayer change This reverts commit 82337151e8328d982f738e5da9129ff99350ea0c. * Revert "Further optimize DefaultPromptHandler" This reverts commit 606a761b6e3333f27df51a304cfbd1906c806e05. * lg update mostly full stops at the end of docstrings --------- Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Co-authored-by: Silvano Cerza <silvanocerza@gmail.com> Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-05-22 14:45:53 +02:00			`assert handler(None) == {`
			`"prompt_length": 0,`
			`"resized_prompt": None,`
			`"max_length": 10,`
			`"model_max_length": 20,`
			`"new_prompt_length": 0,`
			`}`
fix: Fix handling of streaming response in AnthropicClaudeInvocationLayer (#4993) * Fix handling of streaming response in AnthropicClaudeInvocationLayer --------- Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com> Co-authored-by: Darja Fokina <daria.f93@gmail.com> 2023-06-07 10:57:36 +02:00

			`@pytest.mark.unit`
			`@patch("builtins.print")`
			`def test_anthropic_token_streaming_handler(mock_print):`
			`handler = AnthropicTokenStreamingHandler(DefaultTokenStreamingHandler())`

			`res = handler(" This")`
			`assert res == " This"`
			`mock_print.assert_called_with(" This", flush=True, end="")`

			`res = handler(" This is a new")`
			`assert res == " is a new"`
			`mock_print.assert_called_with(" is a new", flush=True, end="")`

			`res = handler(" This is a new token")`
			`assert res == " token"`
			`mock_print.assert_called_with(" token", flush=True, end="")`

			`res = handler("And now")`
			`assert res == "And now"`
			`mock_print.assert_called_with("And now", flush=True, end="")`

			`res = handler("And now something completely different")`
			`assert res == " something completely different"`
			`mock_print.assert_called_with(" something completely different", flush=True, end="")`