mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-11 10:07:50 +00:00

* Add Bedrock
* Update supported models for Bedrock
* Fix supports and add extract response in Bedrock
* fix errors imports
* improve and refactor supports
* fix install
* fix mypy
* fix pylint
* fix existing tests
* Added Anthropic Bedrock
* fix tests
* fix sagemaker tests
* add default prompt handler, constructor and supports tests
* more tests
* invoke refactoring
* refactor model_kwargs
* fix mypy
* lstrip responses
* Add streaming support
* bump boto3 version
* add class docstrings, better exception names
* fix layer name
* add tests for anthropic and cohere model adapters
* update cohere params
* update ai21 args and add tests
* support cohere command light model
* add titan tests
* better class names
* support meta llama 2 model
* fix streaming support
* more future-proof model adapter selection
* fix import
* fix mypy
* fix pylint for preview
* add tests for streaming
* add release notes
* Apply suggestions from code review (Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com>)
* fix format
* fix tests after msg changes
* fix streaming for cohere

Co-authored-by: tstadel <60758086+tstadel@users.noreply.github.com>
Co-authored-by: tstadel <thomas.stadelmann@deepset.ai>
Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com>
1034 lines
39 KiB
Python
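"""
Unit tests for AmazonBedrockInvocationLayer and its Bedrock model adapters (Anthropic Claude, Cohere
Command, AI21 Jurassic-2, Amazon Titan, Meta Llama 2). All AWS access is mocked, so these unit tests
run without real credentials or a Bedrock connection.
"""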
from typing import Optional, Type
from unittest.mock import call, patch, MagicMock

import pytest

from haystack.lazy_imports import LazyImport

from haystack.errors import AmazonBedrockConfigurationError
from haystack.nodes.prompt.invocation_layer import AmazonBedrockInvocationLayer
from haystack.nodes.prompt.invocation_layer.amazon_bedrock import (
    AI21LabsJurassic2Adapter,
    AnthropicClaudeAdapter,
    BedrockModelAdapter,
    CohereCommandAdapter,
    AmazonTitanAdapter,
    MetaLlama2ChatAdapter,
)

with LazyImport() as boto3_import:
    from botocore.exceptions import BotoCoreError


# create a fixture with mocked boto3 client and session
@pytest.fixture
def mock_boto3_session():
    with patch("boto3.Session") as mock_client:
        yield mock_client


@pytest.fixture
def mock_prompt_handler():
    with patch("haystack.nodes.prompt.invocation_layer.handlers.DefaultPromptHandler") as mock_prompt_handler:
        yield mock_prompt_handler


@pytest.mark.unit
def test_default_constructor(mock_auto_tokenizer, mock_boto3_session):
    """
    Test that the default constructor sets the correct values
    """

    layer = AmazonBedrockInvocationLayer(
        model_name_or_path="anthropic.claude-v2",
        max_length=99,
        aws_access_key_id="some_fake_id",
        aws_secret_access_key="some_fake_key",
        aws_session_token="some_fake_token",
        aws_profile_name="some_fake_profile",
        aws_region_name="fake_region",
    )

    assert layer.max_length == 99
    assert layer.model_name_or_path == "anthropic.claude-v2"

    assert layer.prompt_handler is not None
    assert layer.prompt_handler.model_max_length == 4096

    # assert mocked boto3 client called exactly once
    mock_boto3_session.assert_called_once()

    # assert mocked boto3 client was called with the correct parameters
    mock_boto3_session.assert_called_with(
        aws_access_key_id="some_fake_id",
        aws_secret_access_key="some_fake_key",
        aws_session_token="some_fake_token",
        profile_name="some_fake_profile",
        region_name="fake_region",
    )
@pytest.mark.unit
def test_constructor_prompt_handler_initialized(mock_auto_tokenizer, mock_boto3_session, mock_prompt_handler):
    """
    Test that the constructor sets the prompt_handler correctly, with the correct model_max_length
    """
    layer = AmazonBedrockInvocationLayer(model_name_or_path="anthropic.claude-v2", prompt_handler=mock_prompt_handler)
    assert layer.prompt_handler is not None
    assert layer.prompt_handler.model_max_length == 4096
@pytest.mark.unit
def test_constructor_with_model_kwargs(mock_auto_tokenizer, mock_boto3_session):
    """
    Test that model_kwargs are correctly set in the constructor
    """
    model_kwargs = {"temperature": 0.7}

    layer = AmazonBedrockInvocationLayer(model_name_or_path="anthropic.claude-v2", **model_kwargs)
    assert "temperature" in layer.model_adapter.model_kwargs
    assert layer.model_adapter.model_kwargs["temperature"] == 0.7


@pytest.mark.unit
def test_constructor_with_empty_model_name():
    """
    Test that the constructor raises an error when the model_name_or_path is empty
    """
    with pytest.raises(ValueError, match="cannot be None or empty string"):
        AmazonBedrockInvocationLayer(model_name_or_path="")


@pytest.mark.unit
def test_invoke_with_no_kwargs(mock_auto_tokenizer, mock_boto3_session):
    """
    Test invoke raises an error if no prompt is provided
    """
    layer = AmazonBedrockInvocationLayer(model_name_or_path="anthropic.claude-v2")
    with pytest.raises(ValueError, match="The model anthropic.claude-v2 requires a valid prompt."):
        layer.invoke()
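# The two truncation tests below exercise _ensure_token_limit, which (as these tests assume) uses the
# tokenizer-backed prompt handler to trim a prompt only when prompt tokens plus max_length (tokens
# reserved for the generated answer) would exceed the model's model_max_length.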
@pytest.mark.unit
def test_short_prompt_is_not_truncated(mock_boto3_session):
    """
    Test that a short prompt is not truncated
    """
    # Define a short mock prompt and its tokenized version
    mock_prompt_text = "I am a tokenized prompt"
    mock_prompt_tokens = mock_prompt_text.split()

    # Mock the tokenizer so it returns our predefined tokens
    mock_tokenizer = MagicMock()
    mock_tokenizer.tokenize.return_value = mock_prompt_tokens

    # We set a small max_length for generated text (3 tokens) and a total model_max_length of 10 tokens
    # Since our mock prompt is 5 tokens long, it doesn't exceed the
    # total limit (5 prompt tokens + 3 generated tokens < 10 tokens)
    max_length_generated_text = 3
    total_model_max_length = 10

    with patch("transformers.AutoTokenizer.from_pretrained", return_value=mock_tokenizer):
        layer = AmazonBedrockInvocationLayer(
            "anthropic.claude-v2", max_length=max_length_generated_text, model_max_length=total_model_max_length
        )
        prompt_after_resize = layer._ensure_token_limit(mock_prompt_text)

    # The prompt doesn't exceed the limit, _ensure_token_limit doesn't truncate it
    assert prompt_after_resize == mock_prompt_text


@pytest.mark.unit
def test_long_prompt_is_truncated(mock_boto3_session):
    """
    Test that a long prompt is truncated
    """
    # Define a long mock prompt and its tokenized version
    long_prompt_text = "I am a tokenized prompt of length eight"
    long_prompt_tokens = long_prompt_text.split()

    # _ensure_token_limit will truncate the prompt to make it fit into the model's max token limit
    truncated_prompt_text = "I am a tokenized prompt of length"

    # Mock the tokenizer to return our predefined tokens
    # convert tokens to our predefined truncated text
    mock_tokenizer = MagicMock()
    mock_tokenizer.tokenize.return_value = long_prompt_tokens
    mock_tokenizer.convert_tokens_to_string.return_value = truncated_prompt_text

    # We set a small max_length for generated text (3 tokens) and a total model_max_length of 10 tokens
    # Our mock prompt is 8 tokens long, so it exceeds the total limit (8 prompt tokens + 3 generated tokens > 10 tokens)
    max_length_generated_text = 3
    total_model_max_length = 10

    with patch("transformers.AutoTokenizer.from_pretrained", return_value=mock_tokenizer):
        layer = AmazonBedrockInvocationLayer(
            "anthropic.claude-v2", max_length=max_length_generated_text, model_max_length=total_model_max_length
        )
        prompt_after_resize = layer._ensure_token_limit(long_prompt_text)

    # The prompt exceeds the limit, _ensure_token_limit truncates it
    assert prompt_after_resize == truncated_prompt_text
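# The supports() tests below verify that the layer is only selected when a usable AWS session can be
# created and the requested model id appears in Bedrock's list_foundation_models response (queried with
# byOutputModality="TEXT"), and that stream=True is only accepted for models reporting
# responseStreamingSupported.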
@pytest.mark.unit
def test_supports_for_valid_aws_configuration():
    mock_session = MagicMock()
    mock_session.client("bedrock").list_foundation_models.return_value = {
        "modelSummaries": [{"modelId": "anthropic.claude-v2"}]
    }

    # Patch the class method to return the mock session
    with patch(
        "haystack.nodes.prompt.invocation_layer.aws_base.AWSBaseInvocationLayer.get_aws_session",
        return_value=mock_session,
    ):
        supported = AmazonBedrockInvocationLayer.supports(
            model_name_or_path="anthropic.claude-v2", aws_profile_name="some_real_profile"
        )
    args, kwargs = mock_session.client("bedrock").list_foundation_models.call_args
    assert kwargs["byOutputModality"] == "TEXT"

    assert supported


@pytest.mark.unit
def test_supports_raises_on_invalid_aws_profile_name():
    with patch("boto3.Session") as mock_boto3_session:
        mock_boto3_session.side_effect = BotoCoreError()
        with pytest.raises(AmazonBedrockConfigurationError, match="Failed to initialize the session"):
            AmazonBedrockInvocationLayer.supports(
                model_name_or_path="anthropic.claude-v2", aws_profile_name="some_fake_profile"
            )


@pytest.mark.unit
def test_supports_for_invalid_bedrock_config():
    mock_session = MagicMock()
    mock_session.client.side_effect = BotoCoreError()

    # Patch the class method to return the mock session
    with patch(
        "haystack.nodes.prompt.invocation_layer.aws_base.AWSBaseInvocationLayer.get_aws_session",
        return_value=mock_session,
    ), pytest.raises(AmazonBedrockConfigurationError, match="Could not connect to Amazon Bedrock."):
        AmazonBedrockInvocationLayer.supports(
            model_name_or_path="anthropic.claude-v2", aws_profile_name="some_real_profile"
        )


@pytest.mark.unit
def test_supports_for_invalid_bedrock_config_error_on_list_models():
    mock_session = MagicMock()
    mock_session.client("bedrock").list_foundation_models.side_effect = BotoCoreError()

    # Patch the class method to return the mock session
    with patch(
        "haystack.nodes.prompt.invocation_layer.aws_base.AWSBaseInvocationLayer.get_aws_session",
        return_value=mock_session,
    ), pytest.raises(AmazonBedrockConfigurationError, match="Could not connect to Amazon Bedrock."):
        AmazonBedrockInvocationLayer.supports(
            model_name_or_path="anthropic.claude-v2", aws_profile_name="some_real_profile"
        )


@pytest.mark.unit
def test_supports_for_no_aws_params():
    supported = AmazonBedrockInvocationLayer.supports(model_name_or_path="anthropic.claude-v2")

    assert not supported


@pytest.mark.unit
def test_supports_for_unknown_model():
    supported = AmazonBedrockInvocationLayer.supports(
        model_name_or_path="unknown_model", aws_profile_name="some_real_profile"
    )

    assert not supported


@pytest.mark.unit
def test_supports_with_stream_true_for_model_that_supports_streaming():
    mock_session = MagicMock()
    mock_session.client("bedrock").list_foundation_models.return_value = {
        "modelSummaries": [{"modelId": "anthropic.claude-v2", "responseStreamingSupported": True}]
    }

    # Patch the class method to return the mock session
    with patch(
        "haystack.nodes.prompt.invocation_layer.aws_base.AWSBaseInvocationLayer.get_aws_session",
        return_value=mock_session,
    ):
        supported = AmazonBedrockInvocationLayer.supports(
            model_name_or_path="anthropic.claude-v2", aws_profile_name="some_real_profile", stream=True
        )

    assert supported


@pytest.mark.unit
def test_supports_with_stream_true_for_model_that_does_not_support_streaming():
    mock_session = MagicMock()
    mock_session.client("bedrock").list_foundation_models.return_value = {
        "modelSummaries": [{"modelId": "ai21.j2-mid-v1", "responseStreamingSupported": False}]
    }

    # Patch the class method to return the mock session
    with patch(
        "haystack.nodes.prompt.invocation_layer.aws_base.AWSBaseInvocationLayer.get_aws_session",
        return_value=mock_session,
    ), pytest.raises(AmazonBedrockConfigurationError, match="The model ai21.j2-mid-v1 doesn't support streaming."):
        AmazonBedrockInvocationLayer.supports(
            model_name_or_path="ai21.j2-mid-v1", aws_profile_name="some_real_profile", stream=True
        )
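# get_model_adapter maps a Bedrock model id to its adapter class. The "artificial" ids below do not exist
# on Bedrock; they are included because, per these expectations, selection is based on the model-id prefix,
# so newer model versions should still resolve to the right adapter.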
@pytest.mark.unit
@pytest.mark.parametrize(
    "model_name_or_path, expected_model_adapter",
    [
        ("anthropic.claude-v1", AnthropicClaudeAdapter),
        ("anthropic.claude-v2", AnthropicClaudeAdapter),
        ("anthropic.claude-instant-v1", AnthropicClaudeAdapter),
        ("anthropic.claude-super-v5", AnthropicClaudeAdapter),  # artificial
        ("cohere.command-text-v14", CohereCommandAdapter),
        ("cohere.command-light-text-v14", CohereCommandAdapter),
        ("cohere.command-text-v21", CohereCommandAdapter),  # artificial
        ("ai21.j2-mid-v1", AI21LabsJurassic2Adapter),
        ("ai21.j2-ultra-v1", AI21LabsJurassic2Adapter),
        ("ai21.j2-mega-v5", AI21LabsJurassic2Adapter),  # artificial
        ("amazon.titan-text-lite-v1", AmazonTitanAdapter),
        ("amazon.titan-text-express-v1", AmazonTitanAdapter),
        ("amazon.titan-text-agile-v1", AmazonTitanAdapter),
        ("amazon.titan-text-lightning-v8", AmazonTitanAdapter),  # artificial
        ("meta.llama2-13b-chat-v1", MetaLlama2ChatAdapter),
        ("meta.llama2-70b-chat-v1", MetaLlama2ChatAdapter),
        ("meta.llama2-130b-v5", MetaLlama2ChatAdapter),  # artificial
        ("unknown_model", None),
    ],
)
def test_get_model_adapter(model_name_or_path: str, expected_model_adapter: Optional[Type[BedrockModelAdapter]]):
    """
    Test that the correct model adapter is returned for a given model_name_or_path
    """
    model_adapter = AmazonBedrockInvocationLayer.get_model_adapter(model_name_or_path=model_name_or_path)
    assert model_adapter == expected_model_adapter
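# As exercised below, AnthropicClaudeAdapter wraps the raw prompt in Anthropic's chat template
# ("\n\nHuman: ...\n\nAssistant:"), defaults max_tokens_to_sample to max_length and stop_sequences to
# ["\n\nHuman:"], and lets kwargs passed to prepare_body() override constructor model_kwargs while
# dropping arguments the model does not recognize.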
class TestAnthropicClaudeAdapter:
    def test_prepare_body_with_default_params(self) -> None:
        layer = AnthropicClaudeAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "\n\nHuman: Hello, how are you?\n\nAssistant:",
            "max_tokens_to_sample": 99,
            "stop_sequences": ["\n\nHuman:"],
        }

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_custom_inference_params(self) -> None:
        layer = AnthropicClaudeAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "\n\nHuman: Hello, how are you?\n\nAssistant:",
            "max_tokens_to_sample": 50,
            "stop_sequences": ["CUSTOM_STOP"],
            "temperature": 0.7,
            "top_p": 0.8,
            "top_k": 5,
        }

        body = layer.prepare_body(
            prompt,
            temperature=0.7,
            top_p=0.8,
            top_k=5,
            max_tokens_to_sample=50,
            stop_sequences=["CUSTOM_STOP"],
            unknown_arg="unknown_value",
        )

        assert body == expected_body

    def test_prepare_body_with_model_kwargs(self) -> None:
        layer = AnthropicClaudeAdapter(
            model_kwargs={
                "temperature": 0.7,
                "top_p": 0.8,
                "top_k": 5,
                "max_tokens_to_sample": 50,
                "stop_sequences": ["CUSTOM_STOP"],
                "unknown_arg": "unknown_value",
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "\n\nHuman: Hello, how are you?\n\nAssistant:",
            "max_tokens_to_sample": 50,
            "stop_sequences": ["CUSTOM_STOP"],
            "temperature": 0.7,
            "top_p": 0.8,
            "top_k": 5,
        }

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_model_kwargs_and_custom_inference_params(self) -> None:
        layer = AnthropicClaudeAdapter(
            model_kwargs={
                "temperature": 0.6,
                "top_p": 0.7,
                "top_k": 4,
                "max_tokens_to_sample": 49,
                "stop_sequences": ["CUSTOM_STOP_MODEL_KWARGS"],
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "\n\nHuman: Hello, how are you?\n\nAssistant:",
            "max_tokens_to_sample": 50,
            "stop_sequences": ["CUSTOM_STOP_MODEL_KWARGS"],
            "temperature": 0.7,
            "top_p": 0.8,
            "top_k": 5,
        }

        body = layer.prepare_body(prompt, temperature=0.7, top_p=0.8, top_k=5, max_tokens_to_sample=50)

        assert body == expected_body
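    # The remaining tests check response parsing: get_responses() reads the "completion" field and strips
    # leading whitespace, while get_stream_responses() decodes each streamed chunk's JSON bytes, forwards
    # every token (plus the raw event data) to the stream handler, and joins the tokens into one response.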
    def test_get_responses(self) -> None:
        adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99)
        response_body = {"completion": "This is a single response."}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_leading_whitespace(self) -> None:
        adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99)
        response_body = {"completion": "\n\t This is a single response."}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_stream_responses(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = [
            {"chunk": {"bytes": b'{"completion": " This"}'}},
            {"chunk": {"bytes": b'{"completion": " is"}'}},
            {"chunk": {"bytes": b'{"completion": " a"}'}},
            {"chunk": {"bytes": b'{"completion": " single"}'}},
            {"chunk": {"bytes": b'{"completion": " response."}'}},
        ]

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99)
        expected_responses = ["This is a single response."]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_has_calls(
            [
                call(" This", event_data={"completion": " This"}),
                call(" is", event_data={"completion": " is"}),
                call(" a", event_data={"completion": " a"}),
                call(" single", event_data={"completion": " single"}),
                call(" response.", event_data={"completion": " response."}),
            ]
        )

    def test_get_stream_responses_empty(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = []

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99)
        expected_responses = [""]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_not_called()
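# As exercised below, CohereCommandAdapter sends the prompt verbatim, defaults max_tokens to max_length,
# and passes through Cohere-specific parameters (p, k, stop_sequences, return_likelihoods, stream,
# logit_bias, num_generations, truncate); explicit prepare_body() kwargs again take precedence over
# constructor model_kwargs.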
class TestCohereCommandAdapter:
    def test_prepare_body_with_default_params(self) -> None:
        layer = CohereCommandAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {"prompt": "Hello, how are you?", "max_tokens": 99}

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_custom_inference_params(self) -> None:
        layer = CohereCommandAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "Hello, how are you?",
            "max_tokens": 50,
            "stop_sequences": ["CUSTOM_STOP"],
            "temperature": 0.7,
            "p": 0.8,
            "k": 5,
            "return_likelihoods": "GENERATION",
            "stream": True,
            "logit_bias": {"token_id": 10.0},
            "num_generations": 1,
            "truncate": "START",
        }

        body = layer.prepare_body(
            prompt,
            temperature=0.7,
            p=0.8,
            k=5,
            max_tokens=50,
            stop_sequences=["CUSTOM_STOP"],
            return_likelihoods="GENERATION",
            stream=True,
            logit_bias={"token_id": 10.0},
            num_generations=1,
            truncate="START",
            unknown_arg="unknown_value",
        )

        assert body == expected_body

    def test_prepare_body_with_model_kwargs(self) -> None:
        layer = CohereCommandAdapter(
            model_kwargs={
                "temperature": 0.7,
                "p": 0.8,
                "k": 5,
                "max_tokens": 50,
                "stop_sequences": ["CUSTOM_STOP"],
                "return_likelihoods": "GENERATION",
                "stream": True,
                "logit_bias": {"token_id": 10.0},
                "num_generations": 1,
                "truncate": "START",
                "unknown_arg": "unknown_value",
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "Hello, how are you?",
            "max_tokens": 50,
            "stop_sequences": ["CUSTOM_STOP"],
            "temperature": 0.7,
            "p": 0.8,
            "k": 5,
            "return_likelihoods": "GENERATION",
            "stream": True,
            "logit_bias": {"token_id": 10.0},
            "num_generations": 1,
            "truncate": "START",
        }

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_model_kwargs_and_custom_inference_params(self) -> None:
        layer = CohereCommandAdapter(
            model_kwargs={
                "temperature": 0.6,
                "p": 0.7,
                "k": 4,
                "max_tokens": 49,
                "stop_sequences": ["CUSTOM_STOP_MODEL_KWARGS"],
                "return_likelihoods": "ALL",
                "stream": False,
                "logit_bias": {"token_id": 9.0},
                "num_generations": 2,
                "truncate": "NONE",
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "Hello, how are you?",
            "max_tokens": 50,
            "stop_sequences": ["CUSTOM_STOP_MODEL_KWARGS"],
            "temperature": 0.7,
            "p": 0.8,
            "k": 5,
            "return_likelihoods": "GENERATION",
            "stream": True,
            "logit_bias": {"token_id": 10.0},
            "num_generations": 1,
            "truncate": "START",
        }

        body = layer.prepare_body(
            prompt,
            temperature=0.7,
            p=0.8,
            k=5,
            max_tokens=50,
            return_likelihoods="GENERATION",
            stream=True,
            logit_bias={"token_id": 10.0},
            num_generations=1,
            truncate="START",
        )

        assert body == expected_body

    def test_get_responses(self) -> None:
        adapter = CohereCommandAdapter(model_kwargs={}, max_length=99)
        response_body = {"generations": [{"text": "This is a single response."}]}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_leading_whitespace(self) -> None:
        adapter = CohereCommandAdapter(model_kwargs={}, max_length=99)
        response_body = {"generations": [{"text": "\n\t This is a single response."}]}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_multiple_responses(self) -> None:
        adapter = CohereCommandAdapter(model_kwargs={}, max_length=99)
        response_body = {
            "generations": [{"text": "This is a single response."}, {"text": "This is a second response."}]
        }
        expected_responses = ["This is a single response.", "This is a second response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_stream_responses(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = [
            {"chunk": {"bytes": b'{"text": " This"}'}},
            {"chunk": {"bytes": b'{"text": " is"}'}},
            {"chunk": {"bytes": b'{"text": " a"}'}},
            {"chunk": {"bytes": b'{"text": " single"}'}},
            {"chunk": {"bytes": b'{"text": " response."}'}},
            {"chunk": {"bytes": b'{"finish_reason": "MAX_TOKENS", "is_finished": true}'}},
        ]

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = CohereCommandAdapter(model_kwargs={}, max_length=99)
        expected_responses = ["This is a single response."]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_has_calls(
            [
                call(" This", event_data={"text": " This"}),
                call(" is", event_data={"text": " is"}),
                call(" a", event_data={"text": " a"}),
                call(" single", event_data={"text": " single"}),
                call(" response.", event_data={"text": " response."}),
                call("", event_data={"finish_reason": "MAX_TOKENS", "is_finished": True}),
            ]
        )

    def test_get_stream_responses_empty(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = []

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = CohereCommandAdapter(model_kwargs={}, max_length=99)
        expected_responses = [""]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_not_called()
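# As exercised below, AI21LabsJurassic2Adapter uses Jurassic-2's camelCase parameters (maxTokens,
# stopSequences, topP, countPenalty, presencePenalty, frequencyPenalty, numResults) and reads completions
# from response["completions"][i]["data"]["text"]. There are no streaming tests here, matching the
# expectation elsewhere in this file that ai21.j2 models do not support streaming.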
class TestAI21LabsJurassic2Adapter:
    def test_prepare_body_with_default_params(self) -> None:
        layer = AI21LabsJurassic2Adapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {"prompt": "Hello, how are you?", "maxTokens": 99}

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_custom_inference_params(self) -> None:
        layer = AI21LabsJurassic2Adapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "Hello, how are you?",
            "maxTokens": 50,
            "stopSequences": ["CUSTOM_STOP"],
            "temperature": 0.7,
            "topP": 0.8,
            "countPenalty": {"scale": 1.0},
            "presencePenalty": {"scale": 5.0},
            "frequencyPenalty": {"scale": 500.0},
            "numResults": 1,
        }

        body = layer.prepare_body(
            prompt,
            maxTokens=50,
            stopSequences=["CUSTOM_STOP"],
            temperature=0.7,
            topP=0.8,
            countPenalty={"scale": 1.0},
            presencePenalty={"scale": 5.0},
            frequencyPenalty={"scale": 500.0},
            numResults=1,
            unknown_arg="unknown_value",
        )

        assert body == expected_body

    def test_prepare_body_with_model_kwargs(self) -> None:
        layer = AI21LabsJurassic2Adapter(
            model_kwargs={
                "maxTokens": 50,
                "stopSequences": ["CUSTOM_STOP"],
                "temperature": 0.7,
                "topP": 0.8,
                "countPenalty": {"scale": 1.0},
                "presencePenalty": {"scale": 5.0},
                "frequencyPenalty": {"scale": 500.0},
                "numResults": 1,
                "unknown_arg": "unknown_value",
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "Hello, how are you?",
            "maxTokens": 50,
            "stopSequences": ["CUSTOM_STOP"],
            "temperature": 0.7,
            "topP": 0.8,
            "countPenalty": {"scale": 1.0},
            "presencePenalty": {"scale": 5.0},
            "frequencyPenalty": {"scale": 500.0},
            "numResults": 1,
        }

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_model_kwargs_and_custom_inference_params(self) -> None:
        layer = AI21LabsJurassic2Adapter(
            model_kwargs={
                "maxTokens": 49,
                "stopSequences": ["CUSTOM_STOP_MODEL_KWARGS"],
                "temperature": 0.6,
                "topP": 0.7,
                "countPenalty": {"scale": 0.9},
                "presencePenalty": {"scale": 4.0},
                "frequencyPenalty": {"scale": 499.0},
                "numResults": 2,
                "unknown_arg": "unknown_value",
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "prompt": "Hello, how are you?",
            "maxTokens": 50,
            "stopSequences": ["CUSTOM_STOP_MODEL_KWARGS"],
            "temperature": 0.7,
            "topP": 0.8,
            "countPenalty": {"scale": 1.0},
            "presencePenalty": {"scale": 5.0},
            "frequencyPenalty": {"scale": 500.0},
            "numResults": 1,
        }

        body = layer.prepare_body(
            prompt,
            temperature=0.7,
            topP=0.8,
            maxTokens=50,
            countPenalty={"scale": 1.0},
            presencePenalty={"scale": 5.0},
            frequencyPenalty={"scale": 500.0},
            numResults=1,
        )

        assert body == expected_body

    def test_get_responses(self) -> None:
        adapter = AI21LabsJurassic2Adapter(model_kwargs={}, max_length=99)
        response_body = {"completions": [{"data": {"text": "This is a single response."}}]}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_leading_whitespace(self) -> None:
        adapter = AI21LabsJurassic2Adapter(model_kwargs={}, max_length=99)
        response_body = {"completions": [{"data": {"text": "\n\t This is a single response."}}]}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_multiple_responses(self) -> None:
        adapter = AI21LabsJurassic2Adapter(model_kwargs={}, max_length=99)
        response_body = {
            "completions": [
                {"data": {"text": "This is a single response."}},
                {"data": {"text": "This is a second response."}},
            ]
        }
        expected_responses = ["This is a single response.", "This is a second response."]
        assert adapter.get_responses(response_body) == expected_responses
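# As exercised below, AmazonTitanAdapter nests generation parameters (maxTokenCount, stopSequences,
# temperature, topP) under "textGenerationConfig" next to "inputText", and reads results from
# response["results"][i]["outputText"].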
class TestAmazonTitanAdapter:
    def test_prepare_body_with_default_params(self) -> None:
        layer = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {"inputText": "Hello, how are you?", "textGenerationConfig": {"maxTokenCount": 99}}

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_custom_inference_params(self) -> None:
        layer = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {
            "inputText": "Hello, how are you?",
            "textGenerationConfig": {
                "maxTokenCount": 50,
                "stopSequences": ["CUSTOM_STOP"],
                "temperature": 0.7,
                "topP": 0.8,
            },
        }

        body = layer.prepare_body(
            prompt,
            maxTokenCount=50,
            stopSequences=["CUSTOM_STOP"],
            temperature=0.7,
            topP=0.8,
            unknown_arg="unknown_value",
        )

        assert body == expected_body

    def test_prepare_body_with_model_kwargs(self) -> None:
        layer = AmazonTitanAdapter(
            model_kwargs={
                "maxTokenCount": 50,
                "stopSequences": ["CUSTOM_STOP"],
                "temperature": 0.7,
                "topP": 0.8,
                "unknown_arg": "unknown_value",
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "inputText": "Hello, how are you?",
            "textGenerationConfig": {
                "maxTokenCount": 50,
                "stopSequences": ["CUSTOM_STOP"],
                "temperature": 0.7,
                "topP": 0.8,
            },
        }

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_model_kwargs_and_custom_inference_params(self) -> None:
        layer = AmazonTitanAdapter(
            model_kwargs={
                "maxTokenCount": 49,
                "stopSequences": ["CUSTOM_STOP_MODEL_KWARGS"],
                "temperature": 0.6,
                "topP": 0.7,
            },
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {
            "inputText": "Hello, how are you?",
            "textGenerationConfig": {
                "maxTokenCount": 50,
                "stopSequences": ["CUSTOM_STOP_MODEL_KWARGS"],
                "temperature": 0.7,
                "topP": 0.8,
            },
        }

        body = layer.prepare_body(prompt, temperature=0.7, topP=0.8, maxTokenCount=50)

        assert body == expected_body

    def test_get_responses(self) -> None:
        adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        response_body = {"results": [{"outputText": "This is a single response."}]}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_leading_whitespace(self) -> None:
        adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        response_body = {"results": [{"outputText": "\n\t This is a single response."}]}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_multiple_responses(self) -> None:
        adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        response_body = {
            "results": [{"outputText": "This is a single response."}, {"outputText": "This is a second response."}]
        }
        expected_responses = ["This is a single response.", "This is a second response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_stream_responses(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = [
            {"chunk": {"bytes": b'{"outputText": " This"}'}},
            {"chunk": {"bytes": b'{"outputText": " is"}'}},
            {"chunk": {"bytes": b'{"outputText": " a"}'}},
            {"chunk": {"bytes": b'{"outputText": " single"}'}},
            {"chunk": {"bytes": b'{"outputText": " response."}'}},
        ]

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        expected_responses = ["This is a single response."]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_has_calls(
            [
                call(" This", event_data={"outputText": " This"}),
                call(" is", event_data={"outputText": " is"}),
                call(" a", event_data={"outputText": " a"}),
                call(" single", event_data={"outputText": " single"}),
                call(" response.", event_data={"outputText": " response."}),
            ]
        )

    def test_get_stream_responses_empty(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = []

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99)
        expected_responses = [""]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_not_called()
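# As exercised below, MetaLlama2ChatAdapter builds a flat body with "prompt", "max_gen_len", "temperature"
# and "top_p" (unsupported arguments such as top_k are dropped) and reads the completion from the
# "generation" field.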
class TestMetaLlama2ChatAdapter:
    def test_prepare_body_with_default_params(self) -> None:
        layer = MetaLlama2ChatAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {"prompt": "Hello, how are you?", "max_gen_len": 99}

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_custom_inference_params(self) -> None:
        layer = MetaLlama2ChatAdapter(model_kwargs={}, max_length=99)
        prompt = "Hello, how are you?"
        expected_body = {"prompt": "Hello, how are you?", "max_gen_len": 50, "temperature": 0.7, "top_p": 0.8}

        body = layer.prepare_body(prompt, temperature=0.7, top_p=0.8, max_gen_len=50, unknown_arg="unknown_value")

        assert body == expected_body

    def test_prepare_body_with_model_kwargs(self) -> None:
        layer = MetaLlama2ChatAdapter(
            model_kwargs={"temperature": 0.7, "top_p": 0.8, "max_gen_len": 50, "unknown_arg": "unknown_value"},
            max_length=99,
        )
        prompt = "Hello, how are you?"
        expected_body = {"prompt": "Hello, how are you?", "max_gen_len": 50, "temperature": 0.7, "top_p": 0.8}

        body = layer.prepare_body(prompt)

        assert body == expected_body

    def test_prepare_body_with_model_kwargs_and_custom_inference_params(self) -> None:
        layer = MetaLlama2ChatAdapter(
            model_kwargs={"temperature": 0.6, "top_p": 0.7, "top_k": 4, "max_gen_len": 49}, max_length=99
        )
        prompt = "Hello, how are you?"
        expected_body = {"prompt": "Hello, how are you?", "max_gen_len": 50, "temperature": 0.7, "top_p": 0.7}

        body = layer.prepare_body(prompt, temperature=0.7, max_gen_len=50)

        assert body == expected_body

    def test_get_responses(self) -> None:
        adapter = MetaLlama2ChatAdapter(model_kwargs={}, max_length=99)
        response_body = {"generation": "This is a single response."}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_responses_leading_whitespace(self) -> None:
        adapter = MetaLlama2ChatAdapter(model_kwargs={}, max_length=99)
        response_body = {"generation": "\n\t This is a single response."}
        expected_responses = ["This is a single response."]
        assert adapter.get_responses(response_body) == expected_responses

    def test_get_stream_responses(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = [
            {"chunk": {"bytes": b'{"generation": " This"}'}},
            {"chunk": {"bytes": b'{"generation": " is"}'}},
            {"chunk": {"bytes": b'{"generation": " a"}'}},
            {"chunk": {"bytes": b'{"generation": " single"}'}},
            {"chunk": {"bytes": b'{"generation": " response."}'}},
        ]

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = MetaLlama2ChatAdapter(model_kwargs={}, max_length=99)
        expected_responses = ["This is a single response."]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_has_calls(
            [
                call(" This", event_data={"generation": " This"}),
                call(" is", event_data={"generation": " is"}),
                call(" a", event_data={"generation": " a"}),
                call(" single", event_data={"generation": " single"}),
                call(" response.", event_data={"generation": " response."}),
            ]
        )

    def test_get_stream_responses_empty(self) -> None:
        stream_mock = MagicMock()
        stream_handler_mock = MagicMock()

        stream_mock.__iter__.return_value = []

        stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received

        adapter = MetaLlama2ChatAdapter(model_kwargs={}, max_length=99)
        expected_responses = [""]
        assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses

        stream_handler_mock.assert_not_called()