import json
import os
from unittest.mock import patch, Mock, call

import pytest

from haystack.nodes.prompt.invocation_layer.handlers import DefaultTokenStreamingHandler
from haystack.nodes.prompt.invocation_layer import AnthropicClaudeInvocationLayer
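

# Tests for AnthropicClaudeInvocationLayer. The two fixtures below patch the Anthropic
# tokenizer and the request_with_retry helper, so the unit tests run without network access.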
@pytest.fixture
def mock_claude_tokenizer():
    with patch("haystack.nodes.prompt.invocation_layer.anthropic_claude.Tokenizer", autospec=True) as mock_tokenizer:
        yield mock_tokenizer


@pytest.fixture
def mock_claude_request():
    with patch("haystack.nodes.prompt.invocation_layer.anthropic_claude.request_with_retry") as mock_request:
        yield mock_request


@pytest.mark.unit
def test_default_constructor(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    assert layer.api_key == "some_fake_key"
    assert layer.max_length == 200
    assert layer.max_tokens_limit == 100000
    assert layer.model_input_kwargs == {}


@pytest.mark.unit
def test_ignored_kwargs_are_filtered_in_init(mock_claude_tokenizer, mock_claude_request):
    kwargs = {
        "temperature": 1,
        "top_p": 5,
        "top_k": 2,
        "stop_sequences": ["\n\nHuman: "],
        "stream": True,
        "stream_handler": DefaultTokenStreamingHandler(),
        "unknown_args": "this will be filtered out",
    }
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key", **kwargs)

    # Verify unexpected kwargs are filtered out
    assert len(layer.model_input_kwargs) == 6
    assert "temperature" in layer.model_input_kwargs
    assert "top_p" in layer.model_input_kwargs
    assert "top_k" in layer.model_input_kwargs
    assert "stop_sequences" in layer.model_input_kwargs
    assert "stream" in layer.model_input_kwargs
    assert "stream_handler" in layer.model_input_kwargs
    assert "unknown_args" not in layer.model_input_kwargs


@pytest.mark.unit
def test_invoke_with_no_kwargs(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    with pytest.raises(ValueError) as e:
        layer.invoke()
    assert e.match("No prompt provided.")


@pytest.mark.unit
def test_invoke_with_prompt_only(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    # Create a fake response
    mock_response = Mock(**{"status_code": 200, "ok": True, "json.return_value": {"completion": "some_result "}})
    mock_claude_request.return_value = mock_response

    res = layer.invoke(prompt="Some prompt")
    assert len(res) == 1
    assert res[0] == "some_result"
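

# Note the trailing space in the mocked completion above: invoke() returns the
# completion with surrounding whitespace stripped.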


@pytest.mark.unit
def test_invoke_with_kwargs(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    # Create a fake response
    mock_response = Mock(**{"status_code": 200, "ok": True, "json.return_value": {"completion": "some_result "}})
    with patch("haystack.nodes.prompt.invocation_layer.anthropic_claude.request_with_retry") as mock_invocation_request:
        mock_invocation_request.return_value = mock_response
        res = layer.invoke(prompt="Some prompt", max_length=300, stop_words=["stop", "here"])
        assert len(res) == 1
        assert res[0] == "some_result"

        expected_data = {
            "model": "claude-2",
            "prompt": "\n\nHuman: Some prompt\n\nAssistant: ",
            "max_tokens_to_sample": 300,
            "temperature": 1,
            "top_p": -1,
            "top_k": -1,
            "stream": False,
            "stop_sequences": ["stop", "here", "\n\nHuman: "],
        }
        mock_invocation_request.assert_called_once()
        assert mock_invocation_request.call_args.kwargs["data"] == json.dumps(expected_data)
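

# The expected payload mirrors the body of Anthropic's completion request: the prompt is
# wrapped in the "\n\nHuman: ... \n\nAssistant: " template, and the "\n\nHuman: " turn
# marker is always appended to stop_sequences on top of any user-supplied stop words.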


@pytest.mark.unit
def test_invoke_with_none_stop_words(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    # Create a fake response
    mock_response = Mock(**{"status_code": 200, "ok": True, "json.return_value": {"completion": "some_result "}})
    with patch("haystack.nodes.prompt.invocation_layer.anthropic_claude.request_with_retry") as mock_invocation_request:
        mock_invocation_request.return_value = mock_response
        res = layer.invoke(prompt="Some prompt", max_length=300, stop_words=None)
        assert len(res) == 1
        assert res[0] == "some_result"

        expected_data = {
            "model": "claude-2",
            "prompt": "\n\nHuman: Some prompt\n\nAssistant: ",
            "max_tokens_to_sample": 300,
            "temperature": 1,
            "top_p": -1,
            "top_k": -1,
            "stream": False,
            "stop_sequences": ["\n\nHuman: "],
        }
        mock_invocation_request.assert_called_once()
        assert mock_invocation_request.call_args.kwargs["data"] == json.dumps(expected_data)
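

# The streaming tests below emulate Anthropic's server-sent-events wire format:
# each event is a `data: {json}` line followed by a blank line, and the generated
# text chunk is carried in the "completion" field.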


@pytest.mark.unit
def test_invoke_with_stream(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    # Create a fake streamed response
    def mock_iter(self):
        fake_data = json.dumps({"completion": " The sky appears"})
        yield f"data: {fake_data}\n\n".encode()
        fake_data = json.dumps({"completion": " blue to"})
        yield f"data: {fake_data}\n\n".encode()
        fake_data = json.dumps({"completion": " us due to how"})
        yield f"data: {fake_data}\n\n".encode()
        # Done was removed from the stream
        # https://docs.anthropic.com/claude/reference/versioning

    mock_response = Mock(**{"__iter__": mock_iter})

    # Verifies expected result is returned
    with patch("haystack.nodes.prompt.invocation_layer.anthropic_claude.request_with_retry") as mock_invocation_request:
        mock_invocation_request.return_value = mock_response
        res = layer.invoke(prompt="Some prompt", stream=True)

        assert len(res) == 1
        assert res[0] == " The sky appears blue to us due to how"


@pytest.mark.unit
def test_invoke_with_custom_stream_handler(mock_claude_tokenizer, mock_claude_request):
    # Create a generator that will yield the expected return values in order
    def mock_handler_responses():
        yield " The sky appears"
        yield " blue to"
        yield " us due to how"

    handler_responses = mock_handler_responses()

    # Create a mock stream handler that will return the next value from the generator when called
    mock_stream_handler = Mock(side_effect=lambda x: next(handler_responses))

    # Create a layer with a mocked stream handler
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key", stream_handler=mock_stream_handler)

    # Create a fake streamed response
    def mock_iter(self):
        fake_data = json.dumps({"completion": " The sky appears"})
        yield f"data: {fake_data}\n\n".encode()
        fake_data = json.dumps({"completion": " blue to"})
        yield f"data: {fake_data}\n\n".encode()
        fake_data = json.dumps({"completion": " us due to how"})
        yield f"data: {fake_data}\n\n".encode()
        # Done was removed from the stream
        # https://docs.anthropic.com/claude/reference/versioning

    mock_response = Mock(**{"__iter__": mock_iter})

    with patch("haystack.nodes.prompt.invocation_layer.anthropic_claude.request_with_retry") as mock_invocation_request:
        mock_invocation_request.return_value = mock_response
        res = layer.invoke(prompt="Some prompt")

        assert len(res) == 1
        # This is not the real result but the values returned by the mock handler
        assert res[0] == " The sky appears blue to us due to how"

        # Verifies the handler has been called the expected times with the expected args
        assert mock_stream_handler.call_count == 3
        expected_call_list = [call(" The sky appears"), call(" blue to"), call(" us due to how")]
        assert mock_stream_handler.call_args_list == expected_call_list
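

# The _ensure_token_limit tests below run against the real Anthropic tokenizer
# (no fixtures are used), which is presumably why they are marked as integration tests.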


@pytest.mark.unit
def test_ensure_token_limit_fails_if_called_with_list(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")
    with pytest.raises(ValueError):
        layer._ensure_token_limit(prompt=[])


@pytest.mark.integration
def test_ensure_token_limit_with_small_max_length(caplog):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key", max_length=10)
    res = layer._ensure_token_limit(prompt="Short prompt")

    assert res == "Short prompt"
    assert not caplog.records

    res = layer._ensure_token_limit(prompt="This is a very very very very very much longer prompt")
    assert res == "This is a very very very very very much longer prompt"
    assert not caplog.records


@pytest.mark.integration
def test_ensure_token_limit_with_huge_max_length(caplog):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key", max_length=(100000 - 5))
    res = layer._ensure_token_limit(prompt="Short prompt")

    assert res == "Short prompt"
    assert not caplog.records

    res = layer._ensure_token_limit(prompt="This is a very very very very very much longer prompt")
    assert res == "This is a very very"
    assert len(caplog.records) == 1
    expected_message_log = (
        "The prompt has been truncated from 7 tokens to 5 tokens so that the prompt length and "
        "answer length (99995 tokens) fits within the max token limit (100000 tokens). "
        "Reduce the length of the prompt to prevent it from being cut off."
    )
    assert caplog.records[0].message == expected_message_log


@pytest.mark.unit
def test_supports(mock_claude_tokenizer, mock_claude_request):
    layer = AnthropicClaudeInvocationLayer(api_key="some_fake_key")

    assert not layer.supports("claude")

    assert layer.supports("claude-v1")
    assert layer.supports("claude-v1.0")
    assert layer.supports("claude-v1.2")
    assert layer.supports("claude-v1.3")
    assert layer.supports("claude-v2.0")
    assert layer.supports("claude-instant-v1")
    assert layer.supports("claude-instant-v1.0")
    assert layer.supports("claude-instant-v1.1")


@pytest.mark.integration
@pytest.mark.skipif(os.environ.get("ANTHROPIC_CLAUDE_API_KEY", "") == "", reason="Anthropic Claude API key not found")
def test_invoke_non_streamed():
    api_key = os.environ.get("ANTHROPIC_CLAUDE_API_KEY")
    layer = AnthropicClaudeInvocationLayer(api_key=api_key)

    res = layer.invoke(prompt="Why is the sky blue?")

    # Verifies an answer has been received
    assert len(res) == 1


@pytest.mark.integration
@pytest.mark.skipif(os.environ.get("ANTHROPIC_CLAUDE_API_KEY", "") == "", reason="Anthropic Claude API key not found")
def test_invoke_streamed():
    api_key = os.environ.get("ANTHROPIC_CLAUDE_API_KEY")
    layer = AnthropicClaudeInvocationLayer(api_key=api_key)

    res = layer.invoke(prompt="Why is the sky blue?", stream=True)

    # Verifies an answer has been received
    assert len(res) == 1