2024-05-09 15:40:36 +02:00
|
|
|
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
|
|
#
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2023-10-23 16:08:05 +02:00
|
|
|
import pytest
|
2023-11-06 09:48:45 +01:00
|
|
|
from transformers import AutoTokenizer
|
2023-10-23 16:08:05 +02:00
|
|
|
|
2023-11-24 14:48:43 +01:00
|
|
|
from haystack.dataclasses import ChatMessage, ChatRole
|
2023-10-23 16:08:05 +02:00
|
|
|
|
|
|
|
|
|
|
|
def test_from_assistant_with_valid_content():
    """Check that ``ChatMessage.from_assistant`` keeps the text and sets the ASSISTANT role."""
    text = "Hello, how can I assist you?"
    assistant_message = ChatMessage.from_assistant(text)

    assert assistant_message.content == text
    assert assistant_message.role == ChatRole.ASSISTANT
|
|
|
|
|
|
|
|
|
|
|
|
def test_from_user_with_valid_content():
    """Check that ``ChatMessage.from_user`` keeps the text and sets the USER role."""
    text = "I have a question."
    user_message = ChatMessage.from_user(text)

    assert user_message.content == text
    assert user_message.role == ChatRole.USER
|
|
|
|
|
|
|
|
|
|
|
|
def test_from_system_with_valid_content():
    """Check that ``ChatMessage.from_system`` keeps the text and sets the SYSTEM role."""
    text = "System message."
    system_message = ChatMessage.from_system(text)

    assert system_message.content == text
    assert system_message.role == ChatRole.SYSTEM
|
|
|
|
|
|
|
|
|
|
|
|
def test_with_empty_content():
    """Check that a user message built from an empty string exposes empty content."""
    empty_message = ChatMessage.from_user("")

    assert empty_message.content == ""
|
|
|
|
|
|
|
|
|
|
|
|
def test_from_function_with_empty_name():
    """Check that ``ChatMessage.from_function`` accepts an empty name and stores it unchanged."""
    text = "Function call"
    function_message = ChatMessage.from_function(text, "")

    assert function_message.content == text
    assert function_message.name == ""
|
2023-11-06 09:48:45 +01:00
|
|
|
|
|
|
|
|
2024-03-22 09:36:56 +01:00
|
|
|
def test_to_openai_format():
    """Check the dict produced by ``to_openai_format`` for system, user and function messages."""
    system_message = ChatMessage.from_system("You are good assistant")
    expected_system = {"role": "system", "content": "You are good assistant"}
    assert system_message.to_openai_format() == expected_system

    user_message = ChatMessage.from_user("I have a question")
    expected_user = {"role": "user", "content": "I have a question"}
    assert user_message.to_openai_format() == expected_user

    # Only the function message is expected to carry a "name" key.
    function_message = ChatMessage.from_function("Function call", "function_name")
    expected_function = {"role": "function", "content": "Function call", "name": "function_name"}
    assert function_message.to_openai_format() == expected_function
|
|
|
|
|
|
|
|
|
2023-11-06 09:48:45 +01:00
|
|
|
@pytest.mark.integration
def test_apply_chat_templating_on_chat_message():
    """Render a system + user conversation through the zephyr tokenizer's own chat template."""
    conversation = [ChatMessage.from_system("You are good assistant"), ChatMessage.from_user("I have a question")]
    zephyr_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

    # The tokenizer is fed the OpenAI-style dicts, not ChatMessage objects.
    openai_payload = [chat_message.to_openai_format() for chat_message in conversation]
    rendered = zephyr_tokenizer.apply_chat_template(openai_payload, tokenize=False)

    assert rendered == "<|system|>\nYou are good assistant</s>\n<|user|>\nI have a question</s>\n"
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.integration
def test_apply_custom_chat_templating_on_chat_message():
    """Render a system + user conversation through a caller-supplied, Anthropic-style chat template."""
    template_fragments = (
        "{%- for message in messages %}",
        "{%- if message.role == 'user' %}\n\nHuman: {{ message.content.strip() }}",
        "{%- elif message.role == 'assistant' %}\n\nAssistant: {{ message.content.strip() }}",
        "{%- elif message.role == 'function' %}{{ raise('anthropic does not support function calls.') }}",
        "{%- elif message.role == 'system' and loop.index == 1 %}{{ message.content }}",
        "{%- else %}{{ raise('Invalid message role: ' + message.role) }}",
        "{%- endif %}",
        "{%- endfor %}",
        "\n\nAssistant:",
    )
    anthropic_template = "".join(template_fragments)

    conversation = [ChatMessage.from_system("You are good assistant"), ChatMessage.from_user("I have a question")]

    # Any tokenizer would do here; this one is reused because it is probably cached already.
    zephyr_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

    openai_payload = [chat_message.to_openai_format() for chat_message in conversation]
    rendered = zephyr_tokenizer.apply_chat_template(
        openai_payload, chat_template=anthropic_template, tokenize=False
    )

    # NOTE(review): the template writes "\n\n" before "Human:" / "Assistant:" while the expected
    # output has a single "\n" — presumably the template engine drops the newline that directly
    # follows a block tag; confirm against the transformers chat-template documentation.
    assert rendered == "You are good assistant\nHuman: I have a question\nAssistant:"
|
2024-06-20 13:20:52 +02:00
|
|
|
|
|
|
|
|
|
|
|
def test_to_dict():
    """Check that ``to_dict`` returns content, role, name and the mutated meta mapping."""
    user_message = ChatMessage.from_user("content")
    user_message.meta["some"] = "some"

    expected = {"content": "content", "role": "user", "name": None, "meta": {"some": "some"}}
    assert user_message.to_dict() == expected
|
|
|
|
|
|
|
|
|
|
|
|
def test_from_dict():
    """Check that ``from_dict`` without a "meta" key equals a message built with empty meta."""
    payload = {"content": "text", "role": "user", "name": None}

    restored = ChatMessage.from_dict(data=payload)
    expected = ChatMessage(content="text", role=ChatRole("user"), name=None, meta={})

    assert restored == expected
|
|
|
|
|
|
|
|
|
|
|
|
def test_from_dict_with_meta():
    """Check that ``from_dict`` carries a supplied "meta" mapping into the resulting message."""
    payload = {"content": "text", "role": "user", "name": None, "meta": {"something": "something"}}

    restored = ChatMessage.from_dict(data=payload)
    expected = ChatMessage(content="text", role=ChatRole("user"), name=None, meta={"something": "something"})

    assert restored == expected
|