Beibin Li b41b366549
Large Multimodal Models in AgentChat (#554)
* LMM Code added

* LLaVA notebook update

* Test cases and Notebook modified for OpenAI v1

* Move LMM into contrib
To resolve test and deployment issues
In the future, we can install pillow by default and then move the
LMM agents back into agentchat

* LMM test setup update

* Add try/except clause for LMM tests

* Disable patch for LLaVA agent test
To resolve the dependency issue in the build

* Add LMM Blog

* Change docstring for LMM agents

* Docstring update patch

* LLaVA: insert reply at position 1 now
So it can still handle human_input_mode
and max_consecutive_auto_reply

* Resolve review comments
Fix typos, blogs, and yml; add OpenAIWrapper

* Signature typo fix for LMM agent: system_message

* Update LMM "content" format per the latest OpenAI release
Reference: https://platform.openai.com/docs/guides/vision (format illustrated below)
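For reference, a sketch (illustrative only, not code from this PR) of the multi-part "content" layout described in that guide, with text and image_url entries in a single user message:

    # Illustrative example of the vision "content" format; <BASE64_DATA> is a placeholder.
    vision_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,<BASE64_DATA>"}},
        ],
    }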

* Update LMM test according to the latest OpenAI release

* Fully support GPT-4V now
1. Add a notebook for GPT-4V. LLaVA notebook also updated.
2. img_utils updated
3. GPT-4V formatter now returns base64 image with mime type
4. Infer mime type directly from b64 image content (while loading
   without suffix); see the sketch below
5. Test cases modified according to all the related changes.
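A minimal sketch of that mime-type inference, assuming Pillow is available; the helper name b64_to_data_uri is illustrative and is not the actual img_utils API:

    import base64
    from io import BytesIO

    from PIL import Image


    def b64_to_data_uri(b64_content: str) -> str:
        """Guess the image format from the decoded bytes and build a data URI."""
        raw = base64.b64decode(b64_content)
        fmt = Image.open(BytesIO(raw)).format or "PNG"  # e.g. "PNG", "JPEG"; fall back to PNG
        return f"data:image/{fmt.lower()};base64,{b64_content}"

The test file below uses a pre-built data URI of exactly this shape (data:image/png;base64,...).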

* GPT-4V link updated in blog

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
2023-11-06 21:33:51 +00:00

import unittest
from unittest.mock import MagicMock

import pytest

import autogen
from autogen.agentchat.agent import Agent

try:
    from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent
except ImportError:
    skip = True
else:
    skip = False

# A tiny PNG encoded as a data URI, reused across the tests below.
base64_encoded_image = (
    "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4"
    "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg=="
)


@pytest.mark.skipif(skip, reason="dependency is not installed")
class TestMultimodalConversableAgent(unittest.TestCase):
    def setUp(self):
        # The API key is a fake; these tests only exercise message handling and never call the API.
        self.agent = MultimodalConversableAgent(
            name="TestAgent",
            llm_config={
                "timeout": 600,
                "seed": 42,
                "config_list": [{"model": "gpt-4-vision-preview", "api_key": "sk-fake"}],
            },
        )

    def test_system_message(self):
        # Test default system message
        self.assertEqual(
            self.agent.system_message,
            [
                {
                    "type": "text",
                    "text": "You are a helpful AI assistant.",
                }
            ],
        )

        # Test updating system message
        new_message = f"We will discuss <img {base64_encoded_image}> in this conversation."
        self.agent.update_system_message(new_message)
        self.assertEqual(
            self.agent.system_message,
            [
                {"type": "text", "text": "We will discuss "},
                {"type": "image_url", "image_url": {"url": base64_encoded_image}},
                {"type": "text", "text": " in this conversation."},
            ],
        )

    def test_message_to_dict(self):
        # Test string message
        message_str = "Hello"
        expected_dict = {"content": [{"type": "text", "text": "Hello"}]}
        self.assertDictEqual(self.agent._message_to_dict(message_str), expected_dict)

        # Test list message
        message_list = [{"type": "text", "text": "Hello"}]
        expected_dict = {"content": message_list}
        self.assertDictEqual(self.agent._message_to_dict(message_list), expected_dict)

        # Test dictionary message
        message_dict = {"content": [{"type": "text", "text": "Hello"}]}
        self.assertDictEqual(self.agent._message_to_dict(message_dict), message_dict)

    def test_print_received_message(self):
        sender = Agent(name="SenderAgent")
        message_str = "Hello"
        self.agent._print_received_message = MagicMock()  # Mocking print method to avoid actual print
        self.agent._print_received_message(message_str, sender)
        self.agent._print_received_message.assert_called_with(message_str, sender)


if __name__ == "__main__":
    unittest.main()