// Copyright (c) Microsoft Corporation. All rights reserved.
// Image_Chat_With_Agent.cs
using System;
using System.IO;
using System.Threading.Tasks;
using AutoGen.Core;
using AutoGen.OpenAI;
using AutoGen.OpenAI.Extension;
using Azure.AI.OpenAI;
using FluentAssertions;
namespace AutoGen.BasicSample;

public class Image_Chat_With_Agent
{
    public static async Task RunAsync()
    {
        #region Create_Agent
        var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable.");
        var model = "gpt-4o"; // The model needs to support multimodal inputs
        var openaiClient = new OpenAIClient(apiKey);
        var agent = new OpenAIChatAgent(
            openAIClient: openaiClient,
            name: "agent",
            modelName: model,
            systemMessage: "You are a helpful AI assistant")
            .RegisterMessageConnector() // convert between OpenAI messages and AutoGen messages
            .RegisterPrintMessage(); // print each reply to the console
        #endregion Create_Agent
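
        // Note: RegisterMessageConnector is what lets this agent consume the AutoGen
        // message types used below (TextMessage, ImageMessage, MultiModalMessage);
        // without it, the agent only accepts the OpenAI SDK's native message types.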
        #region Prepare_Image_Input
        var backgroundImagePath = Path.Combine("resource", "images", "background.png");
        var imageBytes = File.ReadAllBytes(backgroundImagePath);
        var imageMessage = new ImageMessage(Role.User, BinaryData.FromBytes(imageBytes, "image/png"));
        #endregion Prepare_Image_Input
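
        // Depending on the AutoGen version in use, an ImageMessage may also be built
        // from a URL instead of raw bytes, e.g. (hypothetical URL for illustration):
        // var urlImageMessage = new ImageMessage(Role.User, new Uri("https://example.com/background.png"));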
        #region Chat_With_Agent
        var reply = await agent.SendAsync("what's in the picture", chatHistory: [imageMessage]);
        #endregion Chat_With_Agent
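
        // SendAsync wraps the string prompt in a TextMessage and sends it together with
        // the provided chatHistory, so the model sees both the image and the question.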
        #region Prepare_Multimodal_Input
        var textMessage = new TextMessage(Role.User, "what's in the picture");
        var multimodalMessage = new MultiModalMessage(Role.User, [textMessage, imageMessage]);
        reply = await agent.SendAsync(multimodalMessage);
        #endregion Prepare_Multimodal_Input
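
        // Unlike the previous call, the text and image here are combined into a single
        // user message rather than sent as two separate entries in the chat history.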
        #region verify_reply
        // the message connector converts the chat completion response into a TextMessage
        reply.Should().BeOfType<TextMessage>();
        #endregion verify_reply
    }
}