autogen/dotnet/samples/AgentChat/AutoGen.Basic.Sample/Example05_Dalle_And_GPT4V.cs
Griffin Bassman 850377c74a
fix: Various fixes and cleanups to dotnet autogen core (#5242)
Co-authored-by: Jack Gerrits <jack@jackgerrits.com>
Co-authored-by: Ryan Sweet <rysweet@microsoft.com>
2025-01-28 17:13:36 -05:00

127 lines
5.1 KiB
C#

// Copyright (c) Microsoft Corporation. All rights reserved.
// Example05_Dalle_And_GPT4V.cs
using AutoGen.Core;
using AutoGen.OpenAI;
using AutoGen.OpenAI.Extension;
using FluentAssertions;
using OpenAI;
using OpenAI.Images;
/// <summary>
/// Sample in which a "dalle" agent generates an image through a DALL-E function call and a
/// second agent reviews it, repeating until the reviewer approves or the round limit is hit.
/// </summary>
/// <remarks>
/// The class is <c>partial</c>: <c>GenerateImageFunctionContract</c> and <c>GenerateImageWrapper</c>
/// are referenced below but declared elsewhere (presumably generated from the
/// <c>[Function]</c> attribute on <see cref="GenerateImage"/> - confirm against the build output).
/// </remarks>
public partial class Example05_Dalle_And_GPT4V
{
    private readonly OpenAIClient openAIClient;

    /// <summary>
    /// Creates the example around the OpenAI client used by <see cref="GenerateImage"/>.
    /// </summary>
    /// <param name="openAIClient">Client used to reach the "dall-e-3" image endpoint.</param>
    public Example05_Dalle_And_GPT4V(OpenAIClient openAIClient)
    {
        this.openAIClient = openAIClient;
    }

    /// <summary>
    /// Generate image from prompt using DALL-E.
    /// </summary>
    /// <param name="prompt">prompt with feedback</param>
    /// <returns>
    /// A three-line string: a marker line containing <c>[IMAGE_GENERATION]</c>, a line echoing
    /// the prompt, and the URL of the generated image on the last line.
    /// </returns>
    [Function]
    public async Task<string> GenerateImage(string prompt)
    {
        var option = new ImageGenerationOptions
        {
            Size = GeneratedImageSize.W1024xH1024,
            Style = GeneratedImageStyle.Vivid,
        };

        var imageResponse = await openAIClient.GetImageClient("dall-e-3").GenerateImageAsync(prompt, option);
        var imageUrl = imageResponse.Value.ImageUri.OriginalString;

        // The middleware in RunAsync looks for the IMAGE_GENERATION marker and reads the URL
        // from the last line, so the layout of this string must not change.
        return $@"// ignore this line [IMAGE_GENERATION]
The image is generated from prompt {prompt}
{imageUrl}";
    }

    /// <summary>
    /// Runs the DALL-E / reviewer conversation and asserts that an image was downloaded.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the <c>OPENAI_API_KEY</c> environment variable is not set.
    /// </exception>
    public static async Task RunAsync()
    {
        // This example shows how to use DALL-E and a vision-capable chat model to generate an image
        // from a prompt and feedback.
        // The DALL-E agent generates an image from the prompt.
        // The reviewer agent provides feedback to the DALL-E agent to help it generate a better image.
        // The conversation is terminated when the image satisfies the condition.
        // The image is saved to resource/images/background.png (relative to the working directory).

        // get OpenAI Key and create config
        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("Please set OPENAI_API_KEY environment variable.");
        var openAIClient = new OpenAIClient(openAIKey);
        var instance = new Example05_Dalle_And_GPT4V(openAIClient);

        // Remove any image left over from a previous run so the final assertion is meaningful.
        var imagePath = Path.Combine("resource", "images", "background.png");
        if (File.Exists(imagePath))
        {
            File.Delete(imagePath);
        }

        // One client for the whole conversation, disposed when RunAsync exits, instead of
        // allocating (and leaking) a new HttpClient for every generated image.
        using var httpClient = new HttpClient();

        var generateImageFunctionMiddleware = new FunctionCallMiddleware(
            functions: [instance.GenerateImageFunctionContract],
            functionMap: new Dictionary<string, Func<string, Task<string>>>
            {
                { nameof(GenerateImage), instance.GenerateImageWrapper },
            });

        var dalleAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "dalle",
            systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url")
            .RegisterMessageConnector()
            .RegisterStreamingMiddleware(generateImageFunctionMiddleware)
            .RegisterMiddleware(async (msgs, option, agent, ct) =>
            {
                // Stop as soon as any message in the history contains "approve" (case-insensitive).
                if (msgs.Any(msg => msg.GetContent()?.Contains("approve", StringComparison.OrdinalIgnoreCase) is true))
                {
                    return new TextMessage(Role.Assistant, $"The image satisfies the condition, conversation is terminated. {GroupChatExtension.TERMINATE}");
                }

                // Image messages are filtered out before asking the inner agent for a reply.
                var msgsWithoutImage = msgs.Where(msg => msg is not ImageMessage).ToList();
                var reply = await agent.GenerateReplyAsync(msgsWithoutImage, option, ct);

                if (reply.GetContent() is not string content || !content.Contains("IMAGE_GENERATION"))
                {
                    return reply;
                }

                // GenerateImage puts the URL on the last line; trim stray whitespace or a trailing '\r'.
                var imageUrl = content.Split('\n')[^1].Trim();
                var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From, mimeType: "image/png");

                Console.WriteLine($"download image from {imageUrl} to {imagePath}");
                var imageBytes = await httpClient.GetByteArrayAsync(imageUrl, ct);

                // Writing fails with DirectoryNotFoundException when the target folder is absent.
                if (Path.GetDirectoryName(imagePath) is { Length: > 0 } imageDirectory)
                {
                    Directory.CreateDirectory(imageDirectory);
                }

                await File.WriteAllBytesAsync(imagePath, imageBytes, ct);

                return imageMessage;
            })
            .RegisterPrintMessage();

        var gpt4VAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "gpt-4o-mini",
            systemMessage: @"You are a critism that provide feedback to DALL-E agent.
Carefully check the image generated by DALL-E agent and provide feedback.
If the image satisfies the condition, then say [APPROVE].
Otherwise, provide detailed feedback to DALL-E agent so it can generate better image.
The image should satisfy the following conditions:
- There should be a cat and a mouse in the image
- The cat should be chasing after the mouse")
            .RegisterMessageConnector()
            .RegisterPrintMessage();

        await gpt4VAgent.InitiateChatAsync(
            receiver: dalleAgent,
            message: "Hey dalle, please generate image from prompt: English short hair blue cat chase after a mouse",
            maxRound: 10);

        // At least one image must have been generated and downloaded during the chat.
        File.Exists(imagePath).Should().BeTrue();
    }
}