mirror of
https://github.com/microsoft/autogen.git
synced 2025-07-08 01:21:13 +00:00

Co-authored-by: Jack Gerrits <jack@jackgerrits.com> Co-authored-by: Ryan Sweet <rysweet@microsoft.com>
127 lines
5.1 KiB
C#
127 lines
5.1 KiB
C#
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Example05_Dalle_And_GPT4V.cs
|
|
|
|
using AutoGen.Core;
|
|
using AutoGen.OpenAI;
|
|
using AutoGen.OpenAI.Extension;
|
|
using FluentAssertions;
|
|
using OpenAI;
|
|
using OpenAI.Images;
|
|
|
|
/// <summary>
/// Sample in which a "dalle" agent generates an image through the <see cref="GenerateImage"/>
/// function and a reviewer agent gives feedback until it approves the image.
/// </summary>
public partial class Example05_Dalle_And_GPT4V
{
    private readonly OpenAIClient openAIClient;

    /// <summary>
    /// Create the example with the OpenAI client used for image generation.
    /// </summary>
    /// <param name="openAIClient">client used by <see cref="GenerateImage"/> to reach the "dall-e-3" image model.</param>
    public Example05_Dalle_And_GPT4V(OpenAIClient openAIClient)
    {
        this.openAIClient = openAIClient;
    }

    // NOTE(review): the XML doc below is kept verbatim because it is presumably consumed by the
    // [Function] source generator to build the function contract - confirm before rewording it.
    /// <summary>
    /// Generate image from prompt using DALL-E.
    /// </summary>
    /// <param name="prompt">prompt with feedback</param>
    /// <returns></returns>
    [Function]
    public async Task<string> GenerateImage(string prompt)
    {
        var option = new ImageGenerationOptions
        {
            Size = GeneratedImageSize.W1024xH1024,
            Style = GeneratedImageStyle.Vivid,
        };

        var imageResponse = await openAIClient.GetImageClient("dall-e-3").GenerateImageAsync(prompt, option);
        var imageUrl = imageResponse.Value.ImageUri.OriginalString;

        // The "IMAGE_GENERATION" marker and the URL on the last line are what the
        // middleware in RunAsync looks for when it extracts the image URL.
        return $@"// ignore this line [IMAGE_GENERATION]
The image is generated from prompt {prompt}

{imageUrl}";
    }

    /// <summary>
    /// Run the example: the reviewer agent asks the dalle agent for an image and the two
    /// exchange messages (at most 10 rounds) until a message containing "approve" appears.
    /// Requires the OPENAI_API_KEY environment variable; throws when it is not set.
    /// </summary>
    public static async Task RunAsync()
    {
        // This example shows how to use DALL-E and GPT-4V to generate image from prompt and feedback.
        // The DALL-E agent will generate image from prompt.
        // The GPT-4V agent will provide feedback to DALL-E agent to help it generate better image.
        // The conversation will be terminated when the image satisfies the condition.
        // The image will be saved to resource/images/background.png, relative to the current directory.

        // get OpenAI Key and create config
        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable.");
        var openAIClient = new OpenAIClient(openAIKey);
        var instance = new Example05_Dalle_And_GPT4V(openAIClient);
        var imagePath = Path.Combine("resource", "images", "background.png");

        // Start from a clean slate so the final File.Exists assertion proves this run wrote the file.
        if (File.Exists(imagePath))
        {
            File.Delete(imagePath);
        }

        // Make sure the target directory exists; otherwise writing the downloaded image would throw.
        var imageDirectory = Path.GetDirectoryName(imagePath);
        if (!string.IsNullOrEmpty(imageDirectory))
        {
            Directory.CreateDirectory(imageDirectory);
        }

        // GenerateImageFunctionContract and GenerateImageWrapper are not declared in this part of the
        // partial class; presumably they are generated from the [Function] attribute - TODO confirm.
        var generateImageFunctionMiddleware = new FunctionCallMiddleware(
            functions: [instance.GenerateImageFunctionContract],
            functionMap: new Dictionary<string, Func<string, Task<string>>>
            {
                { nameof(GenerateImage), instance.GenerateImageWrapper },
            });

        var dalleAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "dalle",
            systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url")
            .RegisterMessageConnector()
            .RegisterStreamingMiddleware(generateImageFunctionMiddleware)
            .RegisterMiddleware(async (msgs, option, agent, ct) =>
            {
                // Any message containing "approve" (case-insensitive) ends the conversation.
                if (msgs.Any(msg => msg.GetContent()?.Contains("approve", StringComparison.OrdinalIgnoreCase) is true))
                {
                    return new TextMessage(Role.Assistant, $"The image satisfies the condition, conversation is terminated. {GroupChatExtension.TERMINATE}");
                }

                // Image messages are filtered out before the history is forwarded to the inner agent.
                var msgsWithoutImage = msgs.Where(msg => msg is not ImageMessage).ToList();
                var reply = await agent.GenerateReplyAsync(msgsWithoutImage, option, ct);

                if (reply.GetContent() is string content && content.Contains("IMAGE_GENERATION"))
                {
                    // GenerateImage puts the URL on the last line; trim so a trailing newline
                    // or '\r' cannot produce an empty or malformed URL.
                    var imageUrl = content.TrimEnd().Split("\n").Last().Trim();
                    var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From, mimeType: "image/png");

                    Console.WriteLine($"download image from {imageUrl} to {imagePath}");

                    // Dispose the client once the download is done instead of leaking it.
                    using var httpClient = new HttpClient();
                    var imageBytes = await httpClient.GetByteArrayAsync(imageUrl, ct);
                    await File.WriteAllBytesAsync(imagePath, imageBytes, ct);

                    return imageMessage;
                }
                else
                {
                    return reply;
                }
            })
            .RegisterPrintMessage();

        var gpt4VAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "gpt-4o-mini",
            systemMessage: @"You are a critism that provide feedback to DALL-E agent.
Carefully check the image generated by DALL-E agent and provide feedback.
If the image satisfies the condition, then say [APPROVE].
Otherwise, provide detailed feedback to DALL-E agent so it can generate better image.

The image should satisfy the following conditions:
- There should be a cat and a mouse in the image
- The cat should be chasing after the mouse")
            .RegisterMessageConnector()
            .RegisterPrintMessage();

        await gpt4VAgent.InitiateChatAsync(
            receiver: dalleAgent,
            message: "Hey dalle, please generate image from prompt: English short hair blue cat chase after a mouse",
            maxRound: 10);

        // The middleware above must have downloaded at least one image during the chat.
        File.Exists(imagePath).Should().BeTrue();
    }
}
|