// Copyright (c) Microsoft Corporation. All rights reserved.
// Example05_Dalle_And_GPT4V.cs

using AutoGen.Core;
using AutoGen.OpenAI;
using AutoGen.OpenAI.Extension;
using FluentAssertions;
using OpenAI;
using OpenAI.Images;

/// <summary>
/// Example of two agents cooperating on image generation: a "dalle" agent that
/// produces an image through the <see cref="GenerateImage"/> function, and a
/// critic agent that reviews the image and either gives feedback or approves it.
/// </summary>
public partial class Example05_Dalle_And_GPT4V
{
    /// <summary>
    /// Marker embedded in the text returned by <see cref="GenerateImage"/>.
    /// The middleware in <see cref="RunAsync"/> looks for it to recognize a reply
    /// that carries an image URL.
    /// </summary>
    private const string ImageGenerationMarker = "IMAGE_GENERATION";

    private readonly OpenAIClient openAIClient;

    /// <summary>
    /// Creates the example around an existing OpenAI client.
    /// </summary>
    /// <param name="openAIClient">Client used to reach the image generation endpoint.</param>
    /// <exception cref="ArgumentNullException"><paramref name="openAIClient"/> is null.</exception>
    public Example05_Dalle_And_GPT4V(OpenAIClient openAIClient)
    {
        this.openAIClient = openAIClient ?? throw new ArgumentNullException(nameof(openAIClient));
    }

    /// <summary>
    /// Generate image from prompt using DALL-E.
    /// </summary>
    /// <param name="prompt">prompt with feedback</param>
    /// <returns>
    /// A text message that contains the image generation marker and the prompt,
    /// with the URL of the generated image on its own, final line.
    /// </returns>
    [Function]
    public async Task<string> GenerateImage(string prompt)
    {
        var option = new ImageGenerationOptions
        {
            Size = GeneratedImageSize.W1024xH1024,
            Style = GeneratedImageStyle.Vivid,
        };

        var imageResponse = await openAIClient.GetImageClient("dall-e-3").GenerateImageAsync(prompt, option);
        var imageUrl = imageResponse.Value.ImageUri.OriginalString;

        // The consumer in RunAsync extracts the URL by taking the last line of this
        // message, so the URL must be separated from the description by a line break.
        return $"// ignore this line [{ImageGenerationMarker}] The image is generated from prompt {prompt}\n{imageUrl}";
    }

    /// <summary>
    /// Runs the example: the critic agent asks the "dalle" agent for an image, the
    /// generated image is downloaded to <c>resource/images/background.png</c>, and
    /// the conversation continues (up to 10 rounds) until a message containing
    /// "approve" is seen.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The OPENAI_API_KEY environment variable is not set.
    /// </exception>
    public static async Task RunAsync()
    {
        // This example shows how to use DALL-E and GPT-4V to generate image from prompt and feedback.
        // The DALL-E agent will generate image from prompt.
        // The GPT-4V agent will provide feedback to DALL-E agent to help it generate better image.
        // The conversation will be terminated when the image satisfies the condition.
        // The image will be saved to resource/images/background.png (relative to the current directory).

        // get OpenAI Key and create config
        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
            ?? throw new InvalidOperationException("Please set OPENAI_API_KEY environment variable.");
        var openAIClient = new OpenAIClient(openAIKey);
        var instance = new Example05_Dalle_And_GPT4V(openAIClient);

        var imagePath = Path.Combine("resource", "images", "background.png");

        // Writing the downloaded image fails if the target directory is missing,
        // so make sure it exists before the conversation starts.
        if (Path.GetDirectoryName(imagePath) is { Length: > 0 } imageDirectory)
        {
            Directory.CreateDirectory(imageDirectory);
        }

        // Remove any image left by a previous run so the final assertion only
        // passes when this run actually produced a file.
        if (File.Exists(imagePath))
        {
            File.Delete(imagePath);
        }

        // NOTE(review): GenerateImageFunctionContract and GenerateImageWrapper are not
        // defined in this file; presumably they are generated for the [Function]
        // method into another part of this partial class — confirm.
        var generateImageFunctionMiddleware = new FunctionCallMiddleware(
            functions: [instance.GenerateImageFunctionContract],
            functionMap: new Dictionary<string, Func<string, Task<string>>>
            {
                { nameof(GenerateImage), instance.GenerateImageWrapper },
            });

        var dalleAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "dalle",
            systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url")
            .RegisterMessageConnector()
            .RegisterStreamingMiddleware(generateImageFunctionMiddleware)
            .RegisterMiddleware(async (msgs, option, agent, ct) =>
            {
                // Once any message in the history mentions "approve", end the conversation.
                if (msgs.Any(msg => msg.GetContent()?.Contains("approve", StringComparison.OrdinalIgnoreCase) is true))
                {
                    return new TextMessage(Role.Assistant, $"The image satisfies the condition, conversation is terminated. {GroupChatExtension.TERMINATE}");
                }

                // Image messages are filtered out before the history is handed to the inner agent.
                var msgsWithoutImage = msgs.Where(msg => msg is not ImageMessage).ToList();
                var reply = await agent.GenerateReplyAsync(msgsWithoutImage, option, ct);

                if (reply.GetContent() is string content && content.Contains(ImageGenerationMarker))
                {
                    // GenerateImage puts the URL on the last line; Trim drops a stray '\r'
                    // or surrounding whitespace.
                    var imageUrl = content.Split('\n').Last().Trim();
                    var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From, mimeType: "image/png");

                    Console.WriteLine($"download image from {imageUrl} to {imagePath}");
                    using var httpClient = new HttpClient();
                    var imageBytes = await httpClient.GetByteArrayAsync(imageUrl, ct);
                    await File.WriteAllBytesAsync(imagePath, imageBytes, ct);

                    return imageMessage;
                }

                return reply;
            })
            .RegisterPrintMessage();

        var gpt4VAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "gpt-4o-mini",
            systemMessage: @"You are a critism that provide feedback to DALL-E agent. 
Carefully check the image generated by DALL-E agent and provide feedback. If the image satisfies the condition, then say [APPROVE]. Otherwise, provide detailed feedback to DALL-E agent so it can generate better image. The image should satisfy the following conditions: - There should be a cat and a mouse in the image - The cat should be chasing after the mouse")
            .RegisterMessageConnector()
            .RegisterPrintMessage();

        await gpt4VAgent.InitiateChatAsync(
            receiver: dalleAgent,
            message: "Hey dalle, please generate image from prompt: English short hair blue cat chase after a mouse",
            maxRound: 10);

        // The example only counts as successful if an image was downloaded.
        File.Exists(imagePath).Should().BeTrue();
    }
}