mirror of
				https://github.com/microsoft/autogen.git
				synced 2025-10-31 09:50:11 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			127 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			127 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| // Copyright (c) Microsoft Corporation. All rights reserved.
 | |
| // Example05_Dalle_And_GPT4V.cs
 | |
| 
 | |
| using AutoGen.Core;
 | |
| using AutoGen.OpenAI;
 | |
| using AutoGen.OpenAI.Extension;
 | |
| using FluentAssertions;
 | |
| using OpenAI;
 | |
| using OpenAI.Images;
 | |
| 
 | |
// Example: a "dalle" agent generates an image through a function call and a
// second vision-capable agent critiques it until it replies with [APPROVE].
// The class is partial; GenerateImageFunctionContract and GenerateImageWrapper
// used in RunAsync are not declared here (presumably emitted by the AutoGen
// source generator for the [Function] method -- confirm against the project).
public partial class Example05_Dalle_And_GPT4V
{
    private readonly OpenAIClient openAIClient;

    public Example05_Dalle_And_GPT4V(OpenAIClient openAIClient)
    {
        this.openAIClient = openAIClient;
    }

    // NOTE(review): the XML doc text below is kept verbatim on purpose; it
    // presumably feeds the generated function contract that is sent to the model.
    // The returned string is a small text block whose LAST line is the image URL;
    // the middleware in RunAsync relies on that layout and on the
    // IMAGE_GENERATION marker in the first line.
    /// <summary>
    /// Generate image from prompt using DALL-E.
    /// </summary>
    /// <param name="prompt">prompt with feedback</param>
    /// <returns></returns>
    [Function]
    public async Task<string> GenerateImage(string prompt)
    {
        // Ask DALL-E 3 for a single 1024x1024 "vivid" image and return its URL.
        var option = new ImageGenerationOptions
        {
            Size = GeneratedImageSize.W1024xH1024,
            Style = GeneratedImageStyle.Vivid,
        };

        var imageResponse = await openAIClient.GetImageClient("dall-e-3").GenerateImageAsync(prompt, option);
        var imageUrl = imageResponse.Value.ImageUri.OriginalString;

        return $@"// ignore this line [IMAGE_GENERATION]
The image is generated from prompt {prompt}

{imageUrl}";
    }

    /// <summary>
    /// Runs the example end to end: builds both agents, lets them chat for at most
    /// 10 rounds, and asserts that an image was downloaded to disk.
    /// </summary>
    /// <exception cref="InvalidOperationException">OPENAI_API_KEY is not set.</exception>
    public static async Task RunAsync()
    {
        // This example shows how to use DALL-E and GPT-4V to generate image from prompt and feedback.
        // The DALL-E agent will generate image from prompt.
        // The GPT-4V agent will provide feedback to DALL-E agent to help it generate better image.
        // The conversation will be terminated when the image satisfies the condition.
        // The image will be saved to resource/images/background.png (relative to the current directory).

        // get OpenAI Key and create config
        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
            ?? throw new InvalidOperationException("Please set OPENAI_API_KEY environment variable.");
        var openAIClient = new OpenAIClient(openAIKey);
        var instance = new Example05_Dalle_And_GPT4V(openAIClient);

        var imageDirectory = Path.Combine("resource", "images");
        var imagePath = Path.Combine(imageDirectory, "background.png");

        // Remove any image left over from a previous run so the final assertion
        // only passes when this run actually downloaded one.
        if (File.Exists(imagePath))
        {
            File.Delete(imagePath);
        }

        var generateImageFunctionMiddleware = new FunctionCallMiddleware(
            functions: [instance.GenerateImageFunctionContract],
            functionMap: new Dictionary<string, Func<string, Task<string>>>
            {
                { nameof(GenerateImage), instance.GenerateImageWrapper },
            });

        var dalleAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "dalle",
            systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url")
            .RegisterMessageConnector()
            .RegisterStreamingMiddleware(generateImageFunctionMiddleware)
            .RegisterMiddleware(async (msgs, option, agent, ct) =>
            {
                // Short-circuit once any message in the history contains "approve"
                // (case-insensitive): reply with the terminate keyword instead of
                // calling the model again.
                if (msgs.Any(msg => msg.GetContent()?.Contains("approve", StringComparison.OrdinalIgnoreCase) is true))
                {
                    return new TextMessage(Role.Assistant, $"The image satisfies the condition, conversation is terminated. {GroupChatExtension.TERMINATE}");
                }

                // Image messages are filtered out before the inner agent is invoked.
                var msgsWithoutImage = msgs.Where(msg => msg is not ImageMessage).ToList();
                var reply = await agent.GenerateReplyAsync(msgsWithoutImage, option, ct);

                if (reply.GetContent() is not string content || !content.Contains("IMAGE_GENERATION"))
                {
                    return reply;
                }

                // GenerateImage puts the URL on the last line of its output; trim it so
                // a stray '\r' or trailing whitespace cannot corrupt the URL.
                var imageUrl = content.Split('\n').Last().Trim();
                var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From, mimeType: "image/png");

                Console.WriteLine($"download image from {imageUrl} to {imagePath}");

                // Dispose the client when done, and make sure the target directory
                // exists before writing (CreateDirectory is a no-op if it already does).
                using var httpClient = new HttpClient();
                var imageBytes = await httpClient.GetByteArrayAsync(imageUrl, ct);
                Directory.CreateDirectory(imageDirectory);
                await File.WriteAllBytesAsync(imagePath, imageBytes, ct);

                return imageMessage;
            })
            .RegisterPrintMessage();

        var gpt4VAgent = new OpenAIChatAgent(
            chatClient: openAIClient.GetChatClient("gpt-4o-mini"),
            name: "gpt-4o-mini",
            systemMessage: @"You are a critism that provide feedback to DALL-E agent.
Carefully check the image generated by DALL-E agent and provide feedback.
If the image satisfies the condition, then say [APPROVE].
Otherwise, provide detailed feedback to DALL-E agent so it can generate better image.

The image should satisfy the following conditions:
- There should be a cat and a mouse in the image
- The cat should be chasing after the mouse")
            .RegisterMessageConnector()
            .RegisterPrintMessage();

        await gpt4VAgent.InitiateChatAsync(
            receiver: dalleAgent,
            message: "Hey dalle, please generate image from prompt: English short hair blue cat chase after a mouse",
            maxRound: 10);

        // The run only counts as successful if an image was downloaded.
        File.Exists(imagePath).Should().BeTrue();
    }
}
 | 
