mirror of
https://github.com/microsoft/autogen.git
synced 2025-12-13 16:01:10 +00:00
[.Net] Support raw-data in ImageMessage (#2552)
* update * add sample project * revert notebook change back * update * update interactive version * add nuget package * refactor Message * update example * add azure nightly build pipeline * Set up CI with Azure Pipelines [skip ci] * Update nightly-build.yml for Azure Pipelines * add dotnet interactive package * add dotnet interactive package * update pipeline * add nuget feed back * remove dotnet-tool feed * remove dotnet-tool feed comment * update pipeline * update build name * Update nightly-build.yml * Delete .github/workflows/dotnet-ci.yml * update * add working_dir to use step * add initateChat api * update oai package * Update dotnet-build.yml * Update dotnet-run-openai-test-and-notebooks.yml * update build workflow * update build workflow * update nuget feed * update nuget feed * update aoai and sk version * Update InteractiveService.cs * add support for GPT 4V * add DalleAndGPT4V example * update example * add user proxy agent * add readme * bump version * update example * add dotnet interactive hook * update * udpate tests * add website * update index.md * add docs * update doc * move sk dependency out of core package * udpate doc * Update Use-function-call.md * add type safe function call document * update doc * update doc * add dock * Update Use-function-call.md * add GenerateReplyOptions * remove IChatLLM * update version * update doc * update website * add sample * fix link * add middleware agent * clean up doc * bump version * update doc * update * add Other Language * remove warnings * add sign.props * add sign step * fix pipelien * auth * real sign * disable PR trigger * update * disable PR trigger * use microbuild machine * update build pipeline to add publish to internal feed * add internal feed * fix build pipeline * add dotnet prefix * update ci * add build number * update run number * update source * update token * update * remove adding source * add publish to github package * try again * try again * ask for write pacakge * disable package 
when branch is not main * update * implement streaming agent * add test for streaming function call * update * fix #1588 * enable PR check for dotnet branch * add website readme * only publish to dotnet feed when pushing to dotnet branch * remove openai-test-and-notebooks workflow * update readme * update readme * update workflow * update getting-start * upgrade test and sample proejct to use .net 8 * fix global.json format && make loadFromConfig API internal only before implementing * update * add support for LM studio * add doc * Update README.md * add push and workflow_dispatch trigger * disable PR for main * add dotnet env * Update Installation.md * add nuget * refer to newtonsoft 13 * update branch to dotnet in docfx * Update Installation.md * pull out HumanInputMiddleware and FunctionCallMiddleware * fix tests * add link to sample folder * refactor message * refactor over IMessage * add more tests * add more test * fix build error * rename header * add semantic kernel project * update sk example * update dotnet version * add LMStudio function call example * rename LLaMAFunctin * remove dotnet run openai test and notebook workflow * add FunctionContract and test * update doc * add documents * add workflow * update * update sample * fix warning in test * reult length can be less then maximumOutputToKeep (#1804) * merge with main * add option to retrieve inner agent and middlewares from MiddlewareAgent * update doc * adjust namespace * update readme * fix test * use IMessage * more updates * update * fix test * add comments * use FunctionContract to replace FunctionDefinition * move AutoGen contrac to AutoGen.Core * update installation * refactor streamingAgent by adding StreamingMessage type * update sample * update samples * update * update * add test * fix test * bump version * add openaichat test * update * Update Example03_Agent_FunctionCall.cs * [.Net] improve docs (#1862) * add doc * add doc * add doc * add doc * add doc * add doc * update * fix test 
error * fix some error * fix test * fix test * add more tests * edits --------- Co-authored-by: ekzhu <ekzhu@users.noreply.github.com> * [.Net] Add fill form example (#1911) * add form filler example * update * fix ci error * [.Net] Add using AutoGen.Core in source generator (#1983) * fix using namespace bug in source generator * remove using in sourcegenerator test * disable PR test * Add .idea to .gitignore (#1988) * [.Net] publish to nuget.org feed (#1987) * publish to nuget * update ci * update dotnet-release * update release pipeline * add source * remove empty symbol package * update pipeline * remove tag * update installation guide * [.Net] Rename some classes && APIs based on doc review (#1980) * rename sequential group chat to round robin group chat * rename to sendInstruction * rename workflow to graph * rename some api * bump version * move Graph to GroupChat folder * rename fill application example * [.Net] Improve package description (#2161) * add discord link and update package description * Update getting-start.md * [.Net] Fix document comment from the most recent AutoGen.Net engineer sync (#2231) * update * rename RegisterPrintMessageHook to RegisterPrintMessage * update website * update update.md * fix link error * [.Net] Enable JsonMode and deterministic output in AutoGen.OpenAI OpenAIChatAgent (#2347) * update openai version && add sample for json output * add example in web * update update.md * update image url * [.Net] Add AutoGen.Mistral package (#2330) * add mstral client * enable streaming support * add mistralClientAgent * add test for function call * add extension * add support for toolcall and toolcall result message * add support for aggregate message * implement streaming function call * track (#2471) * [.Net] add mistral example (#2482) * update existing examples to use messageCOnnector * add overview * add function call document * add example 14 * add mistral token count usage example * update version * Update dotnet-release.yml 
(#2488) * update * revert gitattributes * WIP : Binary ImageMessage * WIP : Able to pass unit test * Add example, cover more usages * Rename File --------- Co-authored-by: XiaoYun Zhang <xiaoyuz@microsoft.com> Co-authored-by: Xiaoyun Zhang <bigmiao.zhang@gmail.com> Co-authored-by: mhensen <mh@webvize.nl> Co-authored-by: ekzhu <ekzhu@users.noreply.github.com> Co-authored-by: Krzysztof Kasprowicz <60486987+Krzysztof318@users.noreply.github.com> Co-authored-by: luongdavid <luongdavid@microsoft.com>
This commit is contained in:
parent
1f501b210c
commit
f4a07ff0ed
@ -16,4 +16,10 @@
|
|||||||
<PackageReference Include="FluentAssertions" Version="$(FluentAssertionVersion)" />
|
<PackageReference Include="FluentAssertions" Version="$(FluentAssertionVersion)" />
|
||||||
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Web" Version="$(SemanticKernelExperimentalVersion)" />
|
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Web" Version="$(SemanticKernelExperimentalVersion)" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
  <!--
    Copy everything under ImageResources next to the built binaries.
    The image-message example enumerates this folder at runtime from the output
    directory, so any image added here must be copied as well, not only square.png.
  -->
  <None Update="ImageResources\**\*">
    <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
  </None>
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@ -0,0 +1,62 @@
|
|||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Example15_ImageMessage.cs
|
||||||
|
|
||||||
|
using AutoGen.Core;
|
||||||
|
using AutoGen.OpenAI;
|
||||||
|
|
||||||
|
namespace AutoGen.BasicSample;
|
||||||
|
|
||||||
|
/// <summary>
/// Demonstrates sending raw image bytes to GPT-4V through <see cref="ImageMessage"/>.
/// Every supported image found in the <c>ImageResources</c> folder next to the running
/// binaries is loaded as <see cref="BinaryData"/> and sent, together with a text question,
/// as a single <see cref="MultiModalMessage"/>.
/// <para>
/// To send more images, add them to <c>ImageResources</c> and make sure they are copied
/// to the output directory, because that is where this example looks for them.
/// </para>
/// </summary>
public static class Example15_GPT4V_BinaryDataImageMessage
{
    // Folder (relative to the application base directory) that is scanned for images.
    private static readonly string ImageResourcePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageResources");

    // File extension -> media type. Lookups ignore case, so ".PNG" and ".png" both match.
    // Files whose extension is not listed here are skipped.
    private static readonly Dictionary<string, string> _mediaTypeMappings = new(StringComparer.OrdinalIgnoreCase)
    {
        { ".png", "image/png" },
        { ".jpeg", "image/jpeg" },
        { ".jpg", "image/jpeg" },
        { ".gif", "image/gif" },
        { ".webp", "image/webp" }
    };

    /// <summary>
    /// Builds a multimodal user message (one text question plus every image found in the
    /// resource folder) and sends it to a GPT-4V backed agent.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the <c>OPENAI_API_KEY</c> environment variable is not set.
    /// </exception>
    public static async Task RunAsync()
    {
        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
            ?? throw new InvalidOperationException("Please set OPENAI_API_KEY environment variable.");
        var openAiConfig = new OpenAIConfig(openAIKey, "gpt-4-vision-preview");

        var visionAgent = new GPTAgent(
            name: "gpt",
            systemMessage: "You are a helpful AI assistant",
            config: openAiConfig,
            temperature: 0);

        // The text question comes first; the images are appended after it.
        List<IMessage> messages =
            [new TextMessage(Role.User, "What is this image?", from: "user")];
        AddMessagesFromResource(ImageResourcePath, messages);

        var multiModalMessage = new MultiModalMessage(Role.User, messages, from: "user");

        // The reply is not inspected here; the example only exercises the send path.
        await visionAgent.SendAsync(multiModalMessage);
    }

    /// <summary>
    /// Appends one <see cref="ImageMessage"/> to <paramref name="messages"/> for every file in
    /// <paramref name="imageResourcePath"/> whose extension has a known media type.
    /// </summary>
    /// <param name="imageResourcePath">Directory to enumerate (top level only).</param>
    /// <param name="messages">List that receives the created image messages.</param>
    private static void AddMessagesFromResource(string imageResourcePath, List<IMessage> messages)
    {
        foreach (string file in Directory.GetFiles(imageResourcePath))
        {
            if (!_mediaTypeMappings.TryGetValue(Path.GetExtension(file), out var mediaType))
            {
                continue;
            }

            // BinaryData.FromStream reads the stream into its own buffer, so the file can be
            // handed over directly and closed right after; no intermediate MemoryStream needed.
            using var fs = File.OpenRead(file);
            var imageData = BinaryData.FromStream(fs, mediaType);

            // The images are part of the user's question, so they carry the user role,
            // matching the text part and the enclosing multimodal message.
            messages.Add(new ImageMessage(Role.User, imageData, from: "user"));
        }
    }
}
|
||||||
BIN
dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png
(Stored with Git LFS)
Normal file
BIN
dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png
(Stored with Git LFS)
Normal file
Binary file not shown.
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="JsonSchema.Net.Generation" Version="$(JsonSchemaVersion)" />
|
<PackageReference Include="JsonSchema.Net.Generation" Version="$(JsonSchemaVersion)" />
|
||||||
|
<PackageReference Include="System.Memory.Data" Version="8.0.0" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@ -21,14 +21,41 @@ public class ImageMessage : IMessage
|
|||||||
this.Url = uri.ToString();
|
this.Url = uri.ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates an image message whose content is carried inline as raw bytes.
/// This constructor does not assign <see cref="Url"/>.
/// </summary>
/// <param name="role">Role assigned to the message.</param>
/// <param name="data">
/// Image bytes. Must be non-empty and must carry a media type (for example <c>image/png</c>),
/// which is required when the data is rendered as a data URI.
/// </param>
/// <param name="from">Optional name of the sender.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="data"/> is empty, or its media type is null, empty or whitespace.
/// </exception>
public ImageMessage(Role role, BinaryData data, string? from = null)
{
    // Nullable annotations are not enforced at runtime; fail with a clear argument error
    // instead of a NullReferenceException from the checks below.
    if (data is null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (data.IsEmpty)
    {
        throw new ArgumentException("Data cannot be empty", nameof(data));
    }

    if (string.IsNullOrWhiteSpace(data.MediaType))
    {
        throw new ArgumentException("MediaType is needed for DataUri Images", nameof(data));
    }

    this.Role = role;
    this.From = from;
    this.Data = data;
}
|
||||||
|
|
||||||
public Role Role { get; set; }
|
public Role Role { get; set; }
|
||||||
|
|
||||||
public string Url { get; set; }
|
public string? Url { get; set; }
|
||||||
|
|
||||||
public string? From { get; set; }
|
public string? From { get; set; }
|
||||||
|
|
||||||
|
public BinaryData? Data { get; set; }
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a base64 data URI from <see cref="Data"/>, in the form
/// <c>data:{media type};base64,{payload}</c>.
/// </summary>
/// <returns>The data URI string for the image bytes held in <see cref="Data"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <see cref="Data"/> is null, i.e. the message carries no binary payload.
/// </exception>
public string BuildDataUri()
{
    if (this.Data is null)
    {
        // NullReferenceException is reserved for the runtime; a missing payload is an
        // invalid object state for this operation.
        throw new InvalidOperationException($"{nameof(Data)} is null; a data URI can only be built when the message carries binary data.");
    }

    return $"data:{this.Data.MediaType};base64,{Convert.ToBase64String(this.Data.ToArray())}";
}
|
||||||
|
|
||||||
public override string ToString()
|
public override string ToString()
|
||||||
{
|
{
|
||||||
return $"ImageMessage({this.Role}, {this.Url}, {this.From})";
|
return $"ImageMessage({this.Role}, {(this.Data != null ? BuildDataUri() : this.Url) ?? string.Empty}, {this.From})";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
// ChatCompletionResponse.cs
|
// ChatCompletionResponse.cs
|
||||||
|
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
// Error.cs
|
// Error.cs
|
||||||
|
|
||||||
using System.Text.Json.Serialization;
|
using System.Text.Json.Serialization;
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
// Model.cs
|
// Model.cs
|
||||||
|
|
||||||
using System;
|
using System;
|
||||||
|
|||||||
@ -77,7 +77,7 @@ public static class MessageExtension
|
|||||||
else if (message is ImageMessage imageMessage)
|
else if (message is ImageMessage imageMessage)
|
||||||
{
|
{
|
||||||
// multi-modal
|
// multi-modal
|
||||||
var msg = new ChatRequestUserMessage(new ChatMessageImageContentItem(new Uri(imageMessage.Url)));
|
var msg = new ChatRequestUserMessage(new ChatMessageImageContentItem(new Uri(imageMessage.Url ?? imageMessage.BuildDataUri())));
|
||||||
|
|
||||||
return [msg];
|
return [msg];
|
||||||
}
|
}
|
||||||
@ -101,7 +101,7 @@ public static class MessageExtension
|
|||||||
return m switch
|
return m switch
|
||||||
{
|
{
|
||||||
TextMessage textMessage => new ChatMessageTextContentItem(textMessage.Content),
|
TextMessage textMessage => new ChatMessageTextContentItem(textMessage.Content),
|
||||||
ImageMessage imageMessage => new ChatMessageImageContentItem(new Uri(imageMessage.Url)),
|
ImageMessage imageMessage => new ChatMessageImageContentItem(new Uri(imageMessage.Url ?? imageMessage.BuildDataUri())),
|
||||||
_ => throw new ArgumentException($"Unknown message type: {m.GetType()}")
|
_ => throw new ArgumentException($"Unknown message type: {m.GetType()}")
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|||||||
@ -336,7 +336,7 @@ public class OpenAIChatRequestMessageConnector : IMiddleware, IStreamingMiddlewa
|
|||||||
private IEnumerable<ChatRequestMessage> ProcessIncomingMessagesForOther(ImageMessage message)
|
private IEnumerable<ChatRequestMessage> ProcessIncomingMessagesForOther(ImageMessage message)
|
||||||
{
|
{
|
||||||
return new[] { new ChatRequestUserMessage([
|
return new[] { new ChatRequestUserMessage([
|
||||||
new ChatMessageImageContentItem(new Uri(message.Url)),
|
new ChatMessageImageContentItem(new Uri(message.Url ?? message.BuildDataUri())),
|
||||||
])};
|
])};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -345,7 +345,7 @@ public class OpenAIChatRequestMessageConnector : IMiddleware, IStreamingMiddlewa
|
|||||||
IEnumerable<ChatMessageContentItem> items = message.Content.Select<IMessage, ChatMessageContentItem>(ci => ci switch
|
IEnumerable<ChatMessageContentItem> items = message.Content.Select<IMessage, ChatMessageContentItem>(ci => ci switch
|
||||||
{
|
{
|
||||||
TextMessage text => new ChatMessageTextContentItem(text.Content),
|
TextMessage text => new ChatMessageTextContentItem(text.Content),
|
||||||
ImageMessage image => new ChatMessageImageContentItem(new Uri(image.Url)),
|
ImageMessage image => new ChatMessageImageContentItem(new Uri(image.Url ?? image.BuildDataUri())),
|
||||||
_ => throw new NotImplementedException(),
|
_ => throw new NotImplementedException(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -92,7 +92,7 @@ public class SemanticKernelChatMessageContentConnector : IMiddleware, IStreaming
|
|||||||
{
|
{
|
||||||
TextContent txt => new TextMessage(Role.Assistant, txt.Text!, messageEnvelope.From),
|
TextContent txt => new TextMessage(Role.Assistant, txt.Text!, messageEnvelope.From),
|
||||||
ImageContent img when img.Uri is Uri uri => new ImageMessage(Role.Assistant, uri.ToString(), from: messageEnvelope.From),
|
ImageContent img when img.Uri is Uri uri => new ImageMessage(Role.Assistant, uri.ToString(), from: messageEnvelope.From),
|
||||||
ImageContent img when img.Uri is null => throw new InvalidOperationException("ImageContent.Uri is null"),
|
ImageContent img when img.Data is ReadOnlyMemory<byte> data => new ImageMessage(Role.Assistant, BinaryData.FromBytes(data), from: messageEnvelope.From),
|
||||||
_ => throw new InvalidOperationException("Unsupported content type"),
|
_ => throw new InvalidOperationException("Unsupported content type"),
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -185,9 +185,8 @@ public class SemanticKernelChatMessageContentConnector : IMiddleware, IStreaming
|
|||||||
|
|
||||||
private IEnumerable<ChatMessageContent> ProcessMessageForOthers(ImageMessage message)
|
private IEnumerable<ChatMessageContent> ProcessMessageForOthers(ImageMessage message)
|
||||||
{
|
{
|
||||||
var imageContent = new ImageContent(new Uri(message.Url));
|
|
||||||
var collectionItems = new ChatMessageContentItemCollection();
|
var collectionItems = new ChatMessageContentItemCollection();
|
||||||
collectionItems.Add(imageContent);
|
collectionItems.Add(new ImageContent(new Uri(message.Url ?? message.BuildDataUri())));
|
||||||
return [new ChatMessageContent(AuthorRole.User, collectionItems)];
|
return [new ChatMessageContent(AuthorRole.User, collectionItems)];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,7 +206,7 @@ public class SemanticKernelChatMessageContentConnector : IMiddleware, IStreaming
|
|||||||
}
|
}
|
||||||
else if (item is ImageMessage imageContent)
|
else if (item is ImageMessage imageContent)
|
||||||
{
|
{
|
||||||
collections.Add(new ImageContent(new Uri(imageContent.Url)));
|
collections.Add(new ImageContent(new Uri(imageContent.Url ?? imageContent.BuildDataUri())));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|||||||
BIN
dotnet/test/AutoGen.Tests/ApprovalTests/square.png
(Stored with Git LFS)
Normal file
BIN
dotnet/test/AutoGen.Tests/ApprovalTests/square.png
(Stored with Git LFS)
Normal file
Binary file not shown.
@ -21,4 +21,10 @@
|
|||||||
<ProjectReference Include="..\..\src\AutoGen\AutoGen.csproj" />
|
<ProjectReference Include="..\..\src\AutoGen\AutoGen.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<None Update="ApprovalTests\square.png">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@ -68,6 +68,12 @@ namespace AutoGen.Tests
|
|||||||
await Example05_Dalle_And_GPT4V.RunAsync();
|
await Example05_Dalle_And_GPT4V.RunAsync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs the binary-data image message example end to end by awaiting
/// <c>Example15_GPT4V_BinaryDataImageMessage.RunAsync</c>.
/// NOTE(review): presumably <c>ApiKeyFact</c> skips the test when OPENAI_API_KEY is not set; confirm.
/// </summary>
[ApiKeyFact("OPENAI_API_KEY")]
public async Task GPT4ImageMessage()
    => await Example15_GPT4V_BinaryDataImageMessage.RunAsync();
|
||||||
|
|
||||||
public class ConsoleWriter : StringWriter
|
public class ConsoleWriter : StringWriter
|
||||||
{
|
{
|
||||||
private ITestOutputHelper output;
|
private ITestOutputHelper output;
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using AutoGen.OpenAI;
|
using AutoGen.OpenAI;
|
||||||
@ -80,11 +81,24 @@ namespace AutoGen.Tests
|
|||||||
|
|
||||||
var imageMessage = new ImageMessage(Role.User, imageUri, from: "user");
|
var imageMessage = new ImageMessage(Role.User, imageUri, from: "user");
|
||||||
|
|
||||||
|
string imagePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ApprovalTests", "square.png");
|
||||||
|
ImageMessage imageMessageData;
|
||||||
|
using (var fs = new FileStream(imagePath, FileMode.Open, FileAccess.Read))
|
||||||
|
{
|
||||||
|
var ms = new MemoryStream();
|
||||||
|
await fs.CopyToAsync(ms);
|
||||||
|
ms.Seek(0, SeekOrigin.Begin);
|
||||||
|
var imageData = await BinaryData.FromStreamAsync(ms, "image/png");
|
||||||
|
imageMessageData = new ImageMessage(Role.Assistant, imageData, from: "user");
|
||||||
|
}
|
||||||
|
|
||||||
IMessage[] messages = [
|
IMessage[] messages = [
|
||||||
MessageEnvelope.Create(oaiMessage),
|
MessageEnvelope.Create(oaiMessage),
|
||||||
multiModalMessage,
|
multiModalMessage,
|
||||||
imageMessage,
|
imageMessage,
|
||||||
|
imageMessageData
|
||||||
];
|
];
|
||||||
|
|
||||||
foreach (var message in messages)
|
foreach (var message in messages)
|
||||||
{
|
{
|
||||||
var response = await visionAgent.SendAsync(message);
|
var response = await visionAgent.SendAsync(message);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user