mirror of
https://github.com/microsoft/autogen.git
synced 2025-07-05 16:10:50 +00:00
63 lines
2.3 KiB
C#
63 lines
2.3 KiB
C#
using System.Reflection;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.SemanticKernel.Connectors.OpenAI;
|
|
using Microsoft.SemanticKernel.Connectors.Qdrant;
|
|
using Microsoft.SemanticKernel.Memory;
|
|
using UglyToad.PdfPig;
|
|
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
|
|
|
|
public sealed class Program
|
|
{
|
|
private const string WafFileName = "azure-well-architected.pdf";
|
|
static async Task Main()
|
|
{
|
|
var kernelSettings = KernelSettings.LoadSettings();
|
|
|
|
using ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
|
|
{
|
|
builder
|
|
.SetMinimumLevel(kernelSettings.LogLevel ?? LogLevel.Warning)
|
|
.AddConsole()
|
|
.AddDebug();
|
|
});
|
|
|
|
var memoryBuilder = new MemoryBuilder();
|
|
var memory = memoryBuilder.WithLoggerFactory(loggerFactory)
|
|
.WithQdrantMemoryStore(kernelSettings.QdrantEndpoint, 1536)
|
|
.WithAzureOpenAITextEmbeddingGeneration(kernelSettings.EmbeddingDeploymentOrModelId, kernelSettings.Endpoint, kernelSettings.ApiKey)
|
|
.Build();
|
|
|
|
await ImportDocumentAsync(memory, WafFileName).ConfigureAwait(false);
|
|
}
|
|
|
|
public static async Task ImportDocumentAsync(ISemanticTextMemory memory, string filename)
|
|
{
|
|
var asm = Assembly.GetExecutingAssembly();
|
|
var currentDirectory = Path.GetDirectoryName(asm.Location);
|
|
if (currentDirectory is null)
|
|
{
|
|
throw new DirectoryNotFoundException($"Could not find directory for assembly '{asm}'.");
|
|
}
|
|
|
|
var filePath = Path.Combine(currentDirectory, filename);
|
|
using var pdfDocument = PdfDocument.Open(File.OpenRead(filePath));
|
|
var pages = pdfDocument.GetPages();
|
|
foreach (var page in pages)
|
|
{
|
|
try
|
|
{
|
|
var text = ContentOrderTextExtractor.GetText(page);
|
|
var descr = text.Take(100);
|
|
await memory.SaveInformationAsync(
|
|
collection: "waf",
|
|
text: text,
|
|
id: $"{Guid.NewGuid()}",
|
|
description: $"Document: {descr}").ConfigureAwait(false);
|
|
}
|
|
catch (Exception ex)
|
|
{
|
|
Console.WriteLine(ex.Message);
|
|
}
|
|
}
|
|
}
|
|
} |