Skip to content

Commit fe5432d

Browse files
committed
Example of llama cloud integration
1 parent aefc76e commit fe5432d

File tree

4 files changed

+321
-2
lines changed

4 files changed

+321
-2
lines changed

src/KernelMemory.Extensions.ConsoleTest/Program.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ static async Task Main(string[] args)
2222
services.AddSingleton<CustomSearchPipelineBase>();
2323
services.AddSingleton<AnthropicSample>();
2424
services.AddSingleton<ContextualRetrievalSample>();
25+
services.AddSingleton<CustomParsersSample>();
2526
services.AddHttpClient();
2627

2728
var serviceProvider = services.BuildServiceProvider();
@@ -35,6 +36,7 @@ static async Task Main(string[] args)
3536
["Custom Search pipeline (Basic)"] = typeof(CustomSearchPipelineBase),
3637
["Anthropic"] = typeof(AnthropicSample),
3738
["Contextual retrieval"] = typeof(ContextualRetrievalSample),
39+
["Advanced Parsing"] = typeof(CustomParsersSample),
3840
["Exit"] = null
3941
};
4042

@@ -64,6 +66,7 @@ static async Task Main(string[] args)
6466
@"c:\temp\advancedapisecurity.pdf",
6567
@"S:\OneDrive\B19553_11.pdf",
6668
@"c:\temp\blackhatpython.pdf",
69+
@"c:\temp\manualeDreame.pdf",
6770
@"/Users/gianmariaricci/Downloads/llchaindata/blackhatpython.pdf"]));
6871
await sampleInstance1.RunSample(book);
6972
}
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
using KernelMemory.Extensions.ConsoleTest.Helper;
2+
using Microsoft.Extensions.DependencyInjection;
3+
using Microsoft.Extensions.Http.Resilience;
4+
using Microsoft.Extensions.Logging;
5+
using Microsoft.KernelMemory;
6+
using Microsoft.KernelMemory.Context;
7+
using Microsoft.KernelMemory.DataFormats;
8+
using Microsoft.KernelMemory.DocumentStorage.DevTools;
9+
using Microsoft.KernelMemory.FileSystem.DevTools;
10+
using Microsoft.KernelMemory.Handlers;
11+
using Microsoft.KernelMemory.MemoryStorage.DevTools;
12+
13+
namespace SemanticMemory.Samples;
14+
15+
/// <summary>
/// Sample showing how to plug the LLama Cloud parser into a Kernel Memory
/// serverless pipeline: the document is parsed remotely by LLama Cloud,
/// then partitioned, embedded and saved with the standard handlers, and
/// finally queried in an interactive ask loop.
/// </summary>
internal class CustomParsersSample : ISample
{
    /// <summary>
    /// Runs the sample end to end: indexes <paramref name="fileToParse"/>
    /// through a custom pipeline, then answers user questions until an
    /// empty question is entered.
    /// </summary>
    /// <param name="fileToParse">Full path of the document to index.</param>
    public async Task RunSample(string fileToParse)
    {
        var services = new ServiceCollection();

        services.AddLogging(l => l
            .SetMinimumLevel(LogLevel.Trace)
            .AddConsole()
            .AddDebug()
        );

        var builder = CreateBasicKernelMemoryBuilder(services);

        var serviceProvider = services.BuildServiceProvider();
        var parserClient = serviceProvider.GetRequiredService<LLamaCloudParserClient>();

        // This is not so good, but it seems that when we build the ServerlessMemory object
        // it cannot access the http services registered in the service collection,
        // so the already-resolved client is registered again on the builder's collection.
        builder.Services.AddSingleton(parserClient);

        var kernelMemory = builder.Build<MemoryServerless>();

        var orchestrator = builder.GetOrchestrator();

        var decoders = serviceProvider.GetServices<IContentDecoder>();

        // Add pipeline handlers: extraction -> partitioning -> embeddings -> save.
        Console.WriteLine("* Defining pipeline handlers...");

        TextExtractionHandler textExtraction = new("extract", orchestrator, decoders);
        await orchestrator.AddHandlerAsync(textExtraction);

        TextPartitioningHandler textPartitioning = new("partition", orchestrator);
        await orchestrator.AddHandlerAsync(textPartitioning);

        GenerateEmbeddingsHandler textEmbedding = new("gen_embeddings", orchestrator);
        await orchestrator.AddHandlerAsync(textEmbedding);

        SaveRecordsHandler saveRecords = new("save_records", orchestrator);
        await orchestrator.AddHandlerAsync(saveRecords);

        var fileName = Path.GetFileName(fileToParse);

        var contextProvider = serviceProvider.GetRequiredService<IContextProvider>();

        // Now we are going to index the document; LLama Cloud can use caching
        // so we can avoid asking for the file again.
        var pipelineBuilder = orchestrator
            .PrepareNewDocumentUpload(
                index: "llamacloud",
                documentId: fileName,
                new TagCollection { { "example", "books" } })
            .AddUploadFile(fileName, fileName, fileToParse)
            .Then("extract")
            .Then("partition")
            .Then("gen_embeddings")
            .Then("save_records");

        contextProvider.AddLLamaCloudParserOptions(fileName, "This is a manual for Dreame vacuum cleaner, I need you to extract a series of sections that can be useful for an helpdesk to answer user questions. You will create sections where each sections contains a question and an answer taken from the text.");

        var pipeline = pipelineBuilder.Build();
        await orchestrator.RunPipelineAsync(pipeline);

        // Ask questions to the memory continuously until the user enters an empty one.
        // Console.ReadLine can return null (e.g. redirected input), so the
        // variable is declared nullable and checked with IsNullOrWhiteSpace.
        string? question;
        do
        {
            Console.WriteLine("Ask a question to the kernel memory:");
            question = Console.ReadLine();
            if (!string.IsNullOrWhiteSpace(question))
            {
                var response = await kernelMemory.AskAsync(question);
                Console.WriteLine(response.Result);
            }
        } while (!string.IsNullOrWhiteSpace(question));
    }

    /// <summary>
    /// Configures a <see cref="IKernelMemoryBuilder"/> with Azure OpenAI
    /// models, simple file/vector storage on disk, and the LLama Cloud
    /// content decoder plus its HTTP client with resilience options.
    /// </summary>
    /// <param name="services">Service collection that receives the supporting registrations.</param>
    /// <returns>The configured builder, also registered as a singleton in <paramref name="services"/>.</returns>
    private static IKernelMemoryBuilder CreateBasicKernelMemoryBuilder(
        ServiceCollection services)
    {
        // We need a series of services to use Kernel Memory; the first one is
        // an embedding service that will be used to create dense vectors for
        // pieces of text. We can use the standard ADA embedding service.
        var embeddingConfig = new AzureOpenAIConfig
        {
            APIKey = Dotenv.Get("OPENAI_API_KEY"),
            Deployment = "text-embedding-ada-002",
            Endpoint = Dotenv.Get("AZURE_ENDPOINT"),
            APIType = AzureOpenAIConfig.APITypes.EmbeddingGeneration,
            Auth = AzureOpenAIConfig.AuthTypes.APIKey
        };

        // Now kernel memory needs the LLM data to be able to pass the question
        // and retrieved segments to the model. We can use GPT35.
        var chatConfig = new AzureOpenAIConfig
        {
            APIKey = Dotenv.Get("OPENAI_API_KEY"),
            Deployment = Dotenv.Get("KERNEL_MEMORY_DEPLOYMENT_NAME"),
            Endpoint = Dotenv.Get("AZURE_ENDPOINT"),
            APIType = AzureOpenAIConfig.APITypes.ChatCompletion,
            Auth = AzureOpenAIConfig.AuthTypes.APIKey,
            MaxTokenTotal = 4096
        };

        var kernelMemoryBuilder = new KernelMemoryBuilder(services)
            .WithAzureOpenAITextGeneration(chatConfig)
            .WithAzureOpenAITextEmbeddingGeneration(embeddingConfig);

        kernelMemoryBuilder
            .WithSimpleFileStorage(new SimpleFileStorageConfig()
            {
                Directory = "c:\\temp\\kmcps\\storage",
                StorageType = FileSystemTypes.Disk
            })
            .WithSimpleVectorDb(new SimpleVectorDbConfig()
            {
                Directory = "c:\\temp\\kmcps\\vectorstorage",
                StorageType = FileSystemTypes.Disk
            });

        kernelMemoryBuilder.WithContentDecoder<LLamaCloudParserDocumentDecoder>();

        var llamaApiKey = Environment.GetEnvironmentVariable("LLAMA_API_KEY");
        if (string.IsNullOrEmpty(llamaApiKey))
        {
            // Fail fast with a specific exception type instead of a bare Exception.
            throw new InvalidOperationException("LLAMA_API_KEY is not set");
        }

        // Create the llamaparser client configuration.
        services.AddSingleton(new CloudParserConfiguration
        {
            ApiKey = llamaApiKey,
        });

        services.AddHttpClient<LLamaCloudParserClient>()
            .AddStandardResilienceHandler(options =>
            {
                // Remote parsing can be slow, so allow a long total request timeout.
                options.TotalRequestTimeout = new HttpTimeoutStrategyOptions()
                {
                    Timeout = TimeSpan.FromMinutes(10),
                };
            });

        services.AddSingleton(sp => sp.GetRequiredService<ILoggerFactory>().CreateLogger<LLamaCloudParserClient>());

        services.AddSingleton<IKernelMemoryBuilder>(kernelMemoryBuilder);
        return kernelMemoryBuilder;
    }
}

src/KernelMemory.Extensions/llamaindex/LLamaCloudParserClient.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ public async Task<bool> WaitForJobSuccessAsync(string jobId, TimeSpan timeout)
117117

118118
/// <summary>
/// Configuration for the LLama Cloud parser client. Setters are public so
/// the object can be populated from configuration binding or object
/// initializers at registration time.
/// </summary>
public class CloudParserConfiguration
{
    /// <summary>API key used to authenticate against the LLama Cloud service.</summary>
    public string? ApiKey { get; set; }

    /// <summary>Base address of the LLama Cloud REST API.</summary>
    public string? BaseUrl { get; set; } = "https://api.cloud.llamaindex.ai";
}
123123

124124
/// <summary>
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
using Microsoft.Extensions.Logging;
2+
using Microsoft.KernelMemory.Context;
3+
using Microsoft.KernelMemory.DataFormats;
4+
using Microsoft.KernelMemory.Diagnostics;
5+
using Microsoft.KernelMemory.Pipeline;
6+
using System;
7+
using System.IO;
8+
using System.Threading;
9+
using System.Threading.Tasks;
10+
11+
/// <summary>
/// Content decoder that delegates text extraction to the LLama Cloud parsing
/// service: the document is uploaded, parsed remotely, and the resulting
/// markdown is returned as a single-section <see cref="FileContent"/>.
/// File name and parsing instructions are read from the request context
/// (see <see cref="LLamaCloudParserDocumentDecoderExtensions"/>).
/// </summary>
public class LLamaCloudParserDocumentDecoder : IContentDecoder
{
    private readonly ILogger<LLamaCloudParserDocumentDecoder> _log;
    private readonly LLamaCloudParserClient _client;
    private readonly IContextProvider _contextProvider;

    public LLamaCloudParserDocumentDecoder(
        LLamaCloudParserClient client,
        IContextProvider contextProvider,
        ILoggerFactory? loggerFactory = null)
    {
        _log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger<LLamaCloudParserDocumentDecoder>();
        _client = client;
        _contextProvider = contextProvider;
    }

    /// <inheritdoc />
    public bool SupportsMimeType(string mimeType)
    {
        // Here we can add more mime types supported by the remote parser.
        return mimeType != null
            && (
                mimeType.StartsWith(MimeTypes.Pdf, StringComparison.OrdinalIgnoreCase)
                || mimeType.StartsWith(MimeTypes.OpenDocumentText, StringComparison.OrdinalIgnoreCase)
            );
    }

    /// <inheritdoc />
    public async Task<FileContent> DecodeAsync(string filename, CancellationToken cancellationToken = default)
    {
        // BUGFIX: must await before leaving the method. The original returned
        // the inner Task directly, so the using-scope disposed the stream
        // while the decode was still reading from it.
        using var stream = File.OpenRead(filename);
        return await this.DecodeAsync(stream, filename, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<FileContent> DecodeAsync(BinaryData data, CancellationToken cancellationToken = default)
    {
        // Same fix as the filename overload: await so the stream outlives the decode.
        using var stream = data.ToStream();
        return await this.DecodeAsync(stream, null, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public Task<FileContent> DecodeAsync(Stream data, CancellationToken cancellationToken = default)
    {
        // Pure passthrough: no local resources to protect, returning the Task is safe.
        return DecodeAsync(data, null, cancellationToken);
    }

    /// <summary>
    /// Uploads the stream to LLama Cloud, waits for the parsing job to
    /// complete and returns the parsed markdown as file content.
    /// </summary>
    /// <param name="data">Document content; copied to a temporary file because the upload needs a seekable stream.</param>
    /// <param name="fileName">Optional file name; the context-provided name, when present, takes precedence.</param>
    /// <param name="cancellationToken">Cancels the local file copy; remote calls use the client's own timeouts.</param>
    /// <exception cref="InvalidOperationException">When no file name is available, or the remote job fails.</exception>
    public async Task<FileContent> DecodeAsync(Stream data, string? fileName, CancellationToken cancellationToken = default)
    {
        _log.LogDebug("Extracting structured text with llamacloud from file");

        // Retrieve file name and parsing instructions from the request context.
        var context = _contextProvider.GetContext();
        string parsingInstructions = string.Empty;
        if (context.Arguments.TryGetValue(LLamaCloudParserDocumentDecoderExtensions.FileNameKey, out var fileNameContext))
        {
            fileName = fileNameContext as string ?? string.Empty;
        }
        if (context.Arguments.TryGetValue(LLamaCloudParserDocumentDecoderExtensions.ParsingInstructionsKey, out var parsingInstructionsContext))
        {
            parsingInstructions = parsingInstructionsContext as string ?? string.Empty;
        }

        // We need a way to find the correct instructions for the file, so we can
        // use a different configuration for each file that we are going to parse.
        var parameters = new UploadParameters();

        // File name must not be null: the upload API requires one.
        if (string.IsNullOrEmpty(fileName))
        {
            throw new InvalidOperationException("LLAMA Cloud error: file name is missing");
        }

        // We need a temporary file to upload, because the upload requires a
        // seekable stream and the incoming one may not be.
        var tempFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString() + "_" + fileName);
        using (var writeFileStream = File.Create(tempFileName))
        {
            await data.CopyToAsync(writeFileStream, cancellationToken).ConfigureAwait(false);
        }

        parameters.WithParsingInstructions(parsingInstructions);

        var response = await PerformCallAsync(fileName, tempFileName, parameters).ConfigureAwait(false);

        // Check for null before inspecting the response (original checked error first).
        if (response == null)
        {
            throw new InvalidOperationException("LLAMA Cloud error: no response");
        }

        if (response.ErrorCode != null)
        {
            throw new InvalidOperationException($"LLAMA Cloud error: {response.ErrorCode} - {response.ErrorMessage}");
        }

        var jobId = response.Id.ToString();

        // Now wait (up to 5 minutes) for the remote parsing job to complete.
        var jobResponse = await _client.WaitForJobSuccessAsync(jobId, TimeSpan.FromMinutes(5)).ConfigureAwait(false);

        if (!jobResponse)
        {
            throw new InvalidOperationException("LLAMA Cloud error: job not completed");
        }

        // The job is completed; download the parsed markdown.
        var markdown = await _client.GetJobRawMarkdownAsync(jobId).ConfigureAwait(false);

        var result = new FileContent(MimeTypes.MarkDown);
        result.Sections.Add(new FileSection(1, markdown, false));

        return result;
    }

    /// <summary>
    /// Uploads the temporary file to LLama Cloud, always deleting the
    /// temporary copy afterwards, even on failure.
    /// </summary>
    private async Task<UploadResponse> PerformCallAsync(
        string fileName,
        string physicalTempFileName,
        UploadParameters parameters)
    {
        try
        {
            await using var tempFileStream = File.OpenRead(physicalTempFileName);
            var uploadResponse = await _client.UploadAsync(tempFileStream, fileName, parameters).ConfigureAwait(false);
            return uploadResponse;
        }
        finally
        {
            File.Delete(physicalTempFileName);
        }
    }
}
141+
142+
/// <summary>
/// Helpers to pass LLama Cloud parsing options to
/// <see cref="LLamaCloudParserDocumentDecoder"/> through the Kernel Memory
/// request context.
/// </summary>
public static class LLamaCloudParserDocumentDecoderExtensions
{
    // Context argument keys read back by the decoder.
    internal const string FileNameKey = "llamacloud.filename";
    internal const string ParsingInstructionsKey = "llamacloud.parsing_instructions";

    /// <summary>
    /// Stores the file name and the parsing instructions in the context so
    /// the decoder can retrieve them while decoding the document.
    /// </summary>
    /// <param name="contextProvider">Context provider backing the current request.</param>
    /// <param name="filename">Name of the file being parsed.</param>
    /// <param name="parseInstructions">Natural-language instructions for the remote parser.</param>
    public static void AddLLamaCloudParserOptions(this IContextProvider contextProvider, string filename, string parseInstructions)
    {
        contextProvider.SetContextArg(FileNameKey, filename);
        contextProvider.SetContextArg(ParsingInstructionsKey, parseInstructions);
    }
}

0 commit comments

Comments
 (0)