From ec1d99d71e26ec44162b144397ec116f91deb034 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Apr 2026 06:12:53 +0000 Subject: [PATCH 1/3] Extend IngestionPipeline: remove reader from ctor, add ProcessAsync(IngestionDocument), update file-system methods to take reader param - Remove IngestionDocumentReader from constructor and field - Add new ProcessAsync(IngestionDocument) overload returning Task - Add IngestionDocumentReader reader parameter to file-system ProcessAsync methods - Add ProcessDocument activity constant to DiagnosticsConstants - Replace var with explicit types in pipeline and tests - Update all DataIngestor.cs template/snapshot files - Add CanProcessDocumentWithoutReader test - Update README.md with pipeline usage examples Agent-Logs-Url: https://github.com/dotnet/extensions/sessions/54f5e258-8414-40a0-b8b5-953677d1cce2 Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../DiagnosticsConstants.cs | 5 ++ .../IngestionPipeline.cs | 41 +++++++--- .../README.md | 32 ++++++++ .../Services/Ingestion/DataIngestor.cs | 4 +- .../IngestionPipelineTests.cs | 80 +++++++++++++++---- .../Services/Ingestion/DataIngestor.cs | 4 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../Services/Ingestion/DataIngestor.cs | 4 +- 10 files changed, 141 insertions(+), 41 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs index 4251bef6ae3..cbf9bf5aa4e 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs @@ -32,4 +32,9 @@ internal static class ProcessFile internal const string ActivityName = "ProcessFile"; internal const string FilePathTagName = "rag.file.path"; } + + internal static class ProcessDocument + { + internal const string ActivityName = "ProcessDocument"; + } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs index 1eeb94058ee..fa2b3db7a8b 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs @@ -24,7 +24,6 @@ namespace Microsoft.Extensions.DataIngestion; /// The type of the chunk content. public sealed class IngestionPipeline : IDisposable { - private readonly IngestionDocumentReader _reader; private readonly IngestionChunker _chunker; private readonly IngestionChunkWriter _writer; private readonly ActivitySource _activitySource; @@ -33,19 +32,16 @@ public sealed class IngestionPipeline : IDisposable /// /// Initializes a new instance of the class. /// - /// The reader for ingestion documents. /// The chunker to split documents into chunks. /// The writer for processing chunks. /// The options for the ingestion pipeline. /// The logger factory for creating loggers. public IngestionPipeline( - IngestionDocumentReader reader, IngestionChunker chunker, IngestionChunkWriter writer, IngestionPipelineOptions? options = default, ILoggerFactory? loggerFactory = default) { - _reader = Throw.IfNull(reader); _chunker = Throw.IfNull(chunker); _writer = Throw.IfNull(writer); _activitySource = new((options ?? new()).ActivitySourceName); @@ -69,17 +65,36 @@ public void Dispose() /// public IList> ChunkProcessors { get; } = []; + /// + /// Processes the specified document through the pipeline. + /// + /// The document to process. + /// The cancellation token for the operation. + /// A task representing the asynchronous operation, returning the processed document. + public async Task ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default) + { + Throw.IfNull(document); + + using (Activity? activity = _activitySource.StartActivity(ProcessDocument.ActivityName)) + { + activity?.SetTag(ProcessSource.DocumentIdTagName, document.Identifier); + return await IngestAsync(document, activity, cancellationToken).ConfigureAwait(false); + } + } + /// /// Processes all files in the specified directory that match the given search pattern and option. /// + /// The reader for ingestion documents. /// The directory to process. /// The search pattern for file selection. /// The search option for directory traversal. /// The cancellation token for the operation. /// A task representing the asynchronous operation. - public async IAsyncEnumerable ProcessAsync(DirectoryInfo directory, string searchPattern = "*.*", + public async IAsyncEnumerable ProcessAsync(IngestionDocumentReader reader, DirectoryInfo directory, string searchPattern = "*.*", SearchOption searchOption = SearchOption.TopDirectoryOnly, [EnumeratorCancellation] CancellationToken cancellationToken = default) { + Throw.IfNull(reader); Throw.IfNull(directory); Throw.IfNullOrEmpty(searchPattern); Throw.IfOutOfRange((int)searchOption, (int)SearchOption.TopDirectoryOnly, (int)SearchOption.AllDirectories); @@ -91,7 +106,7 @@ public async IAsyncEnumerable ProcessAsync(DirectoryInfo direct .SetTag(ProcessDirectory.SearchOptionTagName, searchOption.ToString()); _logger?.ProcessingDirectory(directory.FullName, searchPattern, searchOption); - await foreach (var ingestionResult in ProcessAsync(directory.EnumerateFiles(searchPattern, searchOption), rootActivity, cancellationToken).ConfigureAwait(false)) + await foreach (IngestionResult ingestionResult in ProcessAsync(reader, directory.EnumerateFiles(searchPattern, searchOption), rootActivity, cancellationToken).ConfigureAwait(false)) { yield return ingestionResult; } @@ -101,16 +116,18 @@ public async IAsyncEnumerable ProcessAsync(DirectoryInfo direct /// /// Processes the specified files. /// + /// The reader for ingestion documents. /// The collection of files to process. /// The cancellation token for the operation. /// A task representing the asynchronous operation. - public async IAsyncEnumerable ProcessAsync(IEnumerable files, [EnumeratorCancellation] CancellationToken cancellationToken = default) + public async IAsyncEnumerable ProcessAsync(IngestionDocumentReader reader, IEnumerable files, [EnumeratorCancellation] CancellationToken cancellationToken = default) { + Throw.IfNull(reader); Throw.IfNull(files); using (Activity? rootActivity = _activitySource.StartActivity(ProcessFiles.ActivityName)) { - await foreach (var ingestionResult in ProcessAsync(files, rootActivity, cancellationToken).ConfigureAwait(false)) + await foreach (IngestionResult ingestionResult in ProcessAsync(reader, files, rootActivity, cancellationToken).ConfigureAwait(false)) { yield return ingestionResult; } @@ -125,7 +142,7 @@ private static void TraceException(Activity? activity, Exception ex) .SetStatus(ActivityStatusCode.Error, ex.Message); } - private async IAsyncEnumerable ProcessAsync(IEnumerable files, Activity? rootActivity, + private async IAsyncEnumerable ProcessAsync(IngestionDocumentReader reader, IEnumerable files, Activity? rootActivity, [EnumeratorCancellation] CancellationToken cancellationToken) { #if NET @@ -143,13 +160,13 @@ private async IAsyncEnumerable ProcessAsync(IEnumerable IngestAsync(IngestionDocument document, Ac } IAsyncEnumerable> chunks = _chunker.ProcessAsync(document, cancellationToken); - foreach (var processor in ChunkProcessors) + foreach (IngestionChunkProcessor processor in ChunkProcessors) { chunks = processor.ProcessAsync(chunks, cancellationToken); } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/README.md b/src/Libraries/Microsoft.Extensions.DataIngestion/README.md index 030ac8da43b..5aabf7b3721 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/README.md +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/README.md @@ -44,6 +44,38 @@ using VectorStoreWriter> writer = new await writer.WriteAsync(chunks); ``` +## Using the IngestionPipeline + +### Processing documents from the file system + +To process documents from the file system, create an `IngestionPipeline` and pass a reader to the `ProcessAsync` method: + +```csharp +using IngestionPipeline pipeline = new(chunker, writer); + +IngestionDocumentReader reader = new MarkdownReader(); +await foreach (IngestionResult result in pipeline.ProcessAsync(reader, directory, "*.md")) +{ + Console.WriteLine($"Processed '{result.DocumentId}'. Succeeded: {result.Succeeded}"); +} +``` + +### Processing documents without a reader + +The `IngestionPipeline` can also process documents that are already in memory, without requiring a reader: + +```csharp +using IngestionPipeline pipeline = new(chunker, writer); + +IngestionDocument document = new("my-document-id"); +IngestionDocumentSection section = new("Main"); +section.Elements.Add(new IngestionDocumentHeader("Introduction", level: 1)); +section.Elements.Add(new IngestionDocumentParagraph("This is the content of my document.")); +document.Sections.Add(section); + +IngestionDocument processedDocument = await pipeline.ProcessAsync(document); +``` + ### Custom metadata To store custom metadata alongside each chunk, create a type derived from `IngestionChunkVectorRecord` with additional properties, and a `VectorStoreWriter` subclass that overrides `SetMetadata`: diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs index 76168b6e632..6d8fa4e90a2 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs @@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) IncrementalIngestion = false, }); + DocumentReader reader = new(directory); using var pipeline = new IngestionPipeline( - reader: new DocumentReader(directory), chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))), writer: writer, loggerFactory: loggerFactory); - await foreach (var result in pipeline.ProcessAsync(directory, searchPattern)) + await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern)) { logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded); } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index e865ff39d9b..5eef3fd4efc 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -86,19 +86,20 @@ public async Task CanProcessDocuments() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var collection = testVectorStore.GetIngestionRecordCollection, string>( + VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>( "chunks", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); - using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); - List ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync(); + IngestionDocumentReader reader = CreateReader(); + using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter); + List ingestionResults = await pipeline.ProcessAsync(reader, _sampleFiles).ToListAsync(); Assert.Equal(_sampleFiles.Count, ingestionResults.Count); AssertAllIngestionsSucceeded(ingestionResults); Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); - var retrieved = await vectorStoreWriter.VectorStoreCollection + List> retrieved = await vectorStoreWriter.VectorStoreCollection .GetAsync(record => _sampleFiles.Any(info => info.FullName == record.DocumentId), top: 1000) .ToListAsync(); @@ -122,20 +123,21 @@ public async Task CanProcessDocumentsInDirectory() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var collection = testVectorStore.GetIngestionRecordCollection, string>( + VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>( "chunks-dir", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); - using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); + IngestionDocumentReader reader = CreateReader(); + using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter); DirectoryInfo directory = new("TestFiles"); - List ingestionResults = await pipeline.ProcessAsync(directory, "*.md").ToListAsync(); + List ingestionResults = await pipeline.ProcessAsync(reader, directory, "*.md").ToListAsync(); Assert.Equal(directory.EnumerateFiles("*.md").Count(), ingestionResults.Count); AssertAllIngestionsSucceeded(ingestionResults); Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); - var retrieved = await vectorStoreWriter.VectorStoreCollection + List> retrieved = await vectorStoreWriter.VectorStoreCollection .GetAsync(record => record.DocumentId.StartsWith(directory.FullName), top: 1000) .ToListAsync(); @@ -159,16 +161,17 @@ public async Task ChunksCanBeMoreThanJustText() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var collection = testVectorStore.GetIngestionRecordCollection, DataContent>( + VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, DataContent>( "chunks-img", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); - using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter); + IngestionDocumentReader reader = CreateReader(); + using IngestionPipeline pipeline = new(new ImageChunker(), vectorStoreWriter); Assert.False(embeddingGenerator.WasCalled); - var ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync(); + List ingestionResults = await pipeline.ProcessAsync(reader, _sampleFiles).ToListAsync(); AssertAllIngestionsSucceeded(ingestionResults); - var retrieved = await vectorStoreWriter.VectorStoreCollection + List> retrieved = await vectorStoreWriter.VectorStoreCollection .GetAsync(record => record.DocumentId.EndsWith(_withImage.Name), top: 100) .ToListAsync(); @@ -211,14 +214,14 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var collection = testVectorStore.GetIngestionRecordCollection, string>( + VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>( "chunks-fail", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); - using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), vectorStoreWriter); + using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter); - await Verify(pipeline.ProcessAsync(_sampleFiles)); - await Verify(pipeline.ProcessAsync(_sampleDirectory)); + await Verify(pipeline.ProcessAsync(failingForFirstReader, _sampleFiles)); + await Verify(pipeline.ProcessAsync(failingForFirstReader, _sampleDirectory)); async Task Verify(IAsyncEnumerable results) { @@ -235,6 +238,49 @@ async Task Verify(IAsyncEnumerable results) } } + [Fact] + public async Task CanProcessDocumentWithoutReader() + { + List activities = []; + using TracerProvider tracerProvider = CreateTraceProvider(activities); + + TestEmbeddingGenerator embeddingGenerator = new(); + using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); + + VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks-doc", TestEmbeddingGenerator.DimensionCount); + using VectorStoreWriter> vectorStoreWriter = new(collection); + + using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter); + + IngestionDocument document = new("test-document-id"); + IngestionDocumentSection section = new("Main"); + section.Elements.Add(new IngestionDocumentHeader("Introduction", level: 1)); + section.Elements.Add(new IngestionDocumentParagraph("This is the content of a manually created document.")); + document.Sections.Add(section); + + IngestionDocument processedDocument = await pipeline.ProcessAsync(document); + + Assert.NotNull(processedDocument); + Assert.Equal("test-document-id", processedDocument.Identifier); + Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); + + List> retrieved = await vectorStoreWriter.VectorStoreCollection + .GetAsync(record => record.DocumentId == "test-document-id", top: 100) + .ToListAsync(); + + Assert.NotEmpty(retrieved); + for (int i = 0; i < retrieved.Count; i++) + { + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.NotEmpty(retrieved[i].Content!); + Assert.Equal("test-document-id", retrieved[i].DocumentId); + } + + Assert.NotEmpty(activities); + Assert.Contains(activities, a => a.OperationName == "ProcessDocument"); + } + private static IngestionDocumentReader CreateReader() => new MarkdownReader(); private static IngestionChunker CreateChunker() => new HeaderChunker(new(TiktokenTokenizer.CreateForModel("gpt-4"))); @@ -268,7 +314,7 @@ private static void AssertErrorActivities(List activities, int expecte Assert.NotEmpty(activities); Assert.All(activities, a => Assert.Equal("Experimental.Microsoft.Extensions.DataIngestion", a.Source.Name)); - var failed = activities.Where(act => act.Status == ActivityStatusCode.Error).ToList(); + List failed = activities.Where(act => act.Status == ActivityStatusCode.Error).ToList(); Assert.Equal(expectedFailedActivitiesCount, failed.Count); Assert.All(failed, a => Assert.Equal(ExpectedException.ExceptionMessage, a.StatusDescription)); } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 61088b1225d..7ed1a84bddf 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) IncrementalIngestion = false, }); + DocumentReader reader = new(directory); using var pipeline = new IngestionPipeline( - reader: new DocumentReader(directory), chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))), writer: writer, loggerFactory: loggerFactory); - await foreach (var result in pipeline.ProcessAsync(directory, searchPattern)) + await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern)) { logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded); } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 61088b1225d..7ed1a84bddf 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) IncrementalIngestion = false, }); + DocumentReader reader = new(directory); using var pipeline = new IngestionPipeline( - reader: new DocumentReader(directory), chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))), writer: writer, loggerFactory: loggerFactory); - await foreach (var result in pipeline.ProcessAsync(directory, searchPattern)) + await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern)) { logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded); } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs index b4675927d47..13260fd5370 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) IncrementalIngestion = false, }); + DocumentReader reader = new(directory); using var pipeline = new IngestionPipeline( - reader: new DocumentReader(directory), chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))), writer: writer, loggerFactory: loggerFactory); - await foreach (var result in pipeline.ProcessAsync(directory, searchPattern)) + await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern)) { logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded); } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 61088b1225d..7ed1a84bddf 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) IncrementalIngestion = false, }); + DocumentReader reader = new(directory); using var pipeline = new IngestionPipeline( - reader: new DocumentReader(directory), chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))), writer: writer, loggerFactory: loggerFactory); - await foreach (var result in pipeline.ProcessAsync(directory, searchPattern)) + await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern)) { logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded); } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs index b4675927d47..13260fd5370 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) IncrementalIngestion = false, }); + DocumentReader reader = new(directory); using var pipeline = new IngestionPipeline( - reader: new DocumentReader(directory), chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))), writer: writer, loggerFactory: loggerFactory); - await foreach (var result in pipeline.ProcessAsync(directory, searchPattern)) + await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern)) { logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded); } From dd7721750e1d8f59355167cb11fa3ca45a278526 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Apr 2026 06:35:45 +0000 Subject: [PATCH 2/3] Fix IngestionDocumentHeader constructor usage in test and README Agent-Logs-Url: https://github.com/dotnet/extensions/sessions/54f5e258-8414-40a0-b8b5-953677d1cce2 Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- src/Libraries/Microsoft.Extensions.DataIngestion/README.md | 2 +- .../IngestionPipelineTests.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/README.md b/src/Libraries/Microsoft.Extensions.DataIngestion/README.md index 5aabf7b3721..0e40e5f25f7 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/README.md +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/README.md @@ -69,7 +69,7 @@ using IngestionPipeline pipeline = new(chunker, writer); IngestionDocument document = new("my-document-id"); IngestionDocumentSection section = new("Main"); -section.Elements.Add(new IngestionDocumentHeader("Introduction", level: 1)); +section.Elements.Add(new IngestionDocumentHeader("# Introduction") { Level = 1 }); section.Elements.Add(new IngestionDocumentParagraph("This is the content of my document.")); document.Sections.Add(section); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index 5eef3fd4efc..7df65e09f3b 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -255,7 +255,7 @@ public async Task CanProcessDocumentWithoutReader() IngestionDocument document = new("test-document-id"); IngestionDocumentSection section = new("Main"); - section.Elements.Add(new IngestionDocumentHeader("Introduction", level: 1)); + section.Elements.Add(new IngestionDocumentHeader("# Introduction") { Level = 1 }); section.Elements.Add(new IngestionDocumentParagraph("This is the content of a manually created document.")); document.Sections.Add(section); From 6258a67a0363b18beb2e3c779f32ffdbea639aa7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Apr 2026 17:03:28 +0000 Subject: [PATCH 3/3] Update OpenTelemetry packages to fix NU1902 vulnerability warnings in template tests Update OpenTelemetry.Exporter.OpenTelemetryProtocol from 1.14.0 to 1.15.3 to fix GHSA-mr8r-92fq-pj8p and GHSA-q834-8qmm-v933 vulnerabilities. Also update companion OTel packages to their latest versions. Agent-Logs-Url: https://github.com/dotnet/extensions/sessions/c6522938-3c86-4211-a016-ca79ad9b591e Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- eng/packages/ProjectTemplates.props | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eng/packages/ProjectTemplates.props b/eng/packages/ProjectTemplates.props index 1c4069cf9c2..74733c3b688 100644 --- a/eng/packages/ProjectTemplates.props +++ b/eng/packages/ProjectTemplates.props @@ -29,11 +29,11 @@ - - - - - + + + + +