diff --git a/eng/packages/ProjectTemplates.props b/eng/packages/ProjectTemplates.props
index 1c4069cf9c2..74733c3b688 100644
--- a/eng/packages/ProjectTemplates.props
+++ b/eng/packages/ProjectTemplates.props
@@ -29,11 +29,11 @@
-
-
-
-
-
+
+
+
+
+
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs
index 4251bef6ae3..cbf9bf5aa4e 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/DiagnosticsConstants.cs
@@ -32,4 +32,9 @@ internal static class ProcessFile
internal const string ActivityName = "ProcessFile";
internal const string FilePathTagName = "rag.file.path";
}
+
+ internal static class ProcessDocument
+ {
+ internal const string ActivityName = "ProcessDocument";
+ }
}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs
index 1eeb94058ee..fa2b3db7a8b 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/IngestionPipeline.cs
@@ -24,7 +24,6 @@ namespace Microsoft.Extensions.DataIngestion;
/// The type of the chunk content.
public sealed class IngestionPipeline : IDisposable
{
- private readonly IngestionDocumentReader _reader;
private readonly IngestionChunker _chunker;
private readonly IngestionChunkWriter _writer;
private readonly ActivitySource _activitySource;
@@ -33,19 +32,16 @@ public sealed class IngestionPipeline : IDisposable
///
/// Initializes a new instance of the class.
///
- /// The reader for ingestion documents.
/// The chunker to split documents into chunks.
/// The writer for processing chunks.
/// The options for the ingestion pipeline.
/// The logger factory for creating loggers.
public IngestionPipeline(
- IngestionDocumentReader reader,
IngestionChunker chunker,
IngestionChunkWriter writer,
IngestionPipelineOptions? options = default,
ILoggerFactory? loggerFactory = default)
{
- _reader = Throw.IfNull(reader);
_chunker = Throw.IfNull(chunker);
_writer = Throw.IfNull(writer);
_activitySource = new((options ?? new()).ActivitySourceName);
@@ -69,17 +65,36 @@ public void Dispose()
///
public IList> ChunkProcessors { get; } = [];
+ /// <summary>
+ /// Processes the specified document through the pipeline.
+ /// </summary>
+ /// <param name="document">The document to process.</param>
+ /// <param name="cancellationToken">The cancellation token for the operation.</param>
+ /// <returns>A task representing the asynchronous operation, returning the processed document.</returns>
+ public async Task<IngestionDocument> ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default)
+ {
+ Throw.IfNull(document);
+
+ using (Activity? activity = _activitySource.StartActivity(ProcessDocument.ActivityName))
+ {
+ activity?.SetTag(ProcessSource.DocumentIdTagName, document.Identifier);
+ return await IngestAsync(document, activity, cancellationToken).ConfigureAwait(false);
+ }
+ }
+
/// <summary>
/// Processes all files in the specified directory that match the given search pattern and option.
/// </summary>
+ /// <param name="reader">The reader for ingestion documents.</param>
/// <param name="directory">The directory to process.</param>
/// <param name="searchPattern">The search pattern for file selection.</param>
/// <param name="searchOption">The search option for directory traversal.</param>
/// <param name="cancellationToken">The cancellation token for the operation.</param>
/// <returns>A task representing the asynchronous operation.</returns>
- public async IAsyncEnumerable<IngestionResult> ProcessAsync(DirectoryInfo directory, string searchPattern = "*.*",
+ public async IAsyncEnumerable<IngestionResult> ProcessAsync(IngestionDocumentReader reader, DirectoryInfo directory, string searchPattern = "*.*",
SearchOption searchOption = SearchOption.TopDirectoryOnly, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
+ Throw.IfNull(reader);
Throw.IfNull(directory);
Throw.IfNullOrEmpty(searchPattern);
Throw.IfOutOfRange((int)searchOption, (int)SearchOption.TopDirectoryOnly, (int)SearchOption.AllDirectories);
@@ -91,7 +106,7 @@ public async IAsyncEnumerable ProcessAsync(DirectoryInfo direct
.SetTag(ProcessDirectory.SearchOptionTagName, searchOption.ToString());
_logger?.ProcessingDirectory(directory.FullName, searchPattern, searchOption);
- await foreach (var ingestionResult in ProcessAsync(directory.EnumerateFiles(searchPattern, searchOption), rootActivity, cancellationToken).ConfigureAwait(false))
+ await foreach (IngestionResult ingestionResult in ProcessAsync(reader, directory.EnumerateFiles(searchPattern, searchOption), rootActivity, cancellationToken).ConfigureAwait(false))
{
yield return ingestionResult;
}
@@ -101,16 +116,18 @@ public async IAsyncEnumerable ProcessAsync(DirectoryInfo direct
/// <summary>
/// Processes the specified files.
/// </summary>
+ /// <param name="reader">The reader for ingestion documents.</param>
/// <param name="files">The collection of files to process.</param>
/// <param name="cancellationToken">The cancellation token for the operation.</param>
/// <returns>A task representing the asynchronous operation.</returns>
- public async IAsyncEnumerable<IngestionResult> ProcessAsync(IEnumerable<FileInfo> files, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ public async IAsyncEnumerable<IngestionResult> ProcessAsync(IngestionDocumentReader reader, IEnumerable<FileInfo> files, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
+ Throw.IfNull(reader);
Throw.IfNull(files);
using (Activity? rootActivity = _activitySource.StartActivity(ProcessFiles.ActivityName))
{
- await foreach (var ingestionResult in ProcessAsync(files, rootActivity, cancellationToken).ConfigureAwait(false))
+ await foreach (IngestionResult ingestionResult in ProcessAsync(reader, files, rootActivity, cancellationToken).ConfigureAwait(false))
{
yield return ingestionResult;
}
@@ -125,7 +142,7 @@ private static void TraceException(Activity? activity, Exception ex)
.SetStatus(ActivityStatusCode.Error, ex.Message);
}
- private async IAsyncEnumerable<IngestionResult> ProcessAsync(IEnumerable<FileInfo> files, Activity? rootActivity,
+ private async IAsyncEnumerable<IngestionResult> ProcessAsync(IngestionDocumentReader reader, IEnumerable<FileInfo> files, Activity? rootActivity,
[EnumeratorCancellation] CancellationToken cancellationToken)
{
#if NET
@@ -143,13 +160,13 @@ private async IAsyncEnumerable ProcessAsync(IEnumerable IngestAsync(IngestionDocument document, Ac
}
IAsyncEnumerable> chunks = _chunker.ProcessAsync(document, cancellationToken);
- foreach (var processor in ChunkProcessors)
+ foreach (IngestionChunkProcessor processor in ChunkProcessors)
{
chunks = processor.ProcessAsync(chunks, cancellationToken);
}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/README.md b/src/Libraries/Microsoft.Extensions.DataIngestion/README.md
index 030ac8da43b..0e40e5f25f7 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/README.md
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/README.md
@@ -44,6 +44,38 @@ using VectorStoreWriter> writer = new
await writer.WriteAsync(chunks);
```
+## Using the IngestionPipeline
+
+### Processing documents from the file system
+
+To process documents from the file system, create an `IngestionPipeline` and pass a reader to the `ProcessAsync` method:
+
+```csharp
+using IngestionPipeline pipeline = new(chunker, writer);
+
+IngestionDocumentReader reader = new MarkdownReader();
+await foreach (IngestionResult result in pipeline.ProcessAsync(reader, directory, "*.md"))
+{
+ Console.WriteLine($"Processed '{result.DocumentId}'. Succeeded: {result.Succeeded}");
+}
+```
+
+### Processing documents without a reader
+
+The `IngestionPipeline` can also process documents that are already in memory, without requiring a reader:
+
+```csharp
+using IngestionPipeline pipeline = new(chunker, writer);
+
+IngestionDocument document = new("my-document-id");
+IngestionDocumentSection section = new("Main");
+section.Elements.Add(new IngestionDocumentHeader("# Introduction") { Level = 1 });
+section.Elements.Add(new IngestionDocumentParagraph("This is the content of my document."));
+document.Sections.Add(section);
+
+IngestionDocument processedDocument = await pipeline.ProcessAsync(document);
+```
+
### Custom metadata
To store custom metadata alongside each chunk, create a type derived from `IngestionChunkVectorRecord` with additional properties, and a `VectorStoreWriter` subclass that overrides `SetMetadata`:
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
index 76168b6e632..6d8fa4e90a2 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
@@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
IncrementalIngestion = false,
});
+ DocumentReader reader = new(directory);
using var pipeline = new IngestionPipeline(
- reader: new DocumentReader(directory),
chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
writer: writer,
loggerFactory: loggerFactory);
- await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
+ await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern))
{
logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
index e865ff39d9b..7df65e09f3b 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
@@ -86,19 +86,20 @@ public async Task CanProcessDocuments()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var collection = testVectorStore.GetIngestionRecordCollection, string>(
+ VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>(
"chunks", TestEmbeddingGenerator.DimensionCount);
using VectorStoreWriter> vectorStoreWriter = new(collection);
- using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter);
- List<IngestionResult> ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync();
+ IngestionDocumentReader reader = CreateReader();
+ using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter);
+ List<IngestionResult> ingestionResults = await pipeline.ProcessAsync(reader, _sampleFiles).ToListAsync();
Assert.Equal(_sampleFiles.Count, ingestionResults.Count);
AssertAllIngestionsSucceeded(ingestionResults);
Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called.");
- var retrieved = await vectorStoreWriter.VectorStoreCollection
+ List> retrieved = await vectorStoreWriter.VectorStoreCollection
.GetAsync(record => _sampleFiles.Any(info => info.FullName == record.DocumentId), top: 1000)
.ToListAsync();
@@ -122,20 +123,21 @@ public async Task CanProcessDocumentsInDirectory()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var collection = testVectorStore.GetIngestionRecordCollection, string>(
+ VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>(
"chunks-dir", TestEmbeddingGenerator.DimensionCount);
using VectorStoreWriter> vectorStoreWriter = new(collection);
- using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter);
+ IngestionDocumentReader reader = CreateReader();
+ using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter);
DirectoryInfo directory = new("TestFiles");
- List<IngestionResult> ingestionResults = await pipeline.ProcessAsync(directory, "*.md").ToListAsync();
+ List<IngestionResult> ingestionResults = await pipeline.ProcessAsync(reader, directory, "*.md").ToListAsync();
Assert.Equal(directory.EnumerateFiles("*.md").Count(), ingestionResults.Count);
AssertAllIngestionsSucceeded(ingestionResults);
Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called.");
- var retrieved = await vectorStoreWriter.VectorStoreCollection
+ List> retrieved = await vectorStoreWriter.VectorStoreCollection
.GetAsync(record => record.DocumentId.StartsWith(directory.FullName), top: 1000)
.ToListAsync();
@@ -159,16 +161,17 @@ public async Task ChunksCanBeMoreThanJustText()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var collection = testVectorStore.GetIngestionRecordCollection, DataContent>(
+ VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, DataContent>(
"chunks-img", TestEmbeddingGenerator.DimensionCount);
using VectorStoreWriter> vectorStoreWriter = new(collection);
- using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter);
+ IngestionDocumentReader reader = CreateReader();
+ using IngestionPipeline pipeline = new(new ImageChunker(), vectorStoreWriter);
Assert.False(embeddingGenerator.WasCalled);
- var ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync();
+ List<IngestionResult> ingestionResults = await pipeline.ProcessAsync(reader, _sampleFiles).ToListAsync();
AssertAllIngestionsSucceeded(ingestionResults);
- var retrieved = await vectorStoreWriter.VectorStoreCollection
+ List> retrieved = await vectorStoreWriter.VectorStoreCollection
.GetAsync(record => record.DocumentId.EndsWith(_withImage.Name), top: 100)
.ToListAsync();
@@ -211,14 +214,14 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var collection = testVectorStore.GetIngestionRecordCollection, string>(
+ VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>(
"chunks-fail", TestEmbeddingGenerator.DimensionCount);
using VectorStoreWriter> vectorStoreWriter = new(collection);
- using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), vectorStoreWriter);
+ using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter);
- await Verify(pipeline.ProcessAsync(_sampleFiles));
- await Verify(pipeline.ProcessAsync(_sampleDirectory));
+ await Verify(pipeline.ProcessAsync(failingForFirstReader, _sampleFiles));
+ await Verify(pipeline.ProcessAsync(failingForFirstReader, _sampleDirectory));
async Task Verify(IAsyncEnumerable<IngestionResult> results)
{
@@ -235,6 +238,49 @@ async Task Verify(IAsyncEnumerable results)
}
}
+ [Fact]
+ public async Task CanProcessDocumentWithoutReader()
+ {
+ List<Activity> activities = [];
+ using TracerProvider tracerProvider = CreateTraceProvider(activities);
+
+ TestEmbeddingGenerator embeddingGenerator = new();
+ using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
+
+ VectorStoreCollection> collection = testVectorStore.GetIngestionRecordCollection, string>(
+ "chunks-doc", TestEmbeddingGenerator.DimensionCount);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
+
+ using IngestionPipeline pipeline = new(CreateChunker(), vectorStoreWriter);
+
+ IngestionDocument document = new("test-document-id");
+ IngestionDocumentSection section = new("Main");
+ section.Elements.Add(new IngestionDocumentHeader("# Introduction") { Level = 1 });
+ section.Elements.Add(new IngestionDocumentParagraph("This is the content of a manually created document."));
+ document.Sections.Add(section);
+
+ IngestionDocument processedDocument = await pipeline.ProcessAsync(document);
+
+ Assert.NotNull(processedDocument);
+ Assert.Equal("test-document-id", processedDocument.Identifier);
+ Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called.");
+
+ List> retrieved = await vectorStoreWriter.VectorStoreCollection
+ .GetAsync(record => record.DocumentId == "test-document-id", top: 100)
+ .ToListAsync();
+
+ Assert.NotEmpty(retrieved);
+ for (int i = 0; i < retrieved.Count; i++)
+ {
+ Assert.NotEqual(Guid.Empty, retrieved[i].Key);
+ Assert.NotEmpty(retrieved[i].Content!);
+ Assert.Equal("test-document-id", retrieved[i].DocumentId);
+ }
+
+ Assert.NotEmpty(activities);
+ Assert.Contains(activities, a => a.OperationName == "ProcessDocument");
+ }
+
private static IngestionDocumentReader CreateReader() => new MarkdownReader();
private static IngestionChunker CreateChunker() => new HeaderChunker(new(TiktokenTokenizer.CreateForModel("gpt-4")));
@@ -268,7 +314,7 @@ private static void AssertErrorActivities(List activities, int expecte
Assert.NotEmpty(activities);
Assert.All(activities, a => Assert.Equal("Experimental.Microsoft.Extensions.DataIngestion", a.Source.Name));
- var failed = activities.Where(act => act.Status == ActivityStatusCode.Error).ToList();
+ List<Activity> failed = activities.Where(act => act.Status == ActivityStatusCode.Error).ToList();
Assert.Equal(expectedFailedActivitiesCount, failed.Count);
Assert.All(failed, a => Assert.Equal(ExpectedException.ExceptionMessage, a.StatusDescription));
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 61088b1225d..7ed1a84bddf 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
IncrementalIngestion = false,
});
+ DocumentReader reader = new(directory);
using var pipeline = new IngestionPipeline(
- reader: new DocumentReader(directory),
chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
writer: writer,
loggerFactory: loggerFactory);
- await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
+ await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern))
{
logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 61088b1225d..7ed1a84bddf 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
IncrementalIngestion = false,
});
+ DocumentReader reader = new(directory);
using var pipeline = new IngestionPipeline(
- reader: new DocumentReader(directory),
chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
writer: writer,
loggerFactory: loggerFactory);
- await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
+ await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern))
{
logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs
index b4675927d47..13260fd5370 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs
@@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
IncrementalIngestion = false,
});
+ DocumentReader reader = new(directory);
using var pipeline = new IngestionPipeline(
- reader: new DocumentReader(directory),
chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
writer: writer,
loggerFactory: loggerFactory);
- await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
+ await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern))
{
logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 61088b1225d..7ed1a84bddf 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
IncrementalIngestion = false,
});
+ DocumentReader reader = new(directory);
using var pipeline = new IngestionPipeline(
- reader: new DocumentReader(directory),
chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
writer: writer,
loggerFactory: loggerFactory);
- await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
+ await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern))
{
logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs
index b4675927d47..13260fd5370 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs
@@ -19,13 +19,13 @@ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
IncrementalIngestion = false,
});
+ DocumentReader reader = new(directory);
using var pipeline = new IngestionPipeline(
- reader: new DocumentReader(directory),
chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
writer: writer,
loggerFactory: loggerFactory);
- await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
+ await foreach (var result in pipeline.ProcessAsync(reader, directory, searchPattern))
{
logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}