add blogs

workcontrolgit · workcontrolgit · commit cb8193082b65 · 2026-04-20T17:26:46.000-04:00
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -5,7 +5,8 @@
       "Bash(node -e \":*)",
       "Bash(git add blogs/series-2-dotnet-api/2.1-dotnet-clean-architecture.md)",
       "Bash(git commit -m \"Add brief EasyCaching mention to 2.1 clean architecture article\")",
-      "Bash(git push)"
+      "Bash(git push)",
+      "Bash(gh pr:*)"
     ],
     "deny": [],
     "ask": []
diff --git a/ApiResources/TalentManagement-API b/ApiResources/TalentManagement-API
@@ -1 +1 @@
-Subproject commit 6586b08f050434fc668609b299f0584d79a4652d
+Subproject commit 3fe652d727db12703261a9b5c1dfe0a74b5b4815
diff --git a/Tests/AngularNetTutorial-Playwright b/Tests/AngularNetTutorial-Playwright
@@ -1 +1 @@
-Subproject commit 89d4c8b9557e95e3fe89d76ab20d09d2c8b32d6c
+Subproject commit ba4951061f1c458fe3687f88367296d5326b3c6c
diff --git a/TokenService/Duende-IdentityServer b/TokenService/Duende-IdentityServer
@@ -1 +1 @@
-Subproject commit 66391eb8d0f3700857bcf7d29d5c42a4f46d4fa6
+Subproject commit 179cc0e52fe74dc58d2b6d1631242bc7fce08430
diff --git a/blogs/series-6-ai-app-features/6.1-dotnet-ai-foundation.md b/blogs/series-6-ai-app-features/6.1-dotnet-ai-foundation.md
@@ -90,21 +90,15 @@ curl http://localhost:11434/api/tags
 
 ### Step 2: Add NuGet Packages
 
-The AI packages split across two projects to maintain Clean Architecture separation:
+Add OllamaSharp to the Infrastructure.Shared project — this is the only AI package needed:
 
-**`TalentManagementAPI.WebApi.csproj`** — The Ollama provider lives here:
+**`TalentManagementAPI.Infrastructure.Shared.csproj`**:
 
 ```xml
-<PackageReference Include="Microsoft.Extensions.AI.Ollama" Version="9.5.0" />
+<PackageReference Include="OllamaSharp" Version="5.3.4" />
 ```
 
-**`TalentManagementAPI.Infrastructure.Shared.csproj`** — The abstraction lives here:
-
-```xml
-<PackageReference Include="Microsoft.Extensions.AI" Version="9.5.0" />
-```
-
-**Why split?** The Application and Infrastructure layers must never reference provider-specific packages (`Microsoft.Extensions.AI.Ollama`). Only WebApi knows which provider is registered. Infrastructure.Shared only knows about `IChatClient` from `Microsoft.Extensions.AI`.
+**Why OllamaSharp instead of `Microsoft.Extensions.AI`?** OllamaSharp provides native streaming support via `IAsyncEnumerable<>` (`await foreach`), so tokens stream out of Ollama as they are generated — important when local model responses take several seconds. `Microsoft.Extensions.AI` is the right abstraction when you need to swap between Azure OpenAI, OpenAI, and Ollama without touching service code. For this tutorial, OllamaSharp keeps the dependency footprint minimal: one package, only in Infrastructure.Shared, no provider registration boilerplate in Program.cs.
 
 ### Step 3: Add Feature Flag and Ollama Config
 
@@ -118,10 +112,19 @@ In `TalentManagementAPI.WebApi/appsettings.json`, add `AiEnabled` to the existin
 },
 "Ollama": {
   "BaseUrl": "http://localhost:11434",
-  "Model": "llama3.2"
+  "Model": "llama3.2",
+  "EmbeddingModel": "nomic-embed-text",
+  "CacheTtlMinutes": 60
 }
 ```
 
+**What each field does:**
+
+* **`BaseUrl`** — where Ollama is listening (`ollama serve` defaults to port 11434)
+* **`Model`** — the chat model to use; `llama3.2` is pulled in Step 1
+* **`EmbeddingModel`** — used in later articles (6.5+) for semantic search; `nomic-embed-text` is a compact, high-quality embedding model
+* **`CacheTtlMinutes`** — how long AI responses are cached in-memory; identical questions within this window return instantly without hitting Ollama again (introduced in the `CachingAiChatService` below)
+
 **Key point:** `"AiEnabled": false` is the default. Developers who haven't installed Ollama can still clone and run the full stack — the AI endpoint simply returns `503 Service Unavailable` with a message explaining how to enable it. To activate AI features, change this to `true` and ensure Ollama is running.
 
 ### Step 4: Define the Application Interface
@@ -146,73 +149,102 @@ namespace TalentManagementAPI.Application.Interfaces
 Create `TalentManagementAPI.Infrastructure.Shared/Services/OllamaAiService.cs`:
 
 ```csharp
-using Microsoft.Extensions.AI;
 using TalentManagementAPI.Application.Interfaces;
 
 namespace TalentManagementAPI.Infrastructure.Shared.Services
 {
     public class OllamaAiService : IAiChatService
     {
-        private readonly IChatClient _chatClient;
+        private readonly IOllamaApiClient _ollamaApiClient;
 
-        public OllamaAiService(IChatClient chatClient)
+        public OllamaAiService(IOllamaApiClient ollamaApiClient)
         {
-            _chatClient = chatClient;
+            _ollamaApiClient = ollamaApiClient;
         }
 
         public async Task<string> ChatAsync(string message, string? systemPrompt = null,
             CancellationToken cancellationToken = default)
         {
-            var messages = new List<ChatMessage>();
+            var messages = new List<Message>();
 
             if (!string.IsNullOrWhiteSpace(systemPrompt))
-                messages.Add(new ChatMessage(ChatRole.System, systemPrompt));
+                messages.Add(new Message(new ChatRole("system"), systemPrompt));
+
+            messages.Add(new Message(new ChatRole("user"), message));
+
+            var request = new ChatRequest
+            {
+                Model = _ollamaApiClient.SelectedModel,
+                Messages = messages,
+                Stream = true
+            };
+
+            var responseBuilder = new MessageBuilder();
 
-            messages.Add(new ChatMessage(ChatRole.User, message));
+            await foreach (var response in _ollamaApiClient.ChatAsync(request, cancellationToken)
+                               .WithCancellation(cancellationToken))
+            {
+                if (response?.Message is not null)
+                    responseBuilder.Append(response);
+            }
 
-            var response = await _chatClient.CompleteAsync(messages, cancellationToken: cancellationToken);
-            return response.Message.Text ?? string.Empty;
+            return responseBuilder.HasValue
+                ? responseBuilder.ToMessage().Content ?? string.Empty
+                : string.Empty;
         }
     }
 }
 ```
 
-**What this does:** `OllamaAiService` takes `IChatClient` from DI — it has no idea it's talking to Ollama specifically. The `CompleteAsync` method sends the message list and returns the model's reply. An optional system prompt lets callers control the AI's persona or constraints.
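+**What this does:** `OllamaAiService` takes `IOllamaApiClient` from DI (registered in Step 6). OllamaSharp streams tokens back using `IAsyncEnumerable<>` — the `await foreach` loop accumulates each chunk into a `MessageBuilder`, then returns the fully assembled reply. An optional system prompt lets callers control the AI's persona or constraints without the service knowing anything about the caller's intent. The listing does not show the OllamaSharp `using` directives; add `using OllamaSharp;` and `using OllamaSharp.Models.Chat;` at the top of the file so that `IOllamaApiClient`, `ChatRequest`, `Message`, and `MessageBuilder` resolve.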
 
 ### Step 6: Register Services
 
-In `Infrastructure.Shared/ServiceRegistration.cs`, add the `IAiChatService` → `OllamaAiService` binding:
+In `Infrastructure.Shared/ServiceRegistration.cs`, register `IOllamaApiClient` and wire `IAiChatService` to a caching decorator that wraps `OllamaAiService`:
 
 ```csharp
 using TalentManagementAPI.Application.Interfaces;
 using TalentManagementAPI.Infrastructure.Shared.Services;
 
-public static void AddSharedInfrastructure(this IServiceCollection services, IConfiguration _config)
+public static void AddSharedInfrastructure(this IServiceCollection services, IConfiguration config)
 {
-    services.Configure<MailSettings>(_config.GetSection("MailSettings"));
+    services.Configure<MailSettings>(config.GetSection("MailSettings"));
     services.AddTransient<IDateTimeService, DateTimeService>();
     services.AddTransient<IEmailService, EmailService>();
     services.AddTransient<IMockService, MockService>();
-    services.AddTransient<IAiChatService, OllamaAiService>();
+
+    // Register the Ollama client as a singleton — one connection reused across requests
+    services.AddSingleton<IOllamaApiClient>(_ =>
+    {
+        var baseUrl = config["Ollama:BaseUrl"] ?? "http://localhost:11434";
+        var model   = config["Ollama:Model"]   ?? "llama3.2";
+        return new OllamaApiClient(new Uri(baseUrl), model);
+    });
+
+    // Metadata scoped per-request so the controller can read cache hit/miss
+    services.AddScoped<IAiResponseMetadata, AiResponseMetadata>();
+
+    // Wrap OllamaAiService with a caching decorator — identical questions within
+    // CacheTtlMinutes return instantly without hitting Ollama again
+    var ttlMinutes = config.GetValue<int>("Ollama:CacheTtlMinutes", 60);
+    services.AddTransient<OllamaAiService>();
+    services.AddTransient<IAiChatService>(sp => new CachingAiChatService(
+        sp.GetRequiredService<OllamaAiService>(),
+        sp.GetRequiredService<ICacheProvider>(),
+        sp.GetRequiredService<IAiResponseMetadata>(),
+        TimeSpan.FromMinutes(ttlMinutes)));
 }
 ```
 
-In `WebApi/Program.cs`, register the Ollama provider for `IChatClient`:
+In `WebApi/Program.cs`, the only AI-related line is the call to `AddSharedInfrastructure` — no extra registration needed:
 
 ```csharp
-// Register application services
 builder.Services.AddApplicationLayer();
 builder.Services.AddPersistenceInfrastructure(builder.Configuration);
-builder.Services.AddSharedInfrastructure(builder.Configuration);
-
-// Register Ollama chat client (IChatClient) — used by OllamaAiService
-// AiController is gated by [FeatureGate("AiEnabled")], so no calls are made when AI is disabled
-var ollamaBaseUrl = builder.Configuration["Ollama:BaseUrl"] ?? "http://localhost:11434";
-var ollamaModel = builder.Configuration["Ollama:Model"] ?? "llama3.2";
-builder.Services.AddOllamaChatClient(ollamaModel, new Uri(ollamaBaseUrl));
+builder.Services.AddSharedInfrastructure(builder.Configuration);  // ← registers IOllamaApiClient + IAiChatService
 ```
 
-**What this does:** `AddOllamaChatClient()` registers `IChatClient` in the DI container pointing to Ollama. `OllamaAiService` receives this via constructor injection. If you later want to use Azure OpenAI, you'd replace `AddOllamaChatClient()` with `AddAzureOpenAIChatClient()` — and nothing else changes.
+**What the caching decorator does:** `CachingAiChatService` wraps `OllamaAiService`. On the first call for a given `(message, systemPrompt)` pair, it calls Ollama and stores the reply. On subsequent identical calls within the TTL window, it returns the cached reply — skipping the 1–4 second Ollama inference. The `IAiResponseMetadata` flag tells the controller whether the response was a cache hit, which is surfaced as the `X-AI-Cache: HIT/MISS` response header.
 
 ### Step 7: Create the AI Controller
 
@@ -222,33 +254,50 @@ Create `TalentManagementAPI.WebApi/Controllers/v1/AiController.cs`:
 using Asp.Versioning;
 using Microsoft.AspNetCore.Authorization;
 using Microsoft.AspNetCore.Mvc;
-using Microsoft.FeatureManagement.Mvc;
 using TalentManagementAPI.Application.Interfaces;
 
 namespace TalentManagementAPI.WebApi.Controllers.v1
 {
-    [FeatureGate("AiEnabled")]
     [ApiVersion("1.0")]
     [AllowAnonymous]
     [Route("api/v{version:apiVersion}/ai")]
     public sealed class AiController : BaseApiController
     {
         private readonly IAiChatService _aiChatService;
+        private readonly IFeatureManagerSnapshot _featureManager;
+        private readonly IAiResponseMetadata _aiMetadata;
 
-        public AiController(IAiChatService aiChatService)
+        public AiController(
+            IAiChatService aiChatService,
+            IFeatureManagerSnapshot featureManager,
+            IAiResponseMetadata aiMetadata)
         {
             _aiChatService = aiChatService;
+            _featureManager = featureManager;
+            _aiMetadata = aiMetadata;
         }
 
+        private void SetAiCacheHeader()
+            => Response.Headers["X-AI-Cache"] = _aiMetadata.WasCacheHit ? "HIT" : "MISS";
+
         /// <summary>
         /// Send a message to the AI assistant and receive a reply.
         /// </summary>
         [HttpPost("chat")]
         public async Task<IActionResult> Chat([FromBody] AiChatRequest request,
             CancellationToken cancellationToken)
         {
+            if (!await _featureManager.IsEnabledAsync("AiEnabled"))
+            {
+                return Problem(
+                    detail: "AI chat is disabled. Enable FeatureManagement:AiEnabled to use this endpoint.",
+                    title: "AI chat is disabled",
+                    statusCode: StatusCodes.Status503ServiceUnavailable);
+            }
+
             var reply = await _aiChatService.ChatAsync(
                 request.Message, request.SystemPrompt, cancellationToken);
+            SetAiCacheHeader();
             return Ok(new AiChatResponse(reply));
         }
     }
@@ -258,9 +307,15 @@ namespace TalentManagementAPI.WebApi.Controllers.v1
 }
 ```
 
-**What `[FeatureGate("AiEnabled")]` does:** When `AiEnabled` is `false` in `appsettings.json`, ASP.NET Core returns a `404 Not Found` for all routes under this controller. Ollama is never called. The controller doesn't appear in Swagger. To the rest of the app, it doesn't exist.
+**Why per-method checks instead of `[FeatureGate]` on the class?**
+
+The `[FeatureGate("AiEnabled")]` attribute returns `404 Not Found` when the feature is disabled — a misleading status for a known endpoint. The per-method check returns `503 Service Unavailable` with a clear `detail` message explaining exactly what to enable and where. This is far more helpful to developers hitting the endpoint for the first time.
+
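+**`IFeatureManagerSnapshot`** (namespace `Microsoft.FeatureManagement`, so add `using Microsoft.FeatureManagement;` to the controller) — the snapshot variant evaluates each feature flag at most once per request and caches the result for the request lifetime. A flag therefore keeps the same value for the whole request even if configuration changes mid-request, and repeated checks within one action do not re-read configuration.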
+
+**`IAiResponseMetadata`** — a scoped flag (set by `CachingAiChatService` in Step 6) that records whether the response came from the cache. `SetAiCacheHeader()` surfaces this as `X-AI-Cache: HIT` or `MISS` in every successful response (the `503` returned while the feature is disabled does not carry the header) — visible in the browser Network tab and Swagger, making it easy to see when caching is working.
 
-When `AiEnabled` is `true`, the endpoint becomes fully active. No other code changes needed.
+When `AiEnabled` is `true`, the endpoint is fully active. No other code changes needed.
 
 ---
 
diff --git a/blogs/series-6-ai-app-features/6.2-dotnet-ai-hr-assistant.md b/blogs/series-6-ai-app-features/6.2-dotnet-ai-hr-assistant.md
@@ -217,12 +217,12 @@ namespace TalentManagementAPI.Application.Features.AI.Queries.GetHrInsight
 
 ### Step 3: Add the Controller Endpoint
 
-In `TalentManagementAPI.WebApi/Controllers/v1/AiController.cs`, add the `hr-insight` endpoint and request record:
+In `TalentManagementAPI.WebApi/Controllers/v1/AiController.cs`, add the `hr-insight` action. This builds on the controller created in Article 6.1 — `_featureManager` (`IFeatureManagerSnapshot`) and `_aiMetadata` (`IAiResponseMetadata`) are already injected in the constructor alongside `IAiChatService`. The `hr-insight` action follows the exact same per-method feature flag pattern as `chat`.
 
 ```csharp
 using TalentManagementAPI.Application.Features.AI.Queries.GetHrInsight;
 
-// Inside AiController:
+// Inside AiController (add after the Chat action):
 
 /// <summary>
 /// Ask the HR AI assistant a question about your current workforce data.
@@ -245,6 +245,7 @@ public async Task<IActionResult> HrInsight(
         new GetHrInsightQuery { Question = request.Question },
         cancellationToken);
 
+    SetAiCacheHeader();
     return Ok(result);
 }
 
diff --git a/blogs/series-6-ai-app-features/6.3-angular-ai-chat-widget.md b/blogs/series-6-ai-app-features/6.3-angular-ai-chat-widget.md
@@ -82,6 +82,8 @@ export const environment = {
 
 Add the same `aiEnabled: false` line under `// Feature Flags`. Production defaults to off.
 
+> **Note for repo cloners:** If you cloned the tutorial repo, `environment.ts` may already have `aiEnabled: true` (set during development of later articles). To follow this article from scratch — seeing the disabled state first — flip it to `false`, then back to `true` when you're ready to test the chat UI.
+
 **Why default to false?** Readers who are following the original Series 0–5 tutorial don't have Ollama running. If the chat widget made API calls with `AiEnabled: false` in the API, every request would return `503 Service Unavailable` — a broken experience. Defaulting the flag to `false` in the Angular environment means the chat UI is never shown to those readers, so their app continues to work exactly as it did before.
 
 ---
@@ -150,6 +152,8 @@ export * from './base-api.service';
 // ... existing exports
 ```
 
+> **Note:** The `ai.service.ts` file shown here covers the two methods needed for Articles 6.3 and 6.4 (`chat` and `hrInsight`). Later articles (6.5+) add `nlEmployeeSearch()` and `semanticPositionSearch()` to the same service file. If you clone the repo, you will see those additional methods — they are safe to ignore until you reach those articles.
+
 ---
 
 ### Step 3: Create the Chat Component