rootcodelabs · nuwangeek · Jan 21, 2026 · Jan 21, 2026 · Jan 21, 2026 · Jan 21, 2026
diff --git a/.github/workflows/deepeval-tests.yml b/.github/workflows/deepeval-tests.yml
@@ -3,7 +3,7 @@ name: DeepEval RAG System Tests
 on:
   pull_request:
     types: [opened, synchronize, reopened]
-    branches: ["rag-33-debug", "RAG-33-31okt", "wip_3_12"]
+    branches: ["RAG-217-debug"]
     paths:
       - 'src/**'
       - 'tests/**'
@@ -94,7 +94,7 @@ jobs:
             2. Add the missing secrets with the appropriate values:
 
             **Azure OpenAI Configuration:**
-            - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
+            - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
             - \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key
             - \`AZURE_OPENAI_DEPLOYMENT\` - Chat model deployment name (e.g., \`gpt-4o-mini\`)
             - \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`)
@@ -193,8 +193,8 @@ jobs:
           AZURE_STORAGE_BLOB_NAME: ${{ secrets.AZURE_STORAGE_BLOB_NAME }}
           EVAL_MODE: "true"
         run: |
-          # Run tests with testcontainers managing Docker Compose
-          uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO
+          # Run tests sequentially in a single process (-n 0 disables pytest-xdist workers) to avoid rate limiting
+          uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO -n 0
 
       - name: Fix permissions on test artifacts
         if: always()

diff --git a/docker-compose-eval.yml b/docker-compose-eval.yml
@@ -56,7 +56,7 @@ services:
       - vault
     volumes:
       - ./test-vault/agents/llm:/agent/in
-      - ./test-vault/agent-out:/agent/out
+      - ./test-vault/agent-out:/agent/llm-token
     entrypoint: ["sh", "-c"]
     command:
       - |
@@ -254,12 +254,13 @@ services:
       - "8100:8100"
     environment:
       - VAULT_ADDR=http://vault:8200
-      - VAULT_TOKEN_FILE=/agent/out/token
+      - VAULT_TOKEN_FILE=/agent/llm-token/token
+      - ENVIRONMENT=development
       - QDRANT_URL=http://qdrant:6333
       - EVAL_MODE=true
     volumes:
       - ./src/llm_config_module/config:/app/src/llm_config_module/config:ro
-      - ./test-vault/agent-out:/agent/out:ro
+      - ./test-vault/agent-out:/agent/llm-token:ro
       - test_llm_orchestration_logs:/app/logs
     depends_on:
       - qdrant

diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py
@@ -2192,7 +2192,7 @@ def _generate_rag_response(
                     },
                     output=answer,
                 )
-            
+
             retrieval_context: List[Dict[str, Any]] | None = None
             if eval_mode and relevant_chunks:
                 max_blocks_used = ResponseGenerationConstants.DEFAULT_MAX_BLOCKS
@@ -2230,16 +2230,16 @@ def _generate_rag_response(
                         chunks=None,  # No chunks when question is out of scope
                     )
                 else:
-                    response =  OrchestrationResponse(
+                    response = OrchestrationResponse(
                         chatId=request.chatId,
                         llmServiceActive=True,  # service OK; insufficient context
                         questionOutOfLLMScope=True,
                         inputGuardFailed=False,
                         content=localized_msg,
                     )
-                    if eval_mode: 
+                    if eval_mode:
                         response.retrieval_context = retrieval_context
-                    return response                    
+                    return response
 
             # In-scope: return the answer as-is (NO citations)
             logger.info("Returning in-scope answer without citations.")
@@ -2264,14 +2264,14 @@ def _generate_rag_response(
                     chunks=self._format_chunks_for_test_response(relevant_chunks),
                 )
             else:
-                response =  OrchestrationResponse(
+                response = OrchestrationResponse(
                     chatId=request.chatId,
                     llmServiceActive=True,
                     questionOutOfLLMScope=False,
                     inputGuardFailed=False,
                     content=content_with_refs,
                 )
-                if eval_mode: 
+                if eval_mode:
                     response.retrieval_context = retrieval_context
                 return response
 

diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py
@@ -1,4 +1,5 @@
 """LLM Orchestration Service API - FastAPI application."""
+
 import os
 from contextlib import asynccontextmanager
 from typing import Any, AsyncGenerator, Dict
@@ -40,7 +41,7 @@
     ContextGenerationRequest,
     ContextGenerationResponse,
     EmbeddingErrorResponse,
-    DeepEvalTestOrchestrationResponse
+    DeepEvalTestOrchestrationResponse,
 )
 
 

diff --git a/src/models/request_models.py b/src/models/request_models.py
@@ -278,4 +278,4 @@ class DeepEvalTestOrchestrationResponse(BaseModel):
     content: str
     retrieval_context: Optional[List[Dict[str, Any]]] = None
     refined_questions: Optional[List[str]] = None
-    expected_output: Optional[str] = None  # For DeepEval
+    expected_output: Optional[str] = None  # For DeepEval
diff --git a/test-vault/agents/llm/agent.hcl b/test-vault/agents/llm/agent.hcl
@@ -3,7 +3,7 @@ vault {
   address = "http://vault:8200"
 }
 
-pid_file = "/agent/out/pidfile"
+pid_file = "/agent/llm-token/pidfile"
 
 auto_auth {
   method "approle" {
@@ -17,7 +17,7 @@ auto_auth {
 
   sink "file" {
     config = {
-      path = "/agent/out/token"
+      path = "/agent/llm-token/token"
     }
   }
 }
@@ -36,7 +36,7 @@ listener "tcp" {
 # dummy template so cache is “active” (some versions require this)
 template {
   source      = "/dev/null"
-  destination = "/agent/out/dummy"
+  destination = "/agent/llm-token/dummy"
 }
 
 # Disable API proxy; not needed here

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -16,14 +16,11 @@
 
 
 def download_embeddings_from_azure(
-    connection_string: str,
-    container_name: str,
-    blob_name: str,
-    local_path: Path
+    connection_string: str, container_name: str, blob_name: str, local_path: Path
 ) -> None:
     """
     Download pre-computed embeddings from Azure Blob Storage.
-    
+
     Args:
         connection_string: Azure Storage connection string
         container_name: Name of the blob container
@@ -34,28 +31,29 @@ def download_embeddings_from_azure(
     logger.info(f"  Container: {container_name}")
     logger.info(f"  Blob: {blob_name}")
     logger.info(f"  Local path: {local_path}")
-    
+
     try:
         # Create BlobServiceClient
-        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
-
+        blob_service_client = BlobServiceClient.from_connection_string(
+            connection_string
+        )
+
         # Get blob client
         blob_client = blob_service_client.get_blob_client(
-            container=container_name,
-            blob=blob_name
+            container=container_name, blob=blob_name
         )
-        
+
         # Ensure parent directory exists
         local_path.parent.mkdir(parents=True, exist_ok=True)
-        
+
         # Download the blob
         with open(local_path, "wb") as download_file:
             download_stream = blob_client.download_blob()
             download_file.write(download_stream.readall())
-        
+
         file_size_kb = local_path.stat().st_size / 1024
         logger.info(f"✓ Downloaded embeddings successfully ({file_size_kb:.2f} KB)")
-        
+
     except Exception as e:
         logger.error(f"Failed to download embeddings from Azure: {e}")
         raise
@@ -144,10 +142,10 @@ def start(self) -> None:
         """Start all test containers and bootstrap Vault"""
         logger.info("Starting RAG Stack testcontainers...")
         os.environ["EVAL_MODE"] = "true"
-        
+
         # Download embeddings from Azure before starting containers
         self._download_embeddings_from_azure()
-        
+
         # Prepare Vault Agent directories
         agent_in = self.project_root / "test-vault" / "agents" / "llm"
         agent_out = self.project_root / "test-vault" / "agent-out"
@@ -212,33 +210,33 @@ def _download_embeddings_from_azure(self) -> None:
         connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
         container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME", "test-embeddings")
         blob_name = os.getenv("AZURE_STORAGE_BLOB_NAME", "test_embeddings.json")
-        
+
         # Local path where embeddings should be saved
         embeddings_file = self.project_root / "tests" / "data" / "test_embeddings.json"
-        
+
         # Skip if embeddings already exist locally (for local development)
-        #if embeddings_file.exists():
+        # if embeddings_file.exists():
         #    logger.info("Embeddings file already exists locally, skipping Azure download")
         #    return
-        
+
         # Require Azure configuration for CI/CD
         if not connection_string:
             raise ValueError(
                 "AZURE_STORAGE_CONNECTION_STRING is required to download embeddings. "
                 "Either set this environment variable or ensure test_embeddings.json "
                 f"exists at {embeddings_file}"
             )
-        
+
         logger.info("=" * 80)
         logger.info("DOWNLOADING EMBEDDINGS FROM AZURE BLOB STORAGE")
         logger.info("=" * 80)
-        
+
         try:
             download_embeddings_from_azure(
                 connection_string=connection_string,
                 container_name=container_name,
                 blob_name=blob_name,
-                local_path=embeddings_file
+                local_path=embeddings_file,
             )
             logger.info("Embeddings download complete")
         except Exception as e:
@@ -594,7 +592,7 @@ def _fix_token_file_permissions(
                         "vault-agent-llm",
                         "chmod",
                         "644",
-                        "/agent/out/token",
+                        "/agent/llm-token/token",
                     ],
                     capture_output=True,
                     text=True,
@@ -797,8 +795,9 @@ def orchestration_client(rag_stack: RAGStackTestContainers):
     Function-scoped fixture that provides the orchestration service URL.
     Tests can use either requests (sync) or httpx (async).
     """
+
     class OrchestrationClient:
         def __init__(self, base_url: str):
             self.base_url = base_url
-    
-    return OrchestrationClient(rag_stack.get_orchestration_service_url())
+
+    return OrchestrationClient(rag_stack.get_orchestration_service_url())