diff --git a/.github/workflows/pytest-integration-check.yml b/.github/workflows/pytest-integration-check.yml new file mode 100644 index 0000000..8b4b4cc --- /dev/null +++ b/.github/workflows/pytest-integration-check.yml @@ -0,0 +1,200 @@ +name: RAG Module Integration Tests + +on: + pull_request: + branches: [wip] + types: [opened, synchronize, reopened] + paths: + - 'src/**' + - 'tests/**' + - 'data/**' + - 'docker-compose-test.yml' + - 'Dockerfile.llm_orchestration_service' + - '.github/workflows/pytest-integration-check.yml' + +jobs: + pytest-integration-tests: + runs-on: ubuntu-latest + timeout-minutes: 80 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Validate required secrets + id: validate_secrets + run: | + echo "Validating required environment variables..." + MISSING_SECRETS=() + + # Check Azure OpenAI secrets + if [ -z "${{ secrets.AZURE_OPENAI_ENDPOINT }}" ]; then + MISSING_SECRETS+=("AZURE_OPENAI_ENDPOINT") + fi + + if [ -z "${{ secrets.AZURE_OPENAI_API_KEY }}" ]; then + MISSING_SECRETS+=("AZURE_OPENAI_API_KEY") + fi + + if [ -z "${{ secrets.AZURE_OPENAI_DEPLOYMENT_NAME }}" ]; then + MISSING_SECRETS+=("AZURE_OPENAI_DEPLOYMENT_NAME") + fi + + if [ -z "${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}" ]; then + MISSING_SECRETS+=("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") + fi + + if [ -z "${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}" ]; then + MISSING_SECRETS+=("AZURE_OPENAI_EMBEDDING_ENDPOINT") + fi + + if [ -z "${{ secrets.SALT }}" ]; then + MISSING_SECRETS+=("SALT") + fi + + if [ -z "${{ secrets.ENCRYPTION_KEY }}" ]; then + MISSING_SECRETS+=("ENCRYPTION_KEY") + fi + + if [ -z "${{ secrets.NEXTAUTH_SECRET }}" ]; then + MISSING_SECRETS+=("NEXTAUTH_SECRET") + fi + + + # If any secrets are missing, fail + if [ ${#MISSING_SECRETS[@]} -gt 0 ]; then + echo "missing=true" >> $GITHUB_OUTPUT + echo "secrets_list=${MISSING_SECRETS[*]}" >> $GITHUB_OUTPUT + echo " Missing required secrets: ${MISSING_SECRETS[*]}" + exit 1 + else + echo "missing=false" >> $GITHUB_OUTPUT + echo " All required secrets are configured" + fi + + - name: Comment PR with missing secrets error + if: failure() && steps.validate_secrets.outputs.missing == 'true' + uses: actions/github-script@v7 + with: + script: | + const missingSecrets = '${{ steps.validate_secrets.outputs.secrets_list }}'.split(' '); + const secretsList = missingSecrets.map(s => `- \`${s}\``).join('\n'); + + const comment = `## RAG Module Integration Tests: Missing Required Secrets + + RAG Module Integration tests cannot run because the following GitHub secrets are not configured: + + ${secretsList} + + ### How to Fix + + 1. Go to **Settings** → **Secrets and variables** → **Actions** + 2. Add the missing secrets with the appropriate values: + + **Azure OpenAI Configuration:** + - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`) + - \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key + - \`AZURE_OPENAI_DEPLOYMENT_NAME\` - Chat model deployment name (e.g., \`gpt-4o-mini\`) + - \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`) + + + 3. Re-run the workflow after adding the secrets + + ### Note + Tests will not run until all required secrets are configured. 
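+          The validation step above also checks \`AZURE_OPENAI_EMBEDDING_ENDPOINT\`, \`SALT\`, \`ENCRYPTION_KEY\`, and \`NEXTAUTH_SECRET\`, so those secrets must be configured as well.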
+ + --- + *Workflow: ${context.workflow} | Run: [#${context.runNumber}](${context.payload.repository.html_url}/actions/runs/${context.runId})*`; + + // Find existing comment + const comments = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number + }); + + const existingComment = comments.data.find( + comment => comment.user.login === 'github-actions[bot]' && + comment.body.includes('RAG Module Integration Tests: Missing Required Secrets') + ); + + if (existingComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existingComment.id, + body: comment + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: comment + }); + } + + - name: Set up Python + if: success() + uses: actions/setup-python@v5 + with: + python-version-file: '.python-version' + + - name: Set up uv + if: success() + uses: astral-sh/setup-uv@v6 + + - name: Install dependencies (locked) + if: success() + run: uv sync --frozen + + - name: Create test directories with proper permissions + if: success() + run: | + mkdir -p test-vault/agents/llm + mkdir -p test-vault/agent-out + # Set ownership to current user and make writable + sudo chown -R $(id -u):$(id -g) test-vault + chmod -R 777 test-vault + # Ensure the agent-out directory is world-readable after writes + sudo chmod -R a+rwX test-vault/agent-out + + - name: Make Cron-Manager scripts executable + if: success() + run: | + chmod +x DSL/CronManager/script/*.sh + ls -la DSL/CronManager/script/ + + - name: Build Docker images + if: success() + run: docker compose -f docker-compose-test.yml build + + - name: Run Pytest Integration tests with testcontainers + if: success() + id: run_tests + env: + # Azure OpenAI - Chat Model + AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} + AZURE_OPENAI_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_DEPLOYMENT_NAME }} + # Azure OpenAI - Embedding Model + AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }} + AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} + SALT: ${{ secrets.SALT }} + ENCRYPTION_KEY: ${{ secrets.ENCRYPTION_KEY }} + NEXTAUTH_SECRET: ${{ secrets.NEXTAUTH_SECRET }} + run: | + # Run tests with testcontainers managing Docker Compose + uv run python -m pytest tests/integration_tests/ -v --tb=short --log-cli-level=INFO + + - name: Fix permissions on test artifacts + if: always() + run: | + sudo chown -R $(id -u):$(id -g) test-vault || true + sudo chmod -R a+rX test-vault || true + + - name: Cleanup Docker resources + if: always() + run: | + docker compose -f docker-compose-test.yml down -v --remove-orphans || true + docker system prune -f || true \ No newline at end of file diff --git a/.github/workflows/pytest-testcases-check.yml b/.github/workflows/pytest-testcases-check.yml deleted file mode 100644 index a147521..0000000 --- a/.github/workflows/pytest-testcases-check.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Pytest Testcases Check - -on: - pull_request: - branches: ["*"] # run on PRs to any branch - push: - branches: [main, dev, testing, wip] # optional; expand to ["*"] if you want all pushes - -jobs: - pytest-testcases: - name: Pytest Testcases Check - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - 
name: Set up Python - uses: actions/setup-python@v5 - with: - python-version-file: '.python-version' - - - name: Set up uv - uses: astral-sh/setup-uv@v6 - - # Format check only — fails if files are not formatted - - name: Run test cases using Pytest - run: uv run pytest tests \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7f54c28..986ffff 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ vault/agent-out # Snyk Security Extension - AI Rules (auto-generated) .github/instructions/snyk_rules.instructions.md +# Dynamically created Ruuter health endpoint for tests +DSL/Ruuter.private/rag-search/GET/health.yml diff --git a/README.md b/README.md index fd6ab79..d8e33a0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # BYK-RAG (Retrieval-Augmented Generation Module) -The **BYK-RAG Module** is part of the Burokratt ecosystem, designed to provide **retrieval-augmented generation (RAG)** capabilities for Estonian government digital services. It ensures reliable, multilingual, and compliant AI-powered responses by integrating with multiple LLM providers, syncing with knowledge bases, and exposing flexible configuration and monitoring features for administrators. +The **BYK-RAG Module** is part of the Burokratt ecosystem, designed to provide **retrieval-augmented generation (RAG)** capabilities for Estonian government digital services. It ensures reliable, multilingual, and compliant AI-powered responses by integrating with multiple LLM providers syncing with knowledge bases, and exposing flexible configuration and monitoring features for administrators. --- diff --git a/docker-compose-test.yml b/docker-compose-test.yml new file mode 100644 index 0000000..a9cfd5a --- /dev/null +++ b/docker-compose-test.yml @@ -0,0 +1,383 @@ +services: + # === Core Infrastructure === + + # Shared PostgreSQL database (used by both application and Langfuse) + rag_search_db: + image: postgres:14.1 + container_name: rag_search_db + restart: always + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: dbadmin + POSTGRES_DB: rag-search + volumes: + - test_rag_search_db:/var/lib/postgresql/data + ports: + - "5436:5432" + networks: + - test-network + + # Ruuter Private - API gateway for private endpoint + ruuter-private: + container_name: ruuter-private + image: ghcr.io/buerokratt/ruuter:v2.2.8 + environment: + - application.cors.allowedOrigins=http://localhost:8088,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090 + - application.httpCodesAllowList=200,201,202,204,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8088 + volumes: + - ./DSL/Ruuter.private:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8088:8088 + networks: + - test-network + + # Ruuter Public - API gateway for public endpoints + ruuter-public: + container_name: ruuter-public + image: ghcr.io/buerokratt/ruuter:v2.2.8 + environment: + - application.cors.allowedOrigins=http://localhost:8086,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090 + - application.httpCodesAllowList=200,201,202,204,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - 
application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8086 + volumes: + - ./DSL/Ruuter.public:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8086:8086 + networks: + - test-network + + # Cron-Manager - Scheduled job execution for vector indexing + cron-manager: + container_name: cron-manager + image: ghcr.io/buerokratt/cronmanager:python-1.2.0 + user: "root" + volumes: + - ./DSL/CronManager/DSL:/DSL + - ./DSL/CronManager/script:/app/scripts + - ./src/vector_indexer:/app/src/vector_indexer + - ./datasets:/app/datasets + - ./grafana-configs/loki_logger.py:/app/src/vector_indexer/loki_logger.py + - ./test-vault/agent-out:/agent/out:ro + environment: + - server.port=9010 + - PYTHONPATH=/app:/app/src:/app/src/vector_indexer + - VAULT_ADDR=http://vault:8200 + - VAULT_TOKEN_FILE=/agent/out/token + ports: + - 9010:8080 + depends_on: + - vault-agent-llm + - vault + networks: + - test-network + + # Resql - SQL-based microservice for RAG operations + resql: + container_name: resql + image: ghcr.io/buerokratt/resql:v1.3.6 + depends_on: + rag_search_db: + condition: service_started + environment: + - sqlms.datasources.[0].name=byk + - sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://rag_search_db:5432/rag-search #For LocalDb Use + # sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://171.22.247.13:5435/byk?sslmode=require + - sqlms.datasources.[0].username=postgres + - sqlms.datasources.[0].password=dbadmin + - logging.level.org.springframework.boot=INFO + ports: + - 8082:8082 + volumes: + - ./DSL/Resql:/DSL + - ./shared:/shared + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + networks: + - test-network + + # Vector database for RAG + qdrant: + image: qdrant/qdrant:v1.15.1 + container_name: qdrant + restart: always + ports: + - "6333:6333" + - "6334:6334" + volumes: + - test_qdrant_data:/qdrant/storage + networks: + - test-network + + # === Secret Management === + + # Vault - Secret management (dev mode) + vault: + image: hashicorp/vault:1.20.3 + container_name: vault + cap_add: + - IPC_LOCK + ports: + - "8200:8200" + environment: + VAULT_DEV_ROOT_TOKEN_ID: root + VAULT_ADDR: http://0.0.0.0:8200 + VAULT_API_ADDR: http://0.0.0.0:8200 + command: server -dev -dev-listen-address=0.0.0.0:8200 + networks: + - test-network + + # Vault Agent - Automatic token management via AppRole + vault-agent-llm: + image: hashicorp/vault:1.20.3 + container_name: vault-agent-llm + depends_on: + - vault + volumes: + - ./test-vault/agents/llm:/agent/in + - ./test-vault/agent-out:/agent/out + entrypoint: ["sh", "-c"] + command: + - | + # Wait for Vault to be ready + sleep 5 + echo "Waiting for AppRole credentials..." + while [ ! -f /agent/in/role_id ] || [ ! -s /agent/in/role_id ]; do + sleep 1 + done + while [ ! -f /agent/in/secret_id ] || [ ! -s /agent/in/secret_id ]; do + sleep 1 + done + echo "Credentials found, starting Vault Agent..." 
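+          # The role_id/secret_id files are written by the test bootstrap
+          # (conftest.py, _bootstrap_vault_dev). Once they appear, the agent logs
+          # in via AppRole and writes a token to /agent/out/token, which
+          # cron-manager and llm-orchestration-service mount read-only.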
+ exec vault agent -config=/agent/in/agent.hcl -log-level=debug + networks: + - test-network + + # === Langfuse Observability Stack === + + # Redis - Queue and cache for Langfuse + redis: + image: redis:7 + container_name: redis + restart: always + command: --requirepass myredissecret + ports: + - "127.0.0.1:6379:6379" + networks: + - test-network + + # MinIO - S3-compatible storage for Langfuse + minio: + image: minio/minio:latest + container_name: minio + restart: always + entrypoint: sh + command: -c "mkdir -p /data/langfuse && minio server /data --address ':9000' --console-address ':9001'" + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: miniosecret + ports: + - "9000:9000" + - "127.0.0.1:9091:9001" + volumes: + - test_minio_data:/data + networks: + - test-network + + # ClickHouse - Analytics database for Langfuse (REQUIRED in v3) + clickhouse: + image: clickhouse/clickhouse-server:24.3 + container_name: clickhouse + restart: always + environment: + CLICKHOUSE_DB: default + CLICKHOUSE_USER: default + CLICKHOUSE_PASSWORD: clickhouse + volumes: + - test_clickhouse_data:/var/lib/clickhouse + ports: + - "127.0.0.1:8123:8123" + - "127.0.0.1:9002:9000" + networks: + - test-network + ulimits: + nofile: + soft: 262144 + hard: 262144 + + # Langfuse Worker - Background job processor + langfuse-worker: + image: langfuse/langfuse-worker:3 + container_name: langfuse-worker + restart: always + depends_on: + - rag_search_db + - minio + - redis + - clickhouse + ports: + - "127.0.0.1:3030:3030" + environment: + # Database + DATABASE_URL: postgresql://postgres:dbadmin@rag_search_db:5432/rag-search + + # Auth & Security (TEST VALUES ONLY - NOT FOR PRODUCTION) + # gitleaks:allow - These are test-only hex strings + NEXTAUTH_URL: http://localhost:3000 + SALT: ${SALT} + ENCRYPTION_KEY: ${ENCRYPTION_KEY} + + # Features + TELEMETRY_ENABLED: "false" + LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES: "false" + + # ClickHouse (REQUIRED for Langfuse v3) + CLICKHOUSE_MIGRATION_URL: clickhouse://clickhouse:9000/default + CLICKHOUSE_URL: http://clickhouse:8123 + CLICKHOUSE_USER: default + CLICKHOUSE_PASSWORD: clickhouse + CLICKHOUSE_CLUSTER_ENABLED: "false" + + # S3/MinIO Event Upload + LANGFUSE_S3_EVENT_UPLOAD_BUCKET: langfuse + LANGFUSE_S3_EVENT_UPLOAD_REGION: us-east-1 + LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID: minio + LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY: miniosecret + LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT: http://minio:9000 + LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE: "true" + + # S3/MinIO Media Upload + LANGFUSE_S3_MEDIA_UPLOAD_BUCKET: langfuse + LANGFUSE_S3_MEDIA_UPLOAD_REGION: us-east-1 + LANGFUSE_S3_MEDIA_UPLOAD_ACCESS_KEY_ID: minio + LANGFUSE_S3_MEDIA_UPLOAD_SECRET_ACCESS_KEY: miniosecret + LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT: http://minio:9000 + LANGFUSE_S3_MEDIA_UPLOAD_FORCE_PATH_STYLE: "true" + + # Redis + REDIS_HOST: redis + REDIS_PORT: "6379" + REDIS_AUTH: myredissecret + networks: + - test-network + + # Langfuse Web - UI and API + langfuse-web: + image: langfuse/langfuse:3 + container_name: langfuse-web + restart: always + depends_on: + - langfuse-worker + - rag_search_db + - clickhouse + ports: + - "3000:3000" + environment: + # Database + DATABASE_URL: postgresql://postgres:dbadmin@rag_search_db:5432/rag-search + + # Auth & Security (TEST VALUES ONLY - NOT FOR PRODUCTION) + # gitleaks:allow - These are test-only hex strings + NEXTAUTH_URL: http://localhost:3000 + NEXTAUTH_SECRET: ${NEXTAUTH_SECRET} + SALT: ${SALT} + ENCRYPTION_KEY: ${ENCRYPTION_KEY} + + # Features + TELEMETRY_ENABLED: "false" + 
LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES: "false" + + # ClickHouse (REQUIRED for Langfuse v3) + CLICKHOUSE_MIGRATION_URL: clickhouse://clickhouse:9000/default + CLICKHOUSE_URL: http://clickhouse:8123 + CLICKHOUSE_USER: default + CLICKHOUSE_PASSWORD: clickhouse + CLICKHOUSE_CLUSTER_ENABLED: "false" + + # S3/MinIO Event Upload + LANGFUSE_S3_EVENT_UPLOAD_BUCKET: langfuse + LANGFUSE_S3_EVENT_UPLOAD_REGION: us-east-1 + LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID: minio + LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY: miniosecret + LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT: http://minio:9000 + LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE: "true" + + # S3/MinIO Media Upload + LANGFUSE_S3_MEDIA_UPLOAD_BUCKET: langfuse + LANGFUSE_S3_MEDIA_UPLOAD_REGION: us-east-1 + LANGFUSE_S3_MEDIA_UPLOAD_ACCESS_KEY_ID: minio + LANGFUSE_S3_MEDIA_UPLOAD_SECRET_ACCESS_KEY: miniosecret + LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT: http://minio:9000 + LANGFUSE_S3_MEDIA_UPLOAD_FORCE_PATH_STYLE: "true" + + # Redis + REDIS_HOST: redis + REDIS_PORT: "6379" + REDIS_AUTH: myredissecret + + # Initialize test project with known credentials + LANGFUSE_INIT_PROJECT_PUBLIC_KEY: pk-lf-test + LANGFUSE_INIT_PROJECT_SECRET_KEY: sk-lf-test + networks: + - test-network + + # === LLM Orchestration Service === + + llm-orchestration-service: + build: + context: . + dockerfile: Dockerfile.llm_orchestration_service + container_name: llm-orchestration-service + restart: always + ports: + - "8100:8100" + environment: + # Infrastructure connections + - VAULT_ADDR=http://vault:8200 + - VAULT_TOKEN_FILE=/agent/out/token + - QDRANT_URL=http://qdrant:6333 + - EVAL_MODE=true + # Disable OpenTelemetry tracing in test environment + - OTEL_SDK_DISABLED=true + volumes: + - ./src/llm_config_module/config:/app/src/llm_config_module/config:ro + - ./test-vault/agent-out:/agent/out:ro + - test_llm_orchestration_logs:/app/logs + depends_on: + - qdrant + - langfuse-web + - vault-agent-llm + networks: + - test-network + +# === Networks === + +networks: + test-network: + name: test-network + driver: bridge + +# === Volumes === + +volumes: + test_rag_search_db: + name: test_rag_search_db + test_qdrant_data: + name: test_qdrant_data + test_minio_data: + name: test_minio_data + test_clickhouse_data: + name: test_clickhouse_data + test_llm_orchestration_logs: + name: test_llm_orchestration_logs \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a2692fc..dd8f876 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,8 @@ dependencies = [ "nemoguardrails>=0.16.0", "tiktoken>=0.11.0", "langfuse>=3.8.1", + "minio>=7.2.0", + "psycopg2-binary>=2.9.11", ] [tool.ruff] diff --git a/test-vault/agents/llm/agent.hcl b/test-vault/agents/llm/agent.hcl new file mode 100644 index 0000000..9883bfe --- /dev/null +++ b/test-vault/agents/llm/agent.hcl @@ -0,0 +1,45 @@ +vault { + # Inside Docker network, the service name "vault" resolves to the dev Vault + address = "http://vault:8200" +} + +pid_file = "/agent/out/pidfile" + +auto_auth { + method "approle" { + mount_path = "auth/approle" + config = { + role_id_file_path = "/agent/in/role_id" + secret_id_file_path = "/agent/in/secret_id" + remove_secret_id_file_after_reading = false # test-friendly + } + } + + sink "file" { + config = { + path = "/agent/out/token" + } + } +} + +# In-memory cache (free, no Enterprise license) +cache { + default_lease_duration = "1h" +} + +# Listener is required for Agent’s internal servers (not exposed) +listener "tcp" { + address = "127.0.0.1:8201" + tls_disable = true +} + +# dummy template so 
cache is “active” (some versions require this) +template { + source = "/dev/null" + destination = "/agent/out/dummy" +} + +# Disable API proxy; not needed here +api_proxy { + disable = true +} \ No newline at end of file diff --git a/test-vault/agents/llm/role_id b/test-vault/agents/llm/role_id new file mode 100644 index 0000000..e69de29 diff --git a/test-vault/agents/llm/secret_id b/test-vault/agents/llm/secret_id new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py new file mode 100644 index 0000000..333771a --- /dev/null +++ b/tests/integration_tests/conftest.py @@ -0,0 +1,1490 @@ +import pytest +from testcontainers.compose import DockerCompose +from minio import Minio +from qdrant_client import QdrantClient +from pathlib import Path +import time +import json +import os +import hvac +import subprocess +from typing import Dict, Any, Optional, Generator +import requests +from loguru import logger + + +# ===================== VaultAgentClient ===================== +class VaultAgentClient: + """Client for interacting with Vault using a token written by Vault Agent""" + + def __init__( + self, + vault_url: str, + token_path: Path = Path("test-vault/agent-out/token"), + mount_point: str = "secret", + timeout: int = 10, + ): + self.vault_url = vault_url + self.token_path = token_path + self.mount_point = mount_point + + self.client = hvac.Client(url=self.vault_url, timeout=timeout) + self._load_token() + + def _load_token(self) -> None: + """Load token from file written by Vault Agent""" + if not self.token_path.exists(): + raise FileNotFoundError(f"Vault token file missing: {self.token_path}") + token = self.token_path.read_text().strip() + if not token: + raise ValueError("Vault token file is empty") + self.client.token = token + + def is_authenticated(self) -> bool: + """Check if the current token is valid""" + try: + return self.client.is_authenticated() + except Exception as e: + logger.warning(f"Vault token is not valid: {e}") + return False + + def is_vault_available(self) -> bool: + """Check if Vault is initialized and unsealed""" + try: + status = self.client.sys.read_health_status(method="GET") + return ( + isinstance(status, dict) + and status.get("initialized", False) + and not status.get("sealed", True) + ) + except Exception as e: + logger.warning(f"Vault availability check failed: {e}") + return False + + def get_secret(self, path: str) -> dict: + """Read a secret from Vault KV v2""" + try: + result = self.client.secrets.kv.v2.read_secret_version( + path=path, mount_point=self.mount_point + ) + return result["data"]["data"] + except Exception as e: + logger.error(f"Failed to read Vault secret at {path}: {e}") + raise + + +class RAGStackTestContainers: + """Manages test containers for RAG stack including Vault, Qdrant, Langfuse, and LLM orchestration service""" + + def __init__(self, compose_file_name: str = "docker-compose-test.yml"): + self.project_root = Path(__file__).parent.parent.parent + self.compose_file_path = self.project_root / compose_file_name + self.compose: Optional[DockerCompose] = None + self.services_info: Dict[str, Dict[str, Any]] = {} + + if not self.compose_file_path.exists(): + raise FileNotFoundError( + f"Docker compose file not found: {self.compose_file_path}" + ) + + def start(self) -> None: + """Start all test containers and bootstrap Vault""" + logger.info("Starting RAG Stack testcontainers...") + # Prepare Vault Agent directories + agent_in = self.project_root / "test-vault" 
/ "agents" / "llm" + agent_out = self.project_root / "test-vault" / "agent-out" + agent_in.mkdir(parents=True, exist_ok=True) + agent_out.mkdir(parents=True, exist_ok=True) + + # Clean up any stale files from previous runs + for f in ["role_id", "secret_id", "token", "pidfile", "dummy"]: + (agent_in / f).unlink(missing_ok=True) + (agent_out / f).unlink(missing_ok=True) + + # Create Ruuter health endpoint for tests + self._create_ruuter_health_endpoint() + + # Remove .guard files BEFORE starting containers + # (Ruuter loads DSL on startup, so guards must be removed before that) + self._remove_ruuter_guard_files() + + # Start all Docker Compose services + logger.info("Starting Docker Compose services...") + self.compose = DockerCompose( + str(self.project_root), + compose_file_name=self.compose_file_path.name, + pull=False, + ) + self.compose.start() + + # Get Vault connection details + vault_url = self._get_vault_url() + logger.info(f"Vault URL: {vault_url}") + + # Wait for Vault to be ready + self._wait_for_vault_ready(vault_url) + + # Configure Vault with AppRole, policies, and test secrets + self._bootstrap_vault_dev(agent_in, vault_url) + + # Verify credentials were written successfully + role_id = (agent_in / "role_id").read_text().strip() + secret_id = (agent_in / "secret_id").read_text().strip() + logger.info( + f"AppRole credentials written: role_id={role_id[:8]}..., secret_id={secret_id[:8]}..." + ) + + # Restart vault-agent to ensure it picks up the credentials + logger.info("Restarting vault-agent to authenticate...") + try: + import subprocess + + subprocess.run( + ["docker", "restart", "vault-agent-llm"], + check=True, + capture_output=True, + ) + logger.info("vault-agent restarted") + time.sleep(3) # Give it time to start + except Exception as e: + logger.warning(f"Could not restart vault-agent: {e}") + + # Wait for Vault Agent to authenticate and write token + logger.info("Waiting for vault-agent to authenticate...") + self._wait_for_valid_token(agent_out / "token", vault_url, max_attempts=20) + + logger.info("Vault Agent authenticated successfully") + + # Wait for other services to be ready + self._wait_for_services() + self._collect_service_info() + + # Run database migration + self._run_database_migration() + + logger.info("RAG Stack testcontainers ready") + + def stop(self) -> None: + """Stop all test containers""" + if self.compose: + logger.info("Stopping RAG Stack testcontainers...") + self.compose.stop() + logger.info("Testcontainers stopped") + + # Clean up test files + self._remove_ruuter_health_endpoint() + + def _get_vault_url(self) -> str: + """Get the mapped Vault URL accessible from the host""" + if not self.compose: + raise RuntimeError("Docker Compose not initialized") + host = self.compose.get_service_host("vault", 8200) + port = self.compose.get_service_port("vault", 8200) + return f"http://{host}:{port}" + + def _wait_for_vault_ready(self, vault_url: str, timeout: int = 60) -> None: + """Wait for Vault to be initialized and unsealed""" + logger.info("Waiting for Vault to be available...") + client = hvac.Client(url=vault_url, token="root", timeout=10) + + start = time.time() + while time.time() - start < timeout: + try: + status = client.sys.read_health_status(method="GET") + if status.get("initialized", False) and not status.get("sealed", True): + logger.info("Vault is available and unsealed") + return + except Exception as e: + logger.debug(f"Vault not ready yet: {e}") + time.sleep(2) + + raise TimeoutError("Vault did not become available within 60s") + 
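+    def debug_approle_login(self, vault_url: str) -> dict:
+        """Illustrative debugging helper, not called by any fixture.
+
+        A minimal sketch of the same AppRole login the Vault Agent performs: it
+        reads the role_id/secret_id written by _bootstrap_vault_dev() (so it
+        assumes the bootstrap has already run) and reads back the chat-model
+        secret from KV v2.
+        """
+        agent_in = self.project_root / "test-vault" / "agents" / "llm"
+        client = hvac.Client(url=vault_url)
+        # Same credentials the Vault Agent consumes from /agent/in
+        client.auth.approle.login(
+            role_id=(agent_in / "role_id").read_text().strip(),
+            secret_id=(agent_in / "secret_id").read_text().strip(),
+        )
+        secret = client.secrets.kv.v2.read_secret_version(
+            path="llm/connections/azure_openai/production/gpt-4o-mini",
+            mount_point="secret",
+        )
+        return secret["data"]["data"]
+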
+ def _bootstrap_vault_dev(self, agent_in: Path, vault_url: str) -> None: + """ + Bootstrap Vault dev instance with: + - AppRole auth method + - Policy for LLM orchestration service + - AppRole role and credentials + - Test secrets (LLM connections, Langfuse, embeddings, guardrails) + """ + logger.info("Bootstrapping Vault with AppRole and test secrets...") + client = hvac.Client(url=vault_url, token="root") + + # Enable AppRole authentication method + if "approle/" not in client.sys.list_auth_methods(): + client.sys.enable_auth_method("approle") + logger.info("AppRole enabled") + + # Create policy with permissions for all secret paths (updated with correct embedding paths) + policy = """ +path "secret/metadata/llm/*" { capabilities = ["list"] } +path "secret/data/llm/*" { capabilities = ["read"] } +path "secret/metadata/langfuse/*" { capabilities = ["list"] } +path "secret/data/langfuse/*" { capabilities = ["read"] } +path "secret/metadata/embeddings/*" { capabilities = ["list"] } +path "secret/data/embeddings/*" { capabilities = ["read"] } +path "secret/metadata/guardrails/*" { capabilities = ["list"] } +path "secret/data/guardrails/*" { capabilities = ["read"] } +path "auth/token/lookup-self" { capabilities = ["read"] } +path "auth/token/renew-self" { capabilities = ["update"] } +""" + client.sys.create_or_update_policy("llm-orchestration", policy) + logger.info("Policy 'llm-orchestration' created") + + # Create AppRole role with service token type + role_name = "llm-orchestration-service" + client.write( + f"auth/approle/role/{role_name}", + **{ + "token_policies": "llm-orchestration", + "secret_id_ttl": "24h", + "token_ttl": "1h", + "token_max_ttl": "24h", + "secret_id_num_uses": 0, + "bind_secret_id": True, + "token_no_default_policy": True, + "token_type": "service", + }, + ) + logger.info(f"AppRole '{role_name}' created") + + # Generate credentials for the AppRole + role_id = client.read(f"auth/approle/role/{role_name}/role-id")["data"][ + "role_id" + ] + secret_id = client.write(f"auth/approle/role/{role_name}/secret-id")["data"][ + "secret_id" + ] + + # Write credentials to files that Vault Agent will read + (agent_in / "role_id").write_text(role_id, encoding="utf-8") + (agent_in / "secret_id").write_text(secret_id, encoding="utf-8") + logger.info("AppRole credentials written to agent-in/") + + # Write test secrets + self._write_test_secrets(client) + + def _write_test_secrets(self, client: hvac.Client) -> None: + """Write all test secrets to Vault with correct path structure""" + + # ============================================================ + # CRITICAL DEBUG SECTION - Environment Variables + # ============================================================ + logger.info("VAULT SECRET BOOTSTRAP - ENVIRONMENT VARIABLES DEBUG") + + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + azure_api_key = os.getenv("AZURE_OPENAI_API_KEY") + azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") + azure_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") + azure_embedding_endpoint = os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT") + + # Validate critical environment variables + missing_vars = [] + if not azure_endpoint: + missing_vars.append("AZURE_OPENAI_ENDPOINT") + if not azure_api_key: + missing_vars.append("AZURE_OPENAI_API_KEY") + if not azure_embedding_deployment: + missing_vars.append("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") + if not azure_embedding_endpoint: + missing_vars.append("AZURE_OPENAI_EMBEDDING_ENDPOINT") + + if missing_vars: + error_msg = f"CRITICAL: 
Missing required environment variables: {', '.join(missing_vars)}" + logger.error(error_msg) + raise ValueError(error_msg) + + logger.info("All required environment variables are set") + logger.info("=" * 80) + + # ============================================================ + # CHAT MODEL SECRET (LLM path) + # ============================================================ + logger.info("") + logger.info("Writing LLM connection secret (chat model)...") + llm_secret = { + "connection_id": "gpt-4o-mini", + "endpoint": azure_endpoint, + "api_key": azure_api_key, + "deployment_name": azure_deployment or "gpt-4o-mini", + "environment": "production", + "model": "gpt-4o-mini", + "model_type": "chat", + "api_version": "2024-02-15-preview", + "tags": "azure,test,chat", + } + + logger.info(f" chat deployment: {llm_secret['deployment_name']}") + logger.info(f" endpoint: {llm_secret['endpoint']}") + logger.info(f" connection_id: {llm_secret['connection_id']}") + + client.secrets.kv.v2.create_or_update_secret( + mount_point="secret", + path="llm/connections/azure_openai/production/gpt-4o-mini", + secret=llm_secret, + ) + logger.info( + "LLM connection secret written to llm/connections/azure_openai/production/gpt-4o-mini" + ) + + # ============================================================ + # EMBEDDING MODEL SECRET (Embeddings path) + # ============================================================ + logger.info("") + logger.info("Writing embedding model secret...") + embedding_secret = { + "connection_id": "2", + "endpoint": azure_embedding_endpoint, + "api_key": azure_api_key, + "deployment_name": azure_embedding_deployment, + "environment": "production", + "model": "text-embedding-3-large", + "api_version": "2024-12-01-preview", + "tags": "azure,test,text-embedding-3-large", + } + + logger.info(f" → model: {embedding_secret['model']}") + logger.info(f" → connection_id: {embedding_secret['connection_id']}") + logger.info( + " → Vault path: embeddings/connections/azure_openai/production/text-embedding-3-large" + ) + + # Write to embeddings path with connection_id in the path + client.secrets.kv.v2.create_or_update_secret( + mount_point="secret", + path="embeddings/connections/azure_openai/production/text-embedding-3-large", + secret=embedding_secret, + ) + logger.info( + "Embedding secret written to embeddings/connections/azure_openai/production/text-embedding-3-large" + ) + + # ============================================================ + # VERIFY SECRETS WERE WRITTEN CORRECTLY + # ============================================================ + logger.info("") + logger.info("Verifying secrets in Vault...") + try: + # Verify LLM path + verify_llm = client.secrets.kv.v2.read_secret_version( + path="llm/connections/azure_openai/production/gpt-4o-mini", + mount_point="secret", + ) + llm_data = verify_llm["data"]["data"] + logger.info("LLM path verified:") + logger.info(f" connection_id: {llm_data.get('connection_id')}") + + # Verify embeddings path + verify_embedding = client.secrets.kv.v2.read_secret_version( + path="embeddings/connections/azure_openai/production/text-embedding-3-large", + mount_point="secret", + ) + embedding_data = verify_embedding["data"]["data"] + logger.info("Embeddings path verified:") + logger.info(f" model: {embedding_data.get('model')}") + logger.info(f" connection_id: {embedding_data.get('connection_id')}") + + # Critical validation + if embedding_data.get("deployment_name") != azure_embedding_deployment: + error_msg = ( + "VAULT SECRET MISMATCH! 
" + f"Expected deployment_name='{azure_embedding_deployment}' " + f"but Vault has '{embedding_data.get('deployment_name')}'" + ) + logger.error(error_msg) + raise ValueError(error_msg) + + if embedding_data.get("connection_id") != "2": + error_msg = ( + "VAULT SECRET MISMATCH! " + "Expected connection_id='2' " + f"but Vault has '{embedding_data.get('connection_id')}'" + ) + logger.error(error_msg) + raise ValueError(error_msg) + + logger.info("Secret verification PASSED") + + except Exception as e: + logger.error(f"Failed to verify secrets: {e}") + raise + + # add the same secret configs to the 'testing' environment for test purposes + # connection_id is 1 (must match the database connection ID created by ensure_testing_connection) + llm_secret = { + "connection_id": 1, + "endpoint": azure_endpoint, + "api_key": azure_api_key, + "deployment_name": azure_deployment or "gpt-4o-mini", + "environment": "test", + "model": "gpt-4o-mini", + "model_type": "chat", + "api_version": "2024-02-15-preview", + "tags": "azure,test,chat", + } + client.secrets.kv.v2.create_or_update_secret( + mount_point="secret", + path="llm/connections/azure_openai/test/1", + secret=llm_secret, + ) + + embedding_secret = { + "connection_id": 1, + "endpoint": azure_embedding_endpoint, + "api_key": azure_api_key, + "deployment_name": azure_embedding_deployment, + "environment": "test", + "model": "text-embedding-3-large", + "api_version": "2024-12-01-preview", + "tags": "azure,test,text-embedding-3-large", + } + # Write to embeddings path with connection_id in the path + client.secrets.kv.v2.create_or_update_secret( + mount_point="secret", + path="embeddings/connections/azure_openai/test/1", + secret=embedding_secret, + ) + + # ============================================================ + # LANGFUSE CONFIGURATION + # ============================================================ + logger.info("") + logger.info("Writing Langfuse configuration secret...") + langfuse_secret = { + "public_key": "pk-lf-test", + "secret_key": "sk-lf-test", + "host": "http://langfuse-web:3000", + } + client.secrets.kv.v2.create_or_update_secret( + mount_point="secret", path="langfuse/config", secret=langfuse_secret + ) + logger.info("Langfuse configuration secret written") + + # ============================================================ + # GUARDRAILS CONFIGURATION + # ============================================================ + + logger.info("ALL SECRETS WRITTEN SUCCESSFULLY") + + def _run_database_migration(self) -> None: + """Run Liquibase database migration using migrate.sh script.""" + logger.info("Running database migration...") + + try: + # Run the migrate.sh script from the project root + # Note: migrate.sh uses network 'bykstack' but we use 'test-network' + # So we need to run Liquibase directly with the test network + result = subprocess.run( + [ + "docker", + "run", + "--rm", + "--network", + "test-network", + "-v", + f"{self.project_root}/DSL/Liquibase/changelog:/liquibase/changelog", + "-v", + f"{self.project_root}/DSL/Liquibase/master.yml:/liquibase/master.yml", + "-v", + f"{self.project_root}/DSL/Liquibase/data:/liquibase/data", + "liquibase/liquibase:4.33", + "--defaultsFile=/liquibase/changelog/liquibase.properties", + "--changelog-file=master.yml", + "--url=jdbc:postgresql://rag_search_db:5432/rag-search?user=postgres", + "--password=dbadmin", + "update", + ], + capture_output=True, + text=True, + timeout=120, + cwd=str(self.project_root), + ) + + if result.returncode == 0: + logger.info("Database migration completed 
successfully")
+                logger.debug(f"Migration output: {result.stdout}")
+            else:
+                logger.error(f"Database migration failed with code {result.returncode}")
+                logger.error(f"STDOUT: {result.stdout}")
+                logger.error(f"STDERR: {result.stderr}")
+                raise RuntimeError(f"Database migration failed: {result.stderr}")
+
+        except subprocess.TimeoutExpired:
+            logger.error("Database migration timed out after 120 seconds")
+            raise
+        except Exception as e:
+            logger.error(f"Failed to run database migration: {e}")
+            raise
+
+    def _capture_service_logs(self) -> None:
+        """Capture logs from all services before cleanup."""
+        services = [
+            "llm-orchestration-service",
+            "ruuter-public",
+            "ruuter-private",
+            "cron-manager",
+            "vault",
+            "qdrant",
+            "langfuse-web",
+        ]
+
+        for service in services:
+            try:
+                logger.info(f"\n{'=' * 60}")
+                logger.info(f"LOGS: {service}")
+                logger.info("=" * 60)
+
+                result = subprocess.run(
+                    [
+                        "docker",
+                        "compose",
+                        "-f",
+                        str(self.compose_file_path),
+                        "logs",
+                        "--tail",
+                        "200",
+                        service,
+                    ],
+                    capture_output=True,
+                    text=True,
+                    timeout=10,
+                    cwd=str(self.project_root),
+                )
+
+                if result.stdout:
+                    logger.info(result.stdout)
+                if result.stderr:
+                    logger.error(result.stderr)
+
+            except Exception as e:
+                logger.error(f"Failed to capture logs for {service}: {e}")
+
+    def _wait_for_valid_token(
+        self, token_path: Path, vault_url: str, max_attempts: int = 20
+    ) -> None:
+        """Wait for Vault Agent to write a valid token and verify it works"""
+        for attempt in range(max_attempts):
+            if token_path.exists() and token_path.stat().st_size > 0:
+                try:
+                    # Fix permissions before reading
+                    self._fix_token_file_permissions(token_path)
+
+                    token = token_path.read_text().strip()
+
+                    client = hvac.Client(url=vault_url, token=token)
+                    try:
+                        client.lookup_token()
+
+                        if client.is_authenticated():
+                            logger.info(f"Valid token obtained (attempt {attempt + 1})")
+                            self._verify_token_permissions(client)
+                            return
+                    except Exception as e:
+                        if
attempt < max_attempts - 1: + logger.debug( + f"Token validation error (attempt {attempt + 1}): {type(e).__name__}" + ) + except PermissionError as e: + logger.warning( + f"Permission error reading token file (attempt {attempt + 1}): {e}" + ) + # Try to fix permissions again + self._fix_token_file_permissions(token_path, force=True) + + time.sleep(2) + + logger.error("Failed to obtain valid Vault token") + self._check_agent_logs() + raise TimeoutError( + f"Failed to obtain valid Vault token after {max_attempts} attempts" + ) + + def _fix_token_file_permissions( + self, token_path: Path, force: bool = False + ) -> None: + """Fix permissions on token file to make it readable by host user""" + try: + # Try to change permissions using subprocess (requires Docker to be accessible) + if force: + logger.info( + "Attempting to fix token file permissions using docker exec..." + ) + result = subprocess.run( + [ + "docker", + "exec", + "vault-agent-llm", + "chmod", + "644", + "/agent/out/token", + ], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + logger.info( + "Successfully fixed token file permissions via docker exec" + ) + else: + logger.warning( + f"Failed to fix permissions via docker exec: {result.stderr}" + ) + + # Also try direct chmod (may not work in all environments) + try: + os.chmod(token_path, 0o644) + except Exception as chmod_error: + logger.debug( + f"Direct chmod failed (expected in some environments): {chmod_error}" + ) + + except Exception as e: + logger.debug(f"Could not fix token file permissions: {e}") + + def _verify_token_permissions(self, client: hvac.Client) -> None: + """Verify the token has correct permissions to read secrets""" + try: + client.secrets.kv.v2.read_secret_version( + path="llm/connections/azure_openai/production/gpt-4o-mini", + mount_point="secret", + ) + logger.info("Token has correct permissions to read secrets") + except Exception as e: + logger.error(f"Token cannot read secrets: {e}") + raise + + def _check_agent_logs(self) -> None: + """Check vault-agent logs for debugging authentication issues""" + result = subprocess.run( + ["docker", "logs", "--tail", "50", "vault-agent-llm"], + capture_output=True, + text=True, + ) + logger.error(f"Vault Agent Logs:\n{result.stdout}\n{result.stderr}") + + def _wait_for_services(self, total_timeout: int = 300) -> None: + """Wait for all services to be healthy""" + services = [ + ("qdrant", 6333, self._check_qdrant, 60), + ("ruuter-private", 8088, self._check_ruuter_private, 90), + ("ruuter-public", 8086, self._check_ruuter_public, 90), + ("langfuse-web", 3000, self._check_langfuse, 120), + ("llm-orchestration-service", 8100, self._check_orchestration, 180), + ] + start = time.time() + for name, port, check, timeout in services: + self._wait_single(name, port, check, timeout, start, total_timeout) + + def _wait_single( + self, + name: str, + port: int, + check: Any, + timeout: int, + global_start: float, + total_timeout: int, + ) -> None: + """Wait for a single service to be ready""" + if self.compose is None: + return + + logger.info(f"Waiting for {name}...") + start = time.time() + attempt = 0 + while time.time() - start < timeout: + attempt += 1 + elapsed = time.time() - start + try: + host = self.compose.get_service_host(name, port) + mapped_port = self.compose.get_service_port(name, port) + logger.debug( + f"{name} - Attempt {attempt} ({elapsed:.1f}s) - Checking {host}:{mapped_port}" + ) + if check(host, mapped_port): + logger.info( + f"{name} ready at {host}:{mapped_port} 
(took {elapsed:.1f}s, {attempt} attempts)" + ) + self.services_info[name] = { + "host": host, + "port": mapped_port, + "url": f"http://{host}:{mapped_port}", + } + return + except Exception as e: + logger.debug(f"{name} - Attempt {attempt} failed: {e}") + time.sleep(3) + + elapsed_total = time.time() - start + raise TimeoutError( + f"Timeout waiting for {name} after {elapsed_total:.1f}s ({attempt} attempts)" + ) + + def _check_qdrant(self, host: str, port: int) -> bool: + """Check if Qdrant is ready""" + try: + r = requests.get(f"http://{host}:{port}/collections", timeout=5) + return r.status_code == 200 + except Exception: + return False + + def _check_ruuter_private(self, host: str, port: int) -> bool: + """Check if Ruuter Private is ready using the /health endpoint""" + try: + # Use the health endpoint we created for testing + r = requests.get(f"http://{host}:{port}/rag-search/health", timeout=5) + logger.debug( + f"Ruuter Private health check - Status: {r.status_code}, Response: {r.text[:100]}" + ) + + # If we get 200, Ruuter is processing DSL correctly + if r.status_code == 200: + logger.debug("Ruuter Private health check passed with 200 status") + return True + + logger.debug( + f"Ruuter Private health check failed - unexpected status: {r.status_code}" + ) + return False + except Exception as e: + logger.debug( + f"Ruuter Private health check exception: {type(e).__name__}: {e}" + ) + return False + + def _check_ruuter_public(self, host: str, port: int) -> bool: + """Check if Ruuter Public is ready using the /health endpoint""" + try: + # Use the health endpoint we created for testing + r = requests.get(f"http://{host}:{port}/rag-search/health", timeout=5) + logger.debug( + f"Ruuter Public health check - Status: {r.status_code}, Response: {r.text[:100]}" + ) + + # If we get 200, Ruuter is processing DSL correctly + if r.status_code == 200: + logger.debug("Ruuter Public health check passed with 200 status") + return True + + logger.debug( + f"Ruuter Public health check failed - unexpected status: {r.status_code}" + ) + return False + except Exception as e: + logger.debug( + f"Ruuter Public health check exception: {type(e).__name__}: {e}" + ) + return False + + def _check_langfuse(self, host: str, port: int) -> bool: + """Check if Langfuse is ready""" + try: + r = requests.get(f"http://{host}:{port}/api/public/health", timeout=5) + return r.status_code == 200 + except Exception: + return False + + def _check_orchestration(self, host: str, port: int) -> bool: + """Check if LLM orchestration service is healthy""" + try: + r = requests.get(f"http://{host}:{port}/health", timeout=5) + return r.status_code == 200 and r.json().get("status") == "healthy" + except Exception: + return False + + def _collect_service_info(self) -> None: + """Collect service connection information""" + if self.compose: + self.services_info["vault"] = { + "host": self.compose.get_service_host("vault", 8200), + "port": self.compose.get_service_port("vault", 8200), + "url": self._get_vault_url(), + } + + def _remove_ruuter_guard_files(self) -> None: + """ + Remove .guard files from Ruuter DSL to disable authentication during tests. + + The .guard files are used by Ruuter to enforce authentication on endpoints. + For integration tests, we need to disable this authentication. + + Note: Files are simply removed (not backed up) since they're in git. + After tests, `git restore` can be used to restore them if needed. 
+ """ + guard_files = [ + "DSL/Ruuter.private/rag-search/GET/.guard", + "DSL/Ruuter.private/rag-search/POST/.guard", + "DSL/Ruuter.private/rag-search/POST/accounts/.guard", + ] + + for guard_file in guard_files: + guard_path = self.project_root / guard_file + if guard_path.exists(): + try: + guard_path.unlink() + logger.info(f"Removed guard file: {guard_file}") + except Exception as e: + logger.warning(f"Failed to remove guard file {guard_file}: {e}") + else: + logger.debug(f"Guard file not found (already removed?): {guard_file}") + + def _create_ruuter_health_endpoint(self) -> None: + """ + Create a simple /health endpoint for Ruuter health checks during tests. + + This endpoint is created dynamically and not committed to the repository. + It's used to verify Ruuter is responding properly during test setup. + Creates health endpoints for both Ruuter.private and Ruuter.public. + """ + health_dsl_content = """declaration: + call: declare + version: 0.1 + description: "Health check endpoint for tests" + method: get + accepts: json + returns: json + namespace: rag-search + +return_health: + return: '{"status":"healthy","service":"ruuter"}' + next: end +""" + + # Create health endpoint for both Ruuter.private and Ruuter.public + for ruuter_dir in ["Ruuter.private", "Ruuter.public"]: + health_endpoint_dir = ( + self.project_root / "DSL" / ruuter_dir / "rag-search" / "GET" + ) + health_endpoint_dir.mkdir(parents=True, exist_ok=True) + + health_endpoint_path = health_endpoint_dir / "health.yml" + + try: + health_endpoint_path.write_text(health_dsl_content) + logger.info( + f"Created {ruuter_dir} health endpoint: {health_endpoint_path}" + ) + except Exception as e: + logger.warning(f"Failed to create {ruuter_dir} health endpoint: {e}") + + def _remove_ruuter_health_endpoint(self) -> None: + """ + Remove the dynamically created /health endpoint after tests complete. + Removes health endpoints from both Ruuter.private and Ruuter.public. + """ + # Remove health endpoint from both Ruuter.private and Ruuter.public + for ruuter_dir in ["Ruuter.private", "Ruuter.public"]: + health_endpoint_path = ( + self.project_root + / "DSL" + / ruuter_dir + / "rag-search" + / "GET" + / "health.yml" + ) + + if health_endpoint_path.exists(): + try: + health_endpoint_path.unlink() + logger.info(f"Removed {ruuter_dir} health endpoint") + except Exception as e: + logger.warning( + f"Failed to remove {ruuter_dir} health endpoint: {e}" + ) + else: + logger.debug( + f"{ruuter_dir} health endpoint file not found (already removed?)" + ) + + def get_orchestration_service_url(self) -> str: + """Get the URL for the LLM orchestration service""" + return self.services_info["llm-orchestration-service"]["url"] + + def get_qdrant_url(self) -> str: + """Get the URL for Qdrant""" + return self.services_info["qdrant"]["url"] + + def get_vault_url(self) -> str: + """Get the URL for Vault""" + return self.services_info["vault"]["url"] + + def get_langfuse_url(self) -> str: + """Get the URL for Langfuse""" + return self.services_info.get("langfuse-web", {}).get( + "url", "http://localhost:3000" + ) + + def is_service_available(self, service_name: str) -> bool: + """Check if a service is available""" + return service_name in self.services_info + + +# ===================== Pytest Fixtures ===================== + + +@pytest.fixture(scope="session") +def rag_stack() -> Generator[RAGStackTestContainers, None, None]: + """ + Session-scoped fixture that starts all test containers once per test session. 
+ Containers are automatically stopped after all tests complete. + """ + stack = RAGStackTestContainers() + try: + stack.start() + yield stack + except Exception as e: + # If startup fails, capture logs before cleanup + logger.error(f"RAG stack startup failed: {e}") + try: + stack._capture_service_logs() + except Exception as e: + logger.error(f"Could not capture logs after startup failure: {e}") + raise + finally: + logger.info("=" * 80) + logger.info("CAPTURING SERVICE LOGS BEFORE CLEANUP") + logger.info("=" * 80) + try: + stack._capture_service_logs() + except Exception as e: + logger.error(f"Could not capture logs: {e}") + stack.stop() + + +@pytest.fixture(scope="function") +def orchestration_client(rag_stack: RAGStackTestContainers) -> Any: + """ + Function-scoped fixture that provides a configured requests session + for testing the LLM orchestration service API. + """ + session = requests.Session() + session.headers.update( + {"Content-Type": "application/json", "Accept": "application/json"} + ) + setattr(session, "base_url", rag_stack.get_orchestration_service_url()) + return session + + +@pytest.fixture(scope="session") +def minio_client(rag_stack): + """Create MinIO client connected to test instance.""" + client = Minio( + "localhost:9000", + access_key="minio", + secret_key="miniosecret", + secure=False, + ) + return client + + +@pytest.fixture(scope="session") +def qdrant_client(rag_stack): + """Create Qdrant client connected to test instance.""" + client = QdrantClient(host="localhost", port=6333) + return client + + +@pytest.fixture +def test_bucket(minio_client: Minio): + """Create a test bucket with public read access and clean it up after test.""" + bucket_name = "test-integration-bucket" + + # Create bucket if it doesn't exist + if not minio_client.bucket_exists(bucket_name): + minio_client.make_bucket(bucket_name) + + # Set bucket policy to allow public read access + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": ["s3:GetObject"], + "Resource": [f"arn:aws:s3:::{bucket_name}/*"], + } + ], + } + + minio_client.set_bucket_policy(bucket_name, json.dumps(policy)) + + yield bucket_name + + # Cleanup: remove all objects and bucket + try: + objects = minio_client.list_objects(bucket_name, recursive=True) + for obj in objects: + minio_client.remove_object(bucket_name, obj.object_name) + minio_client.remove_bucket(bucket_name) + except Exception as e: + # Ignore cleanup errors - bucket may not exist or objects already deleted + # This is acceptable in test teardown as it doesn't affect test results + logger.debug(f"MinIO cleanup failed for bucket {bucket_name}: {e}") + + +@pytest.fixture +def test_document(test_bucket: str, minio_client: Minio, tmp_path: Path): + """ + Create a test document with cleaned.txt and source.meta.json. + + Returns tuple of (bucket_name, object_prefix, local_path) + """ + # Create test document directory structure + doc_dir = tmp_path / "test_doc" + doc_dir.mkdir() + + # Create cleaned.txt with sample content + cleaned_content = """This is a test document for integration testing. + +It contains multiple paragraphs to test chunking. + +The document discusses RAG (Retrieval-Augmented Generation) systems. + +RAG combines retrieval mechanisms with language models. + +This helps provide accurate and contextual responses. + +Integration testing ensures all components work together correctly. 
+""" + cleaned_file = doc_dir / "cleaned.txt" + cleaned_file.write_text(cleaned_content) + + # Create source.meta.json + meta_content = { + "source": "integration_test", + "title": "Test Document", + "created_at": "2025-01-01T00:00:00Z", + "author": "Test Suite", + } + meta_file = doc_dir / "source.meta.json" + meta_file.write_text(json.dumps(meta_content)) + + # Upload to MinIO + object_prefix = "test_documents/doc1" + + minio_client.fput_object( + test_bucket, f"{object_prefix}/cleaned.txt", str(cleaned_file) + ) + minio_client.fput_object( + test_bucket, f"{object_prefix}/source.meta.json", str(meta_file) + ) + + return test_bucket, object_prefix, doc_dir + + +@pytest.fixture +def presigned_url(minio_client: Minio, test_document): + """ + Generate presigned URL for test document. + + Note: For actual testing, you may need to create a zip archive + and generate a presigned URL for that. + """ + bucket_name, object_prefix, _ = test_document + + # Generate presigned URL (valid for 1 hour) + from datetime import timedelta + + url = minio_client.presigned_get_object( + bucket_name, f"{object_prefix}/cleaned.txt", expires=timedelta(hours=1) + ) + + return url + + +@pytest.fixture(scope="session") +def qdrant_collections(): + """List of Qdrant collection names used by the indexer.""" + return ["contextual_chunks_azure", "contextual_chunks_aws"] + + +@pytest.fixture(scope="session") +def llm_orchestration_url(rag_stack): + """ + URL for the LLM orchestration service. + + Depends on rag_stack to ensure all services are started and Vault is populated + with LLM connection secrets before tests run. + """ + return rag_stack.get_orchestration_service_url() + + +@pytest.fixture(scope="session") +def vault_client(rag_stack): + """Create Vault client connected to test instance using root token (dev mode).""" + vault_url = rag_stack.get_vault_url() + + # In test environment, Vault runs in dev mode with known root token + # This is simpler and avoids permission issues with agent-out token files + client = hvac.Client(url=vault_url, token="root") + + # Verify connection + if not client.is_authenticated(): + raise RuntimeError("Failed to authenticate with Vault using root token") + + logger.info("Vault client authenticated using dev mode root token") + + # Create a simple wrapper to match VaultAgentClient interface + class SimpleVaultClient: + def __init__(self, hvac_client): + self.client = hvac_client + + def get_secret(self, path: str, mount_point: str = "secret") -> dict: + """Read a secret from Vault KV v2""" + result = self.client.secrets.kv.v2.read_secret_version( + path=path, mount_point=mount_point + ) + return result["data"]["data"] + + return SimpleVaultClient(client) + + +@pytest.fixture(scope="session") +def postgres_client(rag_stack): + """Create PostgreSQL client connected to test database.""" + import psycopg2 + + # Wait for database to be ready + max_attempts = 30 + for attempt in range(max_attempts): + try: + conn = psycopg2.connect( + host="localhost", + port=5436, + database="rag-search", + user="postgres", + password="dbadmin", + ) + logger.info("PostgreSQL connection established") + yield conn + conn.close() + return + except psycopg2.OperationalError: + if attempt < max_attempts - 1: + time.sleep(2) + else: + raise + + raise TimeoutError("Could not connect to PostgreSQL") + + +@pytest.fixture(scope="session") +def setup_agency_sync_schema(postgres_client): + """Create agency_sync and mock_ckb tables for data update tests.""" + cursor = postgres_client.cursor() + try: + 
cursor.execute(""" + CREATE TABLE IF NOT EXISTS public.agency_sync ( + agency_id VARCHAR(255) PRIMARY KEY, + agency_data_hash VARCHAR(255), + data_url TEXT, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() + ) + """) + + cursor.execute(""" + CREATE TABLE IF NOT EXISTS public.mock_ckb ( + client_id VARCHAR(255) PRIMARY KEY, + client_data_hash VARCHAR(255) NOT NULL, + signed_s3_url TEXT NOT NULL, + created_at TIMESTAMP DEFAULT NOW() + ) + """) + + postgres_client.commit() + logger.info("Agency sync and mock CKB tables created") + except Exception as e: + logger.error(f"Failed to create tables: {e}") + raise + finally: + cursor.close() + + +@pytest.fixture(scope="function") +def ruuter_private_client(rag_stack: RAGStackTestContainers): + """ + Function-scoped fixture that provides a configured requests session + for testing Ruuter Private API endpoints. + + Ruuter Private is the routing layer that handles requests to the LLM orchestration + service via DSL-defined endpoints. + + If Ruuter Private service is not available, tests using this fixture will be skipped. + """ + # Check if Ruuter Private service is available + if "ruuter-private" not in rag_stack.services_info: + pytest.skip("Ruuter Private service not available") + + session = requests.Session() + session.headers.update( + {"Content-Type": "application/json", "Accept": "application/json"} + ) + # Ruuter Private runs on port 8088 in test environment + setattr(session, "base_url", "http://localhost:8088") + return session + + +@pytest.fixture(scope="function") +def ruuter_public_client(rag_stack: RAGStackTestContainers): + """ + Function-scoped fixture that provides a configured requests session + for testing Ruuter Public API endpoints. + + Ruuter Public is the routing layer that handles requests to the LLM orchestration + service via DSL-defined endpoints. + + If Ruuter Public service is not available, tests using this fixture will be skipped. + """ + # Check if Ruuter Public service is available + if "ruuter-public" not in rag_stack.services_info: + pytest.skip("Ruuter Public service not available") + + session = requests.Session() + session.headers.update( + {"Content-Type": "application/json", "Accept": "application/json"} + ) + # Ruuter Public runs on port 8088 in test environment + setattr(session, "base_url", "http://localhost:8086") + return session + + +@pytest.fixture(scope="session") +def sample_test_data(): + """Load test data for inference tests.""" + test_data_path = Path(__file__).parent / "inference_test_data.json" + + if not test_data_path.exists(): + # Fallback to inline data if file doesn't exist + logger.warning( + f"Test data file not found at {test_data_path}, using fallback data" + ) + return [ + { + "question": "What is the retirement age?", + "category": "pension_information", + "expected_scope": True, + "expected_keywords": ["retirement", "age", "pension"], + "description": "Simple pension question", + }, + { + "question": "What is the capital of Mars?", + "category": "out_of_scope", + "expected_scope": False, + "expected_keywords": [], + "description": "Out of scope question", + }, + ] + + with open(test_data_path, "r") as f: + data = json.load(f) + + logger.info(f"Loaded {len(data)} test cases from {test_data_path}") + return data + + +@pytest.fixture(scope="function") +def ensure_testing_connection(postgres_client, ruuter_private_client, rag_stack): + """ + Ensure a testing gpt-4o-mini LLM connection exists for testing inference tests. 
+ + This fixture checks if a testing connection with gpt-4o-mini exists. + If not found, it creates one via the Ruuter API. + + Note: Uses 'testing' environment to leverage the simpler /inference/test endpoint. + """ + cursor = postgres_client.cursor() + try: + # First, check what connections exist in the database + cursor.execute( + "SELECT id, connection_name, environment, llm_model FROM llm_connections " + "ORDER BY id" + ) + all_connections = cursor.fetchall() + logger.info(f"All connections in database: {len(all_connections)}") + for conn in all_connections: + logger.info( + f" - ID={conn[0]}, Name='{conn[1]}', Env={conn[2]}, Model={conn[3]}" + ) + + # Check for existing testing connection with gpt-4o-mini + cursor.execute( + "SELECT id, connection_name FROM llm_connections " + "WHERE environment = 'testing' AND llm_model = 'gpt-4o-mini' " + "LIMIT 1" + ) + row = cursor.fetchone() + + if row is not None: + connection_id, connection_name = row + logger.info( + f"Found existing testing gpt-4o-mini connection: " + f"ID={connection_id}, Name='{connection_name}'" + ) + logger.warning( + f"IMPORTANT: Vault secret must exist at path: " + f"llm/connections/azure_openai/test/{connection_id}" + ) + return connection_id + + # No testing gpt-4o-mini found - create one + logger.info("No testing gpt-4o-mini connection found. Creating one...") + + payload = { + "connection_name": "Testing gpt-4o-mini for Production Tests", + "llm_platform": "azure", + "llm_model": "gpt-4o-mini", + "deployment_name": "gpt-4o-mini-deployment-test", + "target_uri": "https://test-production.openai.azure.com/", + "api_key": "test-production-api-key", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + "embedding_deployment_name": "text-embedding-prod-deployment", + "embedding_target_uri": "https://test-production.openai.azure.com/", + "embedding_azure_api_key": "test-embedding-prod-key", + "monthly_budget": 10000.00, + "warn_budget_threshold": 80, + "stop_budget_threshold": 95, + "disconnect_on_budget_exceed": False, + "deployment_environment": "testing", + } + + response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=payload, + timeout=30, + ) + + if response.status_code != 200: + raise RuntimeError(f"Failed to create testing connection: {response.text}") + + data = response.json() + response_data = data.get("response", data) + connection_id = response_data["id"] + + logger.info(f"Created testing gpt-4o-mini connection with ID: {connection_id}") + logger.warning( + f"IMPORTANT: Vault secret must exist at path: " + f"llm/connections/azure_openai/test/{connection_id}" + ) + logger.warning( + "Currently hardcoded vault path is: llm/connections/azure_openai/test/1" + ) + if connection_id != 1: + logger.error( + f"CONNECTION ID MISMATCH! Database assigned ID={connection_id}, " + f"but vault secret is at path .../test/1" + ) + + # Wait for database write + time.sleep(2) + + return connection_id + + finally: + cursor.close() + + +@pytest.fixture(scope="session", autouse=True) +def capture_container_logs_on_exit(rag_stack): + """ + Capture Docker container logs at the end of the test session. + + This runs automatically after all tests complete but before testcontainers + shuts down the Docker containers. Logs are printed to pytest output which + appears in GitHub Actions logs. 
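+
+    Log capture is best-effort: a timeout or error while reading any single container's logs is
+    reported as a warning and does not fail the test session.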
+ """ + yield # Let all tests run first + + # After all tests complete, capture logs before containers are destroyed + import subprocess + + logger.info("") + logger.info("=" * 80) + logger.info("CAPTURING CONTAINER LOGS BEFORE SHUTDOWN") + logger.info("=" * 80) + + containers = [ + ("llm-orchestration-service", 500), + ("ruuter", 200), + ("resql", 200), + ("qdrant", 100), + ] + + for container_name, tail_lines in containers: + try: + logger.info("") + logger.info(f"{'=' * 80}") + logger.info(f"{container_name.upper()} LOGS (last {tail_lines} lines)") + logger.info(f"{'=' * 80}") + + result = subprocess.run( + ["docker", "logs", container_name, "--tail", str(tail_lines)], + capture_output=True, + text=True, + timeout=10, + ) + + if result.stdout: + logger.info(result.stdout) + if result.stderr: + logger.info("--- STDERR ---") + logger.info(result.stderr) + + except subprocess.TimeoutExpired: + logger.warning(f"Timeout while capturing logs from {container_name}") + except Exception as e: + logger.warning(f"Failed to capture logs from {container_name}: {e}") + + logger.info("LOG CAPTURE COMPLETE") diff --git a/tests/integration_tests/inference_test_data.json b/tests/integration_tests/inference_test_data.json new file mode 100644 index 0000000..9b6ad27 --- /dev/null +++ b/tests/integration_tests/inference_test_data.json @@ -0,0 +1,44 @@ +[ + { + "question": "What is the retirement age in Estonia?", + "category": "pension_information", + "expected_scope": true, + "expected_keywords": ["retirement", "age", "pension", "estonia"], + "description": "Simple question about pension eligibility" + }, + { + "question": "How do I apply for family benefits?", + "category": "family_benefits", + "expected_scope": true, + "expected_keywords": ["family", "benefits", "apply"], + "description": "Question about family benefits application process" + }, + { + "question": "What documents are needed for unemployment benefits?", + "category": "unemployment_benefits", + "expected_scope": true, + "expected_keywords": ["documents", "unemployment", "benefits"], + "description": "Question about required documentation" + }, + { + "question": "What is the capital of Mars?", + "category": "out_of_scope", + "expected_scope": false, + "expected_keywords": [], + "description": "Question completely outside the knowledge base" + }, + { + "question": "How do I make a chocolate cake?", + "category": "out_of_scope", + "expected_scope": false, + "expected_keywords": [], + "description": "Unrelated question to test scope detection" + }, + { + "question": "Tell me about parental leave policies", + "category": "family_benefits", + "expected_scope": true, + "expected_keywords": ["parental", "leave", "policy"], + "description": "Question about parental leave for conversation history test" + } +] \ No newline at end of file diff --git a/tests/integration_tests/test_indexing.py b/tests/integration_tests/test_indexing.py new file mode 100644 index 0000000..a792d2a --- /dev/null +++ b/tests/integration_tests/test_indexing.py @@ -0,0 +1,519 @@ +""" +Integration tests for the vector indexing pipeline. + +These tests verify the full flow: +1. Upload document to MinIO +2. Generate presigned URL +3. Run VectorIndexer +4. 
Verify embeddings in Qdrant +""" + +import pytest +import zipfile +import tempfile +from pathlib import Path +from datetime import timedelta +import json +import requests +import sys +import time +from loguru import logger + +from minio import Minio +from qdrant_client import QdrantClient + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + + +class TestIndexingPipeline: + """Test the complete indexing pipeline from MinIO to Qdrant.""" + + def test_minio_connection(self, minio_client: Minio): + """Verify MinIO is accessible.""" + # List buckets to verify connection + buckets = minio_client.list_buckets() + assert buckets is not None + + def test_qdrant_connection(self, qdrant_client: QdrantClient): + """Verify Qdrant is accessible.""" + # Get collections to verify connection + collections = qdrant_client.get_collections() + assert collections is not None + + def test_create_and_upload_document(self, minio_client: Minio, test_bucket: str): + """Test document upload to MinIO.""" + # Verify bucket was created + assert minio_client.bucket_exists(test_bucket) + + # Create and upload a simple test file + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: + f.write("Test content") + temp_path = f.name + + try: + minio_client.fput_object(test_bucket, "test.txt", temp_path) + + # Verify object exists + stat = minio_client.stat_object(test_bucket, "test.txt") + assert stat is not None + assert stat.size > 0 + finally: + Path(temp_path).unlink() + + def test_presigned_url_generation(self, minio_client: Minio, test_document): + """Test presigned URL generation.""" + bucket_name, object_prefix, _ = test_document + + # Generate presigned URL + url = minio_client.presigned_get_object( + bucket_name, f"{object_prefix}/cleaned.txt", expires=timedelta(hours=1) + ) + + assert url is not None + assert "localhost:9000" in url + assert bucket_name in url + + def test_document_structure(self, minio_client: Minio, test_document): + """Verify test document has correct structure.""" + bucket_name, object_prefix, local_path = test_document + + # Check local files exist + cleaned_file = local_path / "cleaned.txt" + meta_file = local_path / "source.meta.json" + + assert cleaned_file.exists() + assert meta_file.exists() + + # Verify content + content = cleaned_file.read_text() + assert "RAG" in content + assert "integration testing" in content + + # Verify metadata + meta = json.loads(meta_file.read_text()) + assert meta["source"] == "integration_test" + assert "title" in meta + + @pytest.mark.asyncio + async def test_indexing_pipeline_e2e( + self, + rag_stack, + minio_client: Minio, + qdrant_client: QdrantClient, + test_bucket: str, + postgres_client, + setup_agency_sync_schema, + tmp_path: Path, + llm_orchestration_url: str, + ): + """ + End-to-end test of the indexing pipeline using Ruuter and Cron-Manager. + + This test: + 1. Creates test document and uploads to MinIO + 2. Generates presigned URL + 3. Prepares database (agency_sync + mock_ckb) + 4. Calls Ruuter endpoint to trigger indexing via Cron-Manager + 5. Waits for async indexing to complete (polls Qdrant) + 6. 
Verifies vectors stored in Qdrant + """ + # Step 0: Wait for LLM orchestration service to be healthy + max_retries = 30 + for i in range(max_retries): + try: + response = requests.get(f"{llm_orchestration_url}/health", timeout=5) + if response.status_code == 200: + health_data = response.json() + if health_data.get("orchestration_service") == "initialized": + break + except requests.exceptions.RequestException: + logger.debug( + f"LLM orchestration health check attempt {i+1}/{max_retries} failed" + ) + time.sleep(2) + else: + pytest.fail("LLM orchestration service not healthy after 60 seconds") + + # Step 1: Create test document and upload to MinIO + # Create structure: test_agency//cleaned.txt + # so when extracted it becomes: extracted_datasets/test_agency//cleaned.txt + # The document loader expects: collection/hash_dir/cleaned.txt + source_dir = tmp_path / "source" + hash_dir = source_dir / "test_agency" / "doc_hash_001" + hash_dir.mkdir(parents=True) + dataset_dir = hash_dir + + cleaned_content = """This is an integration test document for the RAG Module. + +It tests the full vector indexing pipeline from end to end. + +The document will be chunked and embedded using the configured embedding model. + +Each chunk will be stored in Qdrant with contextual information generated by the LLM. + +The RAG (Retrieval-Augmented Generation) system uses semantic search to find relevant documents. + +Vector embeddings are numerical representations of text that capture semantic meaning. + +Qdrant is a vector database that enables fast similarity search across embeddings. + +The contextual retrieval process adds context to each chunk before embedding. + +This helps improve search accuracy by providing more context about each chunk's content. + +The LLM orchestration service manages connections to various language model providers. + +Supported providers include Azure OpenAI and AWS Bedrock for both LLM and embedding models. + +Integration testing ensures all components work together correctly in the pipeline. + +The MinIO object storage is used to store and retrieve dataset files for processing. + +Presigned URLs allow secure, temporary access to objects in MinIO buckets. + +The vector indexer downloads datasets, processes documents, and stores embeddings. + +Each document goes through chunking, contextual enrichment, and embedding stages. + +The final embeddings are upserted into Qdrant collections for later retrieval. + +This test verifies the complete flow from upload to storage in the vector database. 
+""" + (dataset_dir / "cleaned.txt").write_text(cleaned_content) + + meta = { + "source": "e2e_test", + "title": "E2E Test Document", + "agency_id": "test_agency", + } + (dataset_dir / "cleaned.meta.json").write_text(json.dumps(meta)) + + # Create ZIP without datasets/ prefix - just test_agency/files + zip_path = tmp_path / "test_dataset.zip" + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + for file in dataset_dir.rglob("*"): + if file.is_file(): + # Archive path: test_agency/cleaned.txt + arcname = file.relative_to(source_dir) + zf.write(file, arcname) + + object_name = "datasets/test_dataset.zip" + minio_client.fput_object(test_bucket, object_name, str(zip_path)) + + # Use simple direct URL instead of presigned URL + # Bucket is public, so no signature needed + dataset_url = f"http://minio:9000/{test_bucket}/{object_name}" + logger.info(f"Dataset URL for Docker network: {dataset_url}") + + # Step 1: Prepare database state for agency sync + cursor = postgres_client.cursor() + try: + # Insert agency_sync record with initial hash + cursor.execute( + """ + INSERT INTO public.agency_sync (agency_id, agency_data_hash, data_url) + VALUES (%s, %s, %s) + ON CONFLICT (agency_id) DO UPDATE + SET agency_data_hash = EXCLUDED.agency_data_hash + """, + ("test_agency", "initial_hash_000", ""), + ) + + # Insert mock CKB data with new hash and presigned URL + cursor.execute( + """ + INSERT INTO public.mock_ckb (client_id, client_data_hash, signed_s3_url) + VALUES (%s, %s, %s) + ON CONFLICT (client_id) DO UPDATE + SET client_data_hash = EXCLUDED.client_data_hash, + signed_s3_url = EXCLUDED.signed_s3_url + """, + ("test_agency", "new_hash_001", dataset_url), + ) + + postgres_client.commit() + logger.info( + "Database prepared: agency_sync (initial_hash_000) and mock_ckb (new_hash_001)" + ) + finally: + cursor.close() + + # Step 2: Call Ruuter Public endpoint to trigger indexing via Cron-Manager + logger.info("Calling /rag-search/data/update to trigger indexing...") + ruuter_public_url = "http://localhost:8086" + + response = requests.post( + f"{ruuter_public_url}/rag-search/data/update", + json={}, # No body required + timeout=60, + ) + + assert response.status_code == 200, ( + f"Expected 200, got {response.status_code}: {response.text}" + ) + data = response.json() + response_data = data.get("response", {}) + assert response_data.get("operationSuccessful") is True, ( + f"Operation failed: {data}" + ) + logger.info( + f"Indexing triggered successfully: {response_data.get('message', 'No message')}" + ) + + # Give Cron-Manager time to start the indexing process + logger.info("Waiting 5 seconds for Cron-Manager to start indexing...") + time.sleep(5) + + # Step 3: Wait for indexing to complete (poll Qdrant with verbose logging) + import asyncio + + max_wait = 120 # 2 minutes + poll_interval = 5 # seconds + start_time = time.time() + + logger.info(f"Waiting for indexing to complete (max {max_wait}s)...") + + # First, wait for collection to be created + collection_created = False + logger.info("Waiting for collection 'contextual_chunks_azure' to be created...") + + while time.time() - start_time < max_wait: + elapsed = time.time() - start_time + + try: + # Try to get collection info (will fail if doesn't exist) + collection_info = qdrant_client.get_collection( + "contextual_chunks_azure" + ) + if collection_info: + logger.info( + f"[{elapsed:.1f}s] Collection 'contextual_chunks_azure' created!" 
+ ) + collection_created = True + break + except Exception as e: + logger.debug( + f"[{elapsed:.1f}s] Collection not yet created: {type(e).__name__}" + ) + + await asyncio.sleep(poll_interval) + + if not collection_created: + # Capture Cron-Manager logs for debugging + import subprocess + + try: + logger.error( + "Collection was not created - capturing Cron-Manager logs..." + ) + result = subprocess.run( + ["docker", "logs", "cron-manager", "--tail", "200"], + capture_output=True, + text=True, + timeout=10, + ) + logger.error("=" * 80) + logger.error("CRON-MANAGER LOGS:") + logger.error("=" * 80) + if result.stdout: + logger.error(result.stdout) + if result.stderr: + logger.error(f"STDERR: {result.stderr}") + except Exception as e: + logger.error(f"Failed to capture logs: {e}") + + pytest.fail( + f"Collection 'contextual_chunks_azure' was not created within {max_wait}s timeout" + ) + + # Now wait for documents to be indexed + indexing_completed = False + logger.info("Waiting for documents to be indexed in contextual_chunks_azure...") + poll_count = 0 + while time.time() - start_time < max_wait: + elapsed = time.time() - start_time + poll_count += 1 + + try: + azure_points = qdrant_client.count( + collection_name="contextual_chunks_azure" + ) + current_count = azure_points.count + + logger.info( + f"[{elapsed:.1f}s] Polling Qdrant: {current_count} documents in contextual_chunks_azure" + ) + + if current_count > 0: + logger.info( + f"✓ Indexing completed successfully in {elapsed:.1f}s with {current_count} documents" + ) + indexing_completed = True + break + + # After 30 seconds with no documents, check Cron-Manager logs once + if poll_count == 6 and current_count == 0: + import subprocess + + try: + logger.warning( + "No documents after 30s - checking Cron-Manager logs..." 
+ ) + result = subprocess.run( + ["docker", "logs", "cron-manager", "--tail", "100"], + capture_output=True, + text=True, + timeout=5, + ) + if ( + "error" in result.stdout.lower() + or "failed" in result.stdout.lower() + ): + logger.error("Found errors in Cron-Manager logs:") + logger.error(result.stdout[-2000:]) # Last 2000 chars + except Exception as e: + logger.warning(f"Could not check logs: {e}") + + except Exception as e: + logger.warning(f"[{elapsed:.1f}s] Qdrant polling error: {e}") + + await asyncio.sleep(poll_interval) + + if not indexing_completed: + # Capture final state and Cron-Manager logs + try: + final_count = qdrant_client.count( + collection_name="contextual_chunks_azure" + ) + logger.error( + f"Final count after timeout: {final_count.count} documents" + ) + except Exception as e: + logger.error(f"Could not get final count: {e}") + + # Get Cron-Manager logs to see what happened + import subprocess + + try: + logger.error("=" * 80) + logger.error("CRON-MANAGER LOGS (indexing phase):") + logger.error("=" * 80) + result = subprocess.run( + ["docker", "logs", "cron-manager", "--tail", "300"], + capture_output=True, + text=True, + timeout=10, + ) + if result.stdout: + logger.error(result.stdout) + if result.stderr: + logger.error(f"STDERR: {result.stderr}") + except Exception as e: + logger.error(f"Failed to capture logs: {e}") + + pytest.fail( + f"Indexing did not complete within {max_wait}s timeout - no documents found in collection" + ) + + # Step 4: Verify vectors are stored in Qdrant + collections_to_check = ["contextual_chunks_azure", "contextual_chunks_aws"] + total_points = 0 + + for collection_name in collections_to_check: + try: + collection_info = qdrant_client.get_collection(collection_name) + if collection_info: + total_points += collection_info.points_count + except Exception: + # Collection might not exist + pass + + assert total_points > 0, ( + f"No vectors stored in Qdrant. Expected chunks but found {total_points} points." 
+ ) + + logger.info( + f"E2E Test passed: Indexing completed via Ruuter/Cron-Manager, " + f"{total_points} points stored in Qdrant" + ) + + +class TestQdrantOperations: + """Test Qdrant-specific operations.""" + + def test_collection_operations(self, qdrant_client: QdrantClient): + """Test creating and querying collections.""" + from qdrant_client.models import Distance, VectorParams + + test_collection = "test_integration_collection" + + try: + # Create collection + qdrant_client.create_collection( + collection_name=test_collection, + vectors_config=VectorParams(size=1536, distance=Distance.COSINE), + ) + + # Verify collection exists + collections = qdrant_client.get_collections() + collection_names = [c.name for c in collections.collections] + assert test_collection in collection_names + + # Get collection info + info = qdrant_client.get_collection(test_collection) + assert info.config.params.vectors.size == 1536 + + finally: + # Cleanup + try: + qdrant_client.delete_collection(test_collection) + except Exception: + pass + + def test_point_operations(self, qdrant_client: QdrantClient): + """Test inserting and querying points.""" + from qdrant_client.models import Distance, VectorParams, PointStruct + + test_collection = "test_points_collection" + + try: + # Create collection + qdrant_client.create_collection( + collection_name=test_collection, + vectors_config=VectorParams(size=4, distance=Distance.COSINE), + ) + + # Insert points + points = [ + PointStruct( + id=1, + vector=[0.1, 0.2, 0.3, 0.4], + payload={"document_hash": "test123", "text": "test chunk"}, + ), + PointStruct( + id=2, + vector=[0.2, 0.3, 0.4, 0.5], + payload={"document_hash": "test123", "text": "another chunk"}, + ), + ] + + qdrant_client.upsert(collection_name=test_collection, points=points) + + # Query by filter + results = qdrant_client.scroll( + collection_name=test_collection, + scroll_filter={ + "must": [{"key": "document_hash", "match": {"value": "test123"}}] + }, + limit=10, + ) + + assert len(results[0]) == 2 + + finally: + # Cleanup + try: + qdrant_client.delete_collection(test_collection) + except Exception: + pass diff --git a/tests/integration_tests/test_inference.py b/tests/integration_tests/test_inference.py new file mode 100644 index 0000000..7529479 --- /dev/null +++ b/tests/integration_tests/test_inference.py @@ -0,0 +1,102 @@ +""" +Integration tests for LLM inference pipeline. + +These tests verify: +1. Production and testing inference endpoints +2. Complete RAG pipeline (guardrails → refinement → retrieval → generation) +3. Database storage of inference results +4. Error handling and edge cases +5. 
Contextual retrieval integration +""" + +import requests +import json +from loguru import logger + + +class TestInference: + """Test LLM inference pipeline via Ruuter endpoints.""" + + def test_orchestration_service_health(self, orchestration_client): + """Verify LLM orchestration service is healthy.""" + response = requests.get(f"{orchestration_client.base_url}/health", timeout=10) + assert response.status_code == 200 + data = response.json() + assert data.get("status") == "healthy" + logger.info("LLM orchestration service is healthy") + + def test_testing_inference_basic( + self, + ruuter_private_client, + postgres_client, + vault_client, + sample_test_data, + ensure_testing_connection, + rag_stack, + ): + """Test production-style inference using the testing endpoint.""" + # Ensure testing connection exists for production tests + connection_id = ensure_testing_connection + logger.info(f"Using testing connection ID: {connection_id}") + + # Verify database connection details + cursor = postgres_client.cursor() + try: + cursor.execute( + "SELECT id, connection_name, environment, llm_model FROM llm_connections " + "WHERE id = %s", + (connection_id,), + ) + row = cursor.fetchone() + if row: + logger.info( + f"Database connection found: ID={row[0]}, Name='{row[1]}', Env={row[2]}, Model={row[3]}" + ) + else: + logger.error(f"Connection {connection_id} not found in database!") + finally: + cursor.close() + + # Get a simple test question + test_case = next( + (item for item in sample_test_data if item["expected_scope"]), + sample_test_data[0], + ) + + # Prepare test inference request (using testing endpoint for simplicity) + payload = { + "connectionId": connection_id, + "message": test_case["question"], + "environment": "testing", + } + + logger.info(f"Testing inference with message: {test_case['question']}") + logger.info( + f"Expected vault path: llm/connections/azure_openai/test/{connection_id}" + ) + logger.info(f"Using payload: {json.dumps(payload)}") + logger.info(f"Ruuter base URL: {ruuter_private_client.base_url}") + + response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/inference/test", + json=payload, + headers={"Cookie": "customJwtCookie=test-session-token"}, + timeout=300, + ) + + assert response.status_code == 200, ( + f"Expected 200, got {response.status_code}: {response.text}" + ) + data = response.json()["response"] + logger.info(f"Inference response data: {data}") + + # Validate response structure (test mode does not include chatId) + assert "llmServiceActive" in data + assert "questionOutOfLLMScope" in data + assert "inputGuardFailed" in data + assert "content" in data + + assert data["llmServiceActive"] is True + assert len(data["content"]) > 0 + + logger.info(f"Inference successful: {data['content'][:100]}...") diff --git a/tests/integration_tests/test_llm_connections.py b/tests/integration_tests/test_llm_connections.py new file mode 100644 index 0000000..35ee3b0 --- /dev/null +++ b/tests/integration_tests/test_llm_connections.py @@ -0,0 +1,421 @@ +""" +Integration tests for LLM connection management via Ruuter. + +These tests verify: +1. Adding LLM connections via Ruuter endpoints +2. Storing connection data in PostgreSQL +3. Storing credentials in Vault +4. Retrieving connection information +5. 
Updating and deleting connections +""" + +import requests +import time +from loguru import logger + + +class TestLLMConnectionsRuuter: + """Test LLM connection management via Ruuter endpoints.""" + + def test_ruuter_service_health(self, ruuter_private_client): + """Verify Ruuter service is responding.""" + response = requests.get( + f"{ruuter_private_client.base_url}/rag-search/health", timeout=10 + ) + assert response.status_code == 200, "Ruuter health check failed" + logger.info("Ruuter service is healthy") + + def test_add_azure_testing_connection_via_ruuter( + self, ruuter_private_client, postgres_client, rag_stack + ): + """Test adding an Azure LLM connection via Ruuter with testing environment.""" + # Prepare request payload for Azure connection + payload = { + "connection_name": "Test Azure Connection via Ruuter", + "llm_platform": "azure", + "llm_model": "gpt-4o-mini", + "deployment_name": "gpt-4o-mini-deployment-ruuter", + "target_uri": "https://test-ruuter.openai.azure.com/", + "api_key": "test-api-key-ruuter-12345...TESTONLY", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + "embedding_deployment_name": "text-embedding-deployment-ruuter", + "embedding_target_uri": "https://test-ruuter.openai.azure.com/", + "embedding_azure_api_key": "test-embedding-api-key-ruuter-67890...TESTONLY", + "monthly_budget": 1000.00, + "warn_budget_threshold": 80, + "stop_budget_threshold": 95, + "disconnect_on_budget_exceed": False, + "deployment_environment": "testing", + } + + # Make request to add connection via Ruuter + logger.info("Adding Azure testing connection via Ruuter...") + response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=payload, + timeout=30, + ) + + # Assert response + assert response.status_code == 200, f"Failed with: {response.text}" + data = response.json() + + logger.info(f"Response from Ruuter: {data}") + + # Handle nested response structure + response_data = data.get("response", data) + assert response_data.get("operationSuccess") is True, ( + f"Operation should succeed. 
Response: {data}" + ) + assert "id" in response_data, "Response should include connection ID" + connection_id = response_data["id"] + logger.info(f"Connection created via Ruuter with ID: {connection_id}") + + # Wait for database write + time.sleep(2) + + # Verify in database + cursor = postgres_client.cursor() + try: + cursor.execute( + "SELECT connection_name, llm_platform, llm_model, " + "deployment_name, target_uri, embedding_platform, " + "embedding_model, monthly_budget, warn_budget_threshold, " + "stop_budget_threshold, disconnect_on_budget_exceed, environment " + "FROM llm_connections WHERE id = %s", + (connection_id,), + ) + row = cursor.fetchone() + assert row is not None, "Connection not found in database" + + ( + db_connection_name, + db_llm_platform, + db_llm_model, + db_deployment_name, + db_target_uri, + db_embedding_platform, + db_embedding_model, + db_monthly_budget, + db_warn_threshold, + db_stop_threshold, + db_disconnect_on_exceed, + db_environment, + ) = row + + assert db_connection_name == payload["connection_name"] + assert db_llm_platform == payload["llm_platform"] + assert db_llm_model == payload["llm_model"] + assert db_deployment_name == payload["deployment_name"] + assert db_target_uri == payload["target_uri"] + assert db_embedding_platform == payload["embedding_platform"] + assert db_embedding_model == payload["embedding_model"] + assert float(db_monthly_budget) == payload["monthly_budget"] + assert db_warn_threshold == payload["warn_budget_threshold"] + assert db_stop_threshold == payload["stop_budget_threshold"] + assert db_disconnect_on_exceed == payload["disconnect_on_budget_exceed"] + assert db_environment == payload["deployment_environment"] + + logger.info("Database verification passed for Ruuter-added connection") + finally: + cursor.close() + + logger.info("All verifications passed for Azure testing connection via Ruuter") + + def test_add_azure_production_connection_via_ruuter( + self, ruuter_private_client, postgres_client, rag_stack + ): + """Test adding an Azure LLM connection via Ruuter with production environment.""" + payload = { + "connection_name": "Production Azure Connection via Ruuter", + "llm_platform": "azure", + "llm_model": "gpt-4o", + "deployment_name": "gpt-4o-production-deployment-ruuter", + "target_uri": "https://production-ruuter.openai.azure.com/", + "api_key": "prod-api-key-ruuter-12345...TESTONLY", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + "embedding_deployment_name": "text-embedding-prod-deployment-ruuter", + "embedding_target_uri": "https://production-ruuter.openai.azure.com/", + "embedding_azure_api_key": "prod-embedding-api-key-ruuter-67890...TESTONLY", + "monthly_budget": 5000.00, + "warn_budget_threshold": 75, + "stop_budget_threshold": 90, + "disconnect_on_budget_exceed": True, + "deployment_environment": "production", + } + + logger.info("Adding Azure production connection via Ruuter...") + response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=payload, + timeout=30, + ) + + assert response.status_code == 200, f"Failed with: {response.text}" + data = response.json() + # Handle nested response structure + response_data = data.get("response", data) + assert response_data.get("operationSuccess") is True + assert "id" in response_data + connection_id = response_data["id"] + logger.info( + f"Production connection created via Ruuter with ID: {connection_id}" + ) + + # Wait for database write + time.sleep(2) + + # Verify in database + cursor 
= postgres_client.cursor() + try: + cursor.execute( + "SELECT connection_name, environment FROM llm_connections WHERE id = %s", + (connection_id,), + ) + row = cursor.fetchone() + assert row is not None + assert row[0] == payload["connection_name"] + assert row[1] == "production" + logger.info("Production connection verified in database") + finally: + cursor.close() + + def test_get_llm_connection_via_ruuter( + self, ruuter_private_client, postgres_client, rag_stack + ): + """Test retrieving LLM connection details via Ruuter.""" + # First, add a connection + payload = { + "connection_name": "Test Get Connection", + "llm_platform": "azure", + "llm_model": "gpt-4o-mini", + "deployment_name": "test-deployment", + "target_uri": "https://test.openai.azure.com/", + "api_key": "test-api-key...TESTONLY", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + "embedding_deployment_name": "test-embedding", + "embedding_target_uri": "https://test.openai.azure.com/", + "embedding_azure_api_key": "test-embedding-key...TESTONLY", + "monthly_budget": 1000.00, + "warn_budget_threshold": 80, + "stop_budget_threshold": 95, + "disconnect_on_budget_exceed": False, + "deployment_environment": "testing", + } + + logger.info("Adding connection to test GET endpoint...") + add_response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=payload, + timeout=30, + ) + assert add_response.status_code == 200 + add_data = add_response.json() + add_response_data = add_data.get("response", add_data) + connection_id = add_response_data["id"] + logger.info(f"Connection added with ID: {connection_id}") + + time.sleep(2) + + # Now get the connection + logger.info("Retrieving connection via Ruuter GET endpoint...") + get_response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/get", + json={"connection_id": connection_id}, + timeout=10, + ) + + assert get_response.status_code == 200, ( + f"Failed to get connection: {get_response.text}" + ) + get_data = get_response.json() + # GET endpoint returns object directly, not nested + connection_data = get_data["response"] + logger.info(f"Retrieved connection data: {connection_data}") + + # Verify returned data + assert connection_data["id"] == connection_id + assert connection_data["connectionName"] == payload["connection_name"] + assert connection_data["llmPlatform"] == payload["llm_platform"] + assert connection_data["llmModel"] == payload["llm_model"] + assert connection_data["environment"] == payload["deployment_environment"] + + logger.info("Successfully retrieved connection via Ruuter GET endpoint") + + def test_production_connection_demotion_via_ruuter( + self, ruuter_private_client, postgres_client, rag_stack + ): + """Test that adding a new production connection demotes the existing one to testing via Ruuter.""" + # First production connection + first_payload = { + "connection_name": "First Production Connection Ruuter", + "llm_platform": "azure", + "llm_model": "gpt-4o-mini", + "deployment_name": "first-deployment-ruuter", + "target_uri": "https://first-ruuter.openai.azure.com/", + "api_key": "first-api-key-ruuter...TESTONLY", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + "embedding_deployment_name": "first-embedding-deployment-ruuter", + "embedding_target_uri": "https://first-ruuter.openai.azure.com/", + "embedding_azure_api_key": "first-embedding-key-ruuter...TESTONLY", + "monthly_budget": 2000.00, + "warn_budget_threshold": 70, + 
"stop_budget_threshold": 85, + "disconnect_on_budget_exceed": False, + "deployment_environment": "production", + } + + logger.info("Adding first production connection via Ruuter...") + response1 = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=first_payload, + timeout=30, + ) + assert response1.status_code == 200 + response1_data = response1.json() + first_connection_data = response1_data.get("response", response1_data) + first_connection_id = first_connection_data["id"] + logger.info(f"First production connection ID: {first_connection_id}") + + time.sleep(2) + + # Verify it's production + cursor = postgres_client.cursor() + try: + cursor.execute( + "SELECT environment FROM llm_connections WHERE id = %s", + (first_connection_id,), + ) + row = cursor.fetchone() + assert row[0] == "production" + logger.info("First connection is production") + finally: + cursor.close() + + # Now add a second production connection + second_payload = { + "connection_name": "Second Production Connection Ruuter", + "llm_platform": "azure", + "llm_model": "gpt-4o", + "deployment_name": "second-deployment-ruuter", + "target_uri": "https://second-ruuter.openai.azure.com/", + "api_key": "second-api-key-ruuter...TESTONLY", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + "embedding_deployment_name": "second-embedding-deployment-ruuter", + "embedding_target_uri": "https://second-ruuter.openai.azure.com/", + "embedding_azure_api_key": "second-embedding-key-ruuter...TESTONLY", + "monthly_budget": 3000.00, + "warn_budget_threshold": 80, + "stop_budget_threshold": 95, + "disconnect_on_budget_exceed": True, + "deployment_environment": "production", + } + + logger.info("Adding second production connection via Ruuter...") + response2 = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=second_payload, + timeout=30, + ) + assert response2.status_code == 200 + response2_data = response2.json() + second_connection_data = response2_data.get("response", response2_data) + second_connection_id = second_connection_data["id"] + logger.info(f"Second production connection ID: {second_connection_id}") + + time.sleep(2) + + # Verify first connection was demoted to testing + cursor = postgres_client.cursor() + try: + cursor.execute( + "SELECT environment FROM llm_connections WHERE id = %s", + (first_connection_id,), + ) + row = cursor.fetchone() + assert row is not None + assert row[0] == "testing", "First connection should be demoted to testing" + logger.info("First connection was demoted to testing") + + # Verify second connection is production + cursor.execute( + "SELECT environment FROM llm_connections WHERE id = %s", + (second_connection_id,), + ) + row = cursor.fetchone() + assert row[0] == "production", "Second connection should be production" + logger.info("Second connection is production") + finally: + cursor.close() + + logger.info("Production connection demotion test passed via Ruuter") + + def test_delete_llm_connection_via_ruuter( + self, ruuter_private_client, postgres_client, rag_stack + ): + """Test deleting an LLM connection via Ruuter.""" + # First, add a connection to delete + payload = { + "connection_name": "Connection To Delete", + "llm_platform": "azure", + "llm_model": "gpt-4o-mini", + "deployment_name": "delete-deployment", + "target_uri": "https://delete.openai.azure.com/", + "api_key": "delete-api-key...TESTONLY", + "embedding_platform": "azure", + "embedding_model": "text-embedding-3-large", + 
"embedding_deployment_name": "delete-embedding", + "embedding_target_uri": "https://delete.openai.azure.com/", + "embedding_azure_api_key": "delete-embedding-key...TESTONLY", + "monthly_budget": 500.00, + "warn_budget_threshold": 80, + "stop_budget_threshold": 95, + "disconnect_on_budget_exceed": False, + "deployment_environment": "testing", + } + + logger.info("Adding connection to test DELETE endpoint...") + add_response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/add", + json=payload, + timeout=30, + ) + assert add_response.status_code == 200 + add_data = add_response.json() + add_response_data = add_data.get("response", add_data) + connection_id = add_response_data["id"] + logger.info(f"Connection added with ID: {connection_id}") + + time.sleep(2) + + # Delete the connection + logger.info("Deleting connection via Ruuter DELETE endpoint...") + delete_response = requests.post( + f"{ruuter_private_client.base_url}/rag-search/llm-connections/delete", + json={"connection_id": connection_id}, + timeout=10, + ) + + assert delete_response.status_code == 200, ( + f"Failed to delete connection: {delete_response.text}" + ) + logger.info("Delete request succeeded") + + time.sleep(2) + + # Verify connection no longer exists in database + cursor = postgres_client.cursor() + try: + cursor.execute( + "SELECT COUNT(*) FROM llm_connections WHERE id = %s", (connection_id,) + ) + count = cursor.fetchone()[0] + assert count == 0, "Connection should be deleted from database" + logger.info("Connection successfully deleted from database") + finally: + cursor.close() diff --git a/tests/integration_tests/test_vector_indexer_config.yaml b/tests/integration_tests/test_vector_indexer_config.yaml new file mode 100644 index 0000000..195f0e0 --- /dev/null +++ b/tests/integration_tests/test_vector_indexer_config.yaml @@ -0,0 +1,89 @@ +# Vector Indexer Test Configuration - Uses localhost for testcontainers +vector_indexer: + # API Configuration - localhost for testcontainers + api: + base_url: "http://localhost:8100" + qdrant_url: "http://localhost:6333" + timeout: 300 + + # Environment Configuration + processing: + environment: "production" + connection_id: null + + # Chunking Configuration + chunking: + chunk_size: 800 + chunk_overlap: 100 + min_chunk_size: 50 + max_chunk_size: 2000 + chars_per_token: 4 + tokenizer_encoding: "cl100k_base" + chunk_id_pattern: "{document_hash}_chunk_{index:03d}" + contextual_template: "{context}\n\n{content}" + min_word_count: 5 + max_whitespace_ratio: 0.8 + max_repetition_ratio: 0.5 + + # Concurrency Configuration + concurrency: + max_concurrent_documents: 1 + max_concurrent_chunks_per_doc: 2 + + # Batch Configuration + batching: + embedding_batch_size: 5 + context_batch_size: 3 + + # Error Handling + error_handling: + max_retries: 3 + retry_delay_base: 2 + continue_on_failure: true + log_failures: true + + # Processing Configuration + processing: + batch_delay_seconds: 0.1 + context_delay_seconds: 0.05 + + # Provider Detection + providers: + azure_patterns: ["azure", "text-embedding-3"] + aws_patterns: ["amazon", "titan"] + openai_patterns: ["openai", "gpt"] + + # Logging Configuration + logging: + level: "DEBUG" + failure_log_file: "logs/test_vector_indexer_failures.jsonl" + processing_log_file: "logs/test_vector_indexer_processing.log" + stats_log_file: "logs/test_vector_indexer_stats.json" + + # Dataset Configuration + dataset: + base_path: "datasets" + supported_extensions: [".txt"] + metadata_file: "source.meta.json" + target_file: 
"cleaned.txt" + + # Document Loader Configuration + document_loader: + target_file: "cleaned.txt" + metadata_file: "source.meta.json" + min_content_length: 10 + max_content_length: 10000000 + encoding: "utf-8" + required_metadata_fields: + - "source" + enable_content_caching: false + max_scan_depth: 5 + min_file_size_bytes: 1 + max_file_size_bytes: 50000000 + + # Diff Identifier Configuration + diff_identifier: + datasets_path: "datasets" + metadata_filename: "processed-metadata.json" + max_retries: 3 + max_delay_seconds: 8 \ No newline at end of file diff --git a/uv.lock b/uv.lock index f662ff5..b7d72c5 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.10" [[package]] @@ -120,6 +120,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, ] +[[package]] +name = "argon2-cffi" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argon2-cffi-bindings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/89/ce5af8a7d472a67cc819d5d998aa8c82c5d860608c4db9f46f1162d7dab9/argon2_cffi-25.1.0.tar.gz", hash = "sha256:694ae5cc8a42f4c4e2bf2ca0e64e51e23a040c6a517a85074683d3959e1346c1", size = 45706, upload-time = "2025-06-03T06:55:32.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/d3/a8b22fa575b297cd6e3e3b0155c7e25db170edf1c74783d6a31a2490b8d9/argon2_cffi-25.1.0-py3-none-any.whl", hash = "sha256:fdc8b074db390fccb6eb4a3604ae7231f219aa669a2652e0f20e16ba513d5741", size = 14657, upload-time = "2025-06-03T06:55:30.804Z" }, +] + +[[package]] +name = "argon2-cffi-bindings" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5c/2d/db8af0df73c1cf454f71b2bbe5e356b8c1f8041c979f505b3d3186e520a9/argon2_cffi_bindings-25.1.0.tar.gz", hash = "sha256:b957f3e6ea4d55d820e40ff76f450952807013d361a65d7f28acc0acbf29229d", size = 1783441, upload-time = "2025-07-30T10:02:05.147Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/57/96b8b9f93166147826da5f90376e784a10582dd39a393c99bb62cfcf52f0/argon2_cffi_bindings-25.1.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:aecba1723ae35330a008418a91ea6cfcedf6d31e5fbaa056a166462ff066d500", size = 54121, upload-time = "2025-07-30T10:01:50.815Z" }, + { url = "https://files.pythonhosted.org/packages/0a/08/a9bebdb2e0e602dde230bdde8021b29f71f7841bd54801bcfd514acb5dcf/argon2_cffi_bindings-25.1.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2630b6240b495dfab90aebe159ff784d08ea999aa4b0d17efa734055a07d2f44", size = 29177, upload-time = "2025-07-30T10:01:51.681Z" }, + { url = "https://files.pythonhosted.org/packages/b6/02/d297943bcacf05e4f2a94ab6f462831dc20158614e5d067c35d4e63b9acb/argon2_cffi_bindings-25.1.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:7aef0c91e2c0fbca6fc68e7555aa60ef7008a739cbe045541e438373bc54d2b0", size = 31090, upload-time = "2025-07-30T10:01:53.184Z" }, + { url = "https://files.pythonhosted.org/packages/c1/93/44365f3d75053e53893ec6d733e4a5e3147502663554b4d864587c7828a7/argon2_cffi_bindings-25.1.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:1e021e87faa76ae0d413b619fe2b65ab9a037f24c60a1e6cc43457ae20de6dc6", size = 81246, upload-time = "2025-07-30T10:01:54.145Z" }, + { url = "https://files.pythonhosted.org/packages/09/52/94108adfdd6e2ddf58be64f959a0b9c7d4ef2fa71086c38356d22dc501ea/argon2_cffi_bindings-25.1.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e924cfc503018a714f94a49a149fdc0b644eaead5d1f089330399134fa028a", size = 87126, upload-time = "2025-07-30T10:01:55.074Z" }, + { url = "https://files.pythonhosted.org/packages/72/70/7a2993a12b0ffa2a9271259b79cc616e2389ed1a4d93842fac5a1f923ffd/argon2_cffi_bindings-25.1.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c87b72589133f0346a1cb8d5ecca4b933e3c9b64656c9d175270a000e73b288d", size = 80343, upload-time = "2025-07-30T10:01:56.007Z" }, + { url = "https://files.pythonhosted.org/packages/78/9a/4e5157d893ffc712b74dbd868c7f62365618266982b64accab26bab01edc/argon2_cffi_bindings-25.1.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1db89609c06afa1a214a69a462ea741cf735b29a57530478c06eb81dd403de99", size = 86777, upload-time = "2025-07-30T10:01:56.943Z" }, + { url = "https://files.pythonhosted.org/packages/74/cd/15777dfde1c29d96de7f18edf4cc94c385646852e7c7b0320aa91ccca583/argon2_cffi_bindings-25.1.0-cp39-abi3-win32.whl", hash = "sha256:473bcb5f82924b1becbb637b63303ec8d10e84c8d241119419897a26116515d2", size = 27180, upload-time = "2025-07-30T10:01:57.759Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c6/a759ece8f1829d1f162261226fbfd2c6832b3ff7657384045286d2afa384/argon2_cffi_bindings-25.1.0-cp39-abi3-win_amd64.whl", hash = "sha256:a98cd7d17e9f7ce244c0803cad3c23a7d379c301ba618a5fa76a67d116618b98", size = 31715, upload-time = "2025-07-30T10:01:58.56Z" }, + { url = "https://files.pythonhosted.org/packages/42/b9/f8d6fa329ab25128b7e98fd83a3cb34d9db5b059a9847eddb840a0af45dd/argon2_cffi_bindings-25.1.0-cp39-abi3-win_arm64.whl", hash = "sha256:b0fdbcf513833809c882823f98dc2f931cf659d9a1429616ac3adebb49f5db94", size = 27149, upload-time = "2025-07-30T10:01:59.329Z" }, +] + [[package]] name = "asyncer" version = "0.0.8" @@ -1262,6 +1295,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "minio" +version = "7.2.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argon2-cffi" }, + { name = "certifi" }, + { name = "pycryptodome" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/df/6dfc6540f96a74125a11653cce717603fd5b7d0001a8e847b3e54e72d238/minio-7.2.20.tar.gz", hash = "sha256:95898b7a023fbbfde375985aa77e2cd6a0762268db79cf886f002a9ea8e68598", size = 136113, upload-time = "2025-11-27T00:37:15.569Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/9a/b697530a882588a84db616580f2ba5d1d515c815e11c30d219145afeec87/minio-7.2.20-py3-none-any.whl", hash = "sha256:eb33dd2fb80e04c3726a76b13241c6be3c4c46f8d81e1d58e757786f6501897e", size = 93751, upload-time = "2025-11-27T00:37:13.993Z" }, +] + [[package]] name = "mmh3" version = "5.2.0" @@ -1783,6 +1832,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/d1/0a28c21707807c6aacd5dc9c3704b2aa1effbf37adebd8caeaf68b17a636/protobuf-6.33.0-py3-none-any.whl", hash = 
"sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995", size = 170477, upload-time = "2025-10-15T20:39:51.311Z" }, ] +[[package]] +name = "psycopg2-binary" +version = "2.9.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/6c/8767aaa597ba424643dc87348c6f1754dd9f48e80fdc1b9f7ca5c3a7c213/psycopg2-binary-2.9.11.tar.gz", hash = "sha256:b6aed9e096bf63f9e75edf2581aa9a7e7186d97ab5c177aa6c87797cd591236c", size = 379620, upload-time = "2025-10-10T11:14:48.041Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/91/f870a02f51be4a65987b45a7de4c2e1897dd0d01051e2b559a38fa634e3e/psycopg2_binary-2.9.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:be9b840ac0525a283a96b556616f5b4820e0526addb8dcf6525a0fa162730be4", size = 3756603, upload-time = "2025-10-10T11:11:52.213Z" }, + { url = "https://files.pythonhosted.org/packages/27/fa/cae40e06849b6c9a95eb5c04d419942f00d9eaac8d81626107461e268821/psycopg2_binary-2.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f090b7ddd13ca842ebfe301cd587a76a4cf0913b1e429eb92c1be5dbeb1a19bc", size = 3864509, upload-time = "2025-10-10T11:11:56.452Z" }, + { url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159, upload-time = "2025-10-10T11:12:00.49Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234, upload-time = "2025-10-10T11:12:04.892Z" }, + { url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236, upload-time = "2025-10-10T11:12:11.674Z" }, + { url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083, upload-time = "2025-10-30T02:55:15.73Z" }, + { url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281, upload-time = "2025-10-10T11:12:17.713Z" }, + { url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010, upload-time = "2025-10-10T11:12:22.671Z" }, + { url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641, upload-time = "2025-10-30T02:55:19.929Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940, upload-time = "2025-10-10T11:12:26.529Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147, upload-time = "2025-10-10T11:12:29.535Z" }, +] + [[package]] name = "py-rust-stemmers" version = "0.1.5" @@ -1831,6 +1899,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, ] +[[package]] +name = "pycryptodome" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/a6/8452177684d5e906854776276ddd34eca30d1b1e15aa1ee9cefc289a33f5/pycryptodome-3.23.0.tar.gz", hash = "sha256:447700a657182d60338bab09fdb27518f8856aecd80ae4c6bdddb67ff5da44ef", size = 4921276, upload-time = "2025-05-17T17:21:45.242Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/6c/a1f71542c969912bb0e106f64f60a56cc1f0fabecf9396f45accbe63fa68/pycryptodome-3.23.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:187058ab80b3281b1de11c2e6842a357a1f71b42cb1e15bce373f3d238135c27", size = 2495627, upload-time = "2025-05-17T17:20:47.139Z" }, + { url = "https://files.pythonhosted.org/packages/6e/4e/a066527e079fc5002390c8acdd3aca431e6ea0a50ffd7201551175b47323/pycryptodome-3.23.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cfb5cd445280c5b0a4e6187a7ce8de5a07b5f3f897f235caa11f1f435f182843", size = 1640362, upload-time = "2025-05-17T17:20:50.392Z" }, + { url = "https://files.pythonhosted.org/packages/50/52/adaf4c8c100a8c49d2bd058e5b551f73dfd8cb89eb4911e25a0c469b6b4e/pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67bd81fcbe34f43ad9422ee8fd4843c8e7198dd88dd3d40e6de42ee65fbe1490", size = 2182625, upload-time = "2025-05-17T17:20:52.866Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e9/a09476d436d0ff1402ac3867d933c61805ec2326c6ea557aeeac3825604e/pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8987bd3307a39bc03df5c8e0e3d8be0c4c3518b7f044b0f4c15d1aa78f52575", size = 2268954, upload-time = "2025-05-17T17:20:55.027Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c5/ffe6474e0c551d54cab931918127c46d70cab8f114e0c2b5a3c071c2f484/pycryptodome-3.23.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa0698f65e5b570426fc31b8162ed4603b0c2841cbb9088e2b01641e3065915b", size = 2308534, upload-time = "2025-05-17T17:20:57.279Z" }, + { url = "https://files.pythonhosted.org/packages/18/28/e199677fc15ecf43010f2463fde4c1a53015d1fe95fb03bca2890836603a/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:53ecbafc2b55353edcebd64bf5da94a2a2cdf5090a6915bcca6eca6cc452585a", size = 2181853, upload-time = "2025-05-17T17:20:59.322Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/ea/4fdb09f2165ce1365c9eaefef36625583371ee514db58dc9b65d3a255c4c/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_i686.whl", hash = "sha256:156df9667ad9f2ad26255926524e1c136d6664b741547deb0a86a9acf5ea631f", size = 2342465, upload-time = "2025-05-17T17:21:03.83Z" }, + { url = "https://files.pythonhosted.org/packages/22/82/6edc3fc42fe9284aead511394bac167693fb2b0e0395b28b8bedaa07ef04/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:dea827b4d55ee390dc89b2afe5927d4308a8b538ae91d9c6f7a5090f397af1aa", size = 2267414, upload-time = "2025-05-17T17:21:06.72Z" }, + { url = "https://files.pythonhosted.org/packages/59/fe/aae679b64363eb78326c7fdc9d06ec3de18bac68be4b612fc1fe8902693c/pycryptodome-3.23.0-cp37-abi3-win32.whl", hash = "sha256:507dbead45474b62b2bbe318eb1c4c8ee641077532067fec9c1aa82c31f84886", size = 1768484, upload-time = "2025-05-17T17:21:08.535Z" }, + { url = "https://files.pythonhosted.org/packages/54/2f/e97a1b8294db0daaa87012c24a7bb714147c7ade7656973fd6c736b484ff/pycryptodome-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:c75b52aacc6c0c260f204cbdd834f76edc9fb0d8e0da9fbf8352ef58202564e2", size = 1799636, upload-time = "2025-05-17T17:21:10.393Z" }, + { url = "https://files.pythonhosted.org/packages/18/3d/f9441a0d798bf2b1e645adc3265e55706aead1255ccdad3856dbdcffec14/pycryptodome-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:11eeeb6917903876f134b56ba11abe95c0b0fd5e3330def218083c7d98bbcb3c", size = 1703675, upload-time = "2025-05-17T17:21:13.146Z" }, +] + [[package]] name = "pydantic" version = "2.12.3" @@ -2126,10 +2213,12 @@ dependencies = [ { name = "hvac" }, { name = "langfuse" }, { name = "loguru" }, + { name = "minio" }, { name = "nemoguardrails" }, { name = "numpy" }, { name = "openai" }, { name = "pre-commit" }, + { name = "psycopg2-binary" }, { name = "pydantic" }, { name = "pyright" }, { name = "pytest" }, @@ -2158,10 +2247,12 @@ requires-dist = [ { name = "hvac", specifier = ">=2.3.0" }, { name = "langfuse", specifier = ">=3.8.1" }, { name = "loguru", specifier = ">=0.7.3" }, + { name = "minio", specifier = ">=7.2.0" }, { name = "nemoguardrails", specifier = ">=0.16.0" }, { name = "numpy", specifier = ">=2.3.2" }, { name = "openai", specifier = ">=1.106.1" }, { name = "pre-commit", specifier = ">=4.3.0" }, + { name = "psycopg2-binary", specifier = ">=2.9.11" }, { name = "pydantic", specifier = ">=2.11.7" }, { name = "pyright", specifier = ">=1.1.407" }, { name = "pytest", specifier = ">=8.4.1" },