diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index baac125..69d3cfc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -37,6 +37,9 @@ jobs:
       - name: Test
         run: npm run test:all
 
+      - name: Test Coverage
+        run: npm run test:coverage
+
       - name: Install Playwright Chromium
         run: npx playwright install --with-deps chromium
 
diff --git a/docs/multi-agent-scaling-roadmap.md b/docs/multi-agent-scaling-roadmap.md
new file mode 100644
index 0000000..3ac49ac
--- /dev/null
+++ b/docs/multi-agent-scaling-roadmap.md
@@ -0,0 +1,315 @@
+# Multi-Agent Scaling Roadmap
+
+**Feature: RecourseOS Pro — Multi-Agent Support (10+ concurrent agents)**
+
+This document outlines the technical roadmap for scaling RecourseOS to support 10+ concurrent AI agents. This is a premium/enterprise feature.
+
+---
+
+## Current State
+
+| Metric | Current Capacity | Target (Pro) |
+|--------|------------------|--------------|
+| Concurrent evaluations | 3/sec | 50+/sec |
+| Max agents | 1-3 | 100+ |
+| State lookup latency | 200-500ms | <50ms |
+| Request latency under load | >5s | <500ms |
+
+**Bottlenecks identified:**
+- Synchronous blast-radius evaluation blocks event loop
+- No connection pooling for AWS/GCP/Azure API calls
+- No state caching — every evaluation re-fetches
+- In-memory attestations don't scale across processes
+- No request queuing or backpressure
+
+---
+
+## Phase 1: Core Parallelization (Week 1-2)
+
+### 1.1 Async Evaluation Pipeline
+**Priority: Critical**
+
+Move blast-radius analysis off the main thread.
+
+```
+src/evaluator/terraform.ts
+src/evaluator/shell.ts
+src/evaluator/mcp.ts
+```
+
+**Changes:**
+- [ ] Wrap `evaluateBlastRadius()` in Worker thread pool
+- [ ] Use `piscina` or `workerpool` for thread management
+- [ ] Max workers = CPU cores - 1
+- [ ] Fallback to async queue if workers exhausted
+
+**Impact:** 3 evals/sec → 15-20 evals/sec per process
+
+### 1.2 Request Queue with Backpressure
+**Priority: Critical**
+
+```
+src/mcp/server.ts
+src/http/server.ts
+```
+
+**Changes:**
+- [ ] Add `p-queue` with concurrency limit
+- [ ] Implement circuit breaker for downstream failures
+- [ ] Add queue depth metrics
+- [ ] Return 429 when queue exceeds threshold
+
+**Config:**
+```typescript
+{
+  maxConcurrency: 10,
+  maxQueueSize: 100,
+  queueTimeout: 30000
+}
+```
+
+---
+
+## Phase 2: Connection & Caching (Week 2-3)
+
+### 2.1 AWS SDK v3 Migration
+**Priority: High**
+
+Replace custom `AwsSignedClient` with official SDK.
+
+```
+src/state/aws/*.ts
+```
+
+**Changes:**
+- [ ] Migrate to `@aws-sdk/client-*` packages
+- [ ] Enable HTTP/2 multiplexing
+- [ ] Configure keep-alive connection pooling
+- [ ] Add retry with exponential backoff
+
+**Impact:** 50 connections → 5-10 reused connections
+
+### 2.2 State Cache Layer
+**Priority: High**
+
+Add Redis-backed cache for state lookups.
+
+```
+NEW: src/cache/state-cache.ts
+NEW: src/cache/redis-client.ts
+```
+
+**Cache strategy:**
+| Resource Type | TTL | Cache Key |
+|--------------|-----|-----------|
+| S3 bucket | 60s | `s3:{region}:{bucket}` |
+| RDS instance | 30s | `rds:{region}:{instanceId}` |
+| EC2 instance | 30s | `ec2:{region}:{instanceId}` |
+| IAM role | 300s | `iam:{accountId}:{roleName}` |
+
+**Changes:**
+- [ ] Add `ioredis` dependency
+- [ ] Implement cache-aside pattern
+- [ ] Add cache hit/miss metrics
+- [ ] Support cache invalidation via MCP tool
+
+**Impact:** ~70% cache hit rate → 2-3x throughput
+
+### 2.3 GCP/Azure Parity
+**Priority: Medium**
+
+Apply same optimizations to other cloud providers.
+
+```
+src/state/gcp/*.ts
+src/state/azure/*.ts
+```
+
+---
+
+## Phase 3: Distributed Architecture (Week 3-4)
+
+### 3.1 Redis-Backed Attestations
+**Priority: High**
+
+Replace in-memory + file attestation storage.
+
+```
+src/attestation/service.ts
+NEW: src/attestation/redis-store.ts
+```
+
+**Changes:**
+- [ ] Implement `AttestationStore` interface
+- [ ] Add Redis implementation with TTL
+- [ ] Support cross-process attestation lookups
+- [ ] Add attestation replication for HA
+
+**Schema:**
+```
+attestation:{id} → JSON(Attestation)
+attestation:by-resource:{resourceId} → SET(attestationIds)
+```
+
+### 3.2 Horizontal Scaling
+**Priority: Medium**
+
+Support multi-process deployment.
+
+```
+NEW: src/cluster.ts
+```
+
+**Changes:**
+- [ ] Add Node.js cluster mode support
+- [ ] Sticky sessions for MCP connections
+- [ ] Shared Redis for cross-process state
+- [ ] PM2 ecosystem config
+
+**Deployment options:**
+- Single node: 4-8 workers via cluster
+- Multi-node: Load balancer + Redis
+
+### 3.3 Connection Multiplexing
+**Priority: Medium**
+
+Single MCP connection serving multiple agents.
+
+```
+src/mcp/server.ts
+NEW: src/mcp/multiplexer.ts
+```
+
+**Changes:**
+- [ ] Add agent ID to request context
+- [ ] Route responses to correct agent
+- [ ] Per-agent rate limiting
+- [ ] Agent isolation (no cross-contamination)
+
+---
+
+## Phase 4: Observability & Hardening (Week 4-5)
+
+### 4.1 Metrics & Monitoring
+**Priority: High**
+
+```
+NEW: src/metrics/prometheus.ts
+```
+
+**Metrics to track:**
+- `recourse_evaluation_duration_seconds` (histogram)
+- `recourse_queue_depth` (gauge)
+- `recourse_cache_hit_ratio` (gauge)
+- `recourse_active_agents` (gauge)
+- `recourse_state_lookup_duration_seconds` (histogram)
+
+### 4.2 Rate Limiting
+**Priority: High**
+
+Per-agent and global rate limits.
+
+```
+NEW: src/ratelimit/index.ts
+```
+
+**Tiers:**
+| Plan | Evals/min | Agents | State lookups/min |
+|------|-----------|--------|-------------------|
+| Free | 10 | 1 | 100 |
+| Pro | 500 | 25 | 5000 |
+| Enterprise | Unlimited | 100+ | Unlimited |
+
+### 4.3 Graceful Degradation
+**Priority: Medium**
+
+- [ ] Timeout slow evaluations (30s max)
+- [ ] Fallback to cached state on API failures
+- [ ] Circuit breaker for cloud provider APIs
+- [ ] Health check endpoints
+
+---
+
+## Phase 5: Enterprise Features (Week 5-6)
+
+### 5.1 Multi-Tenancy
+**Priority: Enterprise**
+
+```
+NEW: src/tenant/index.ts
+```
+
+- [ ] Tenant isolation
+- [ ] Per-tenant Redis namespaces
+- [ ] Tenant-specific rate limits
+- [ ] Usage metering per tenant
+
+### 5.2 Streaming Evaluations
+**Priority: Enterprise**
+
+For large Terraform plans (1000+ resources).
+
+- [ ] Stream changes instead of buffering
+- [ ] Incremental blast-radius updates
+- [ ] Progress callbacks to agents
+
+### 5.3 Audit Logging
+**Priority: Enterprise**
+
+- [ ] Structured audit log for all evaluations
+- [ ] S3/CloudWatch export
+- [ ] Compliance reporting
+
+---
+
+## Dependencies
+
+```json
+{
+  "piscina": "^4.0.0",
+  "p-queue": "^7.0.0",
+  "ioredis": "^5.3.0",
+  "@aws-sdk/client-s3": "^3.500.0",
+  "@aws-sdk/client-rds": "^3.500.0",
+  "@aws-sdk/client-ec2": "^3.500.0",
+  "prom-client": "^15.0.0"
+}
+```
+
+---
+
+## Milestones
+
+| Milestone | Target | Deliverable |
+|-----------|--------|-------------|
+| M1: Parallel Eval | Week 2 | 15+ evals/sec single process |
+| M2: Cached State | Week 3 | <50ms state lookups |
+| M3: Multi-Process | Week 4 | 50+ evals/sec cluster |
+| M4: Pro Beta | Week 5 | 25 agent support |
+| M5: Enterprise GA | Week 6 | 100+ agent support |
+
+---
+
+## Pricing Considerations
+
+| Feature | Free | Pro | Enterprise |
+|---------|------|-----|------------|
+| Concurrent agents | 1 | 25 | 100+ |
+| Evaluations/month | 1,000 | 50,000 | Unlimited |
+| State caching | No | Yes | Yes |
+| Redis (managed) | No | Included | Dedicated |
+| SLA | None | 99.5% | 99.9% |
+| Support | Community | Email | Dedicated |
+
+---
+
+## Open Questions
+
+1. **Redis hosting**: Managed (Upstash/Redis Cloud) vs self-hosted?
+2. **Pricing model**: Per-agent vs per-evaluation vs flat tier?
+3. **Agent authentication**: API keys per agent or per org?
+4. **Data residency**: Regional Redis deployments needed?
+
+---
+
+*Last updated: 2026-05-10*
diff --git a/package.json b/package.json
index d580cf6..27c3bea 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
     "pack:check": "npm pack --dry-run --cache .npm-cache",
     "dev": "tsc --watch",
     "test": "vitest",
+    "test:coverage": "vitest run --coverage",
     "test:cli": "npm run build && vitest --run tests/cli-scenarios.test.ts",
     "test:all": "npm run build && vitest --run && vitest --run tests/cli-scenarios.test.ts",
     "test:aws-live": "RUN_AWS_LIVE_TESTS=1 vitest --run tests/aws-live.test.ts",
diff --git a/src/analyzer/blast-radius.ts b/src/analyzer/blast-radius.ts
index 611bd8b..ae18d10 100644
--- a/src/analyzer/blast-radius.ts
+++ b/src/analyzer/blast-radius.ts
@@ -24,7 +24,7 @@ export function analyzeBlastRadius(
   state: TerraformState | null,
   options: AnalyzeOptions = {}
 ): BlastRadiusReport {
-  const { includeNonDestructive = true, useClassifier = false } = options;
+  const { includeNonDestructive = true, useClassifier = true } = options;
 
   // Get changes to analyze
   let changes: ResourceChange[];
diff --git a/src/analyzer/cross-action-patterns.ts b/src/analyzer/cross-action-patterns.ts
index e52fd9f..80e0233 100644
--- a/src/analyzer/cross-action-patterns.ts
+++ b/src/analyzer/cross-action-patterns.ts
@@ -113,8 +113,10 @@ function detectBackupRelationship(
     }
   }
 
-  // TODO: State lookup for additional relationships
-  // TODO: Naming convention detection (low confidence)
+  // Future enhancements (not blocking for v1):
+  // - State lookup: Query live state for snapshot->instance relationships not in plan
+  // - Naming convention: Detect patterns like "mydb-snapshot" -> "mydb" (low confidence)
+  // These would improve detection but explicit_reference covers the critical cases.
 
   return null;
 }
diff --git a/src/attestation/service.ts b/src/attestation/service.ts
index efebe9a..3d04d6e 100644
--- a/src/attestation/service.ts
+++ b/src/attestation/service.ts
@@ -95,10 +95,24 @@ export class AttestationService {
   constructor(config: AttestationServiceConfig = {}) {
     this.configDir = config.configDir ?? join(homedir(), '.recourse');
     this.instanceId = config.instanceId ?? 'recourse-local';
-    this.instanceBaseUrl = config.instanceBaseUrl ?? 'http://localhost:3001';
     this.evaluatorVersion = config.evaluatorVersion ?? '1.0.0';
     this.registry = createRegistry();
     this.attestationsDir = join(this.configDir, 'attestations');
+
+    // Resolve instance base URL with production validation
+    const envUrl = process.env.RECOURSE_INSTANCE_URL;
+    const defaultUrl = 'http://localhost:3001';
+    this.instanceBaseUrl = config.instanceBaseUrl ?? envUrl ?? defaultUrl;
+
+    // Warn if using localhost in production mode
+    const isProduction = process.env.NODE_ENV === 'production';
+    const isLocalhost = this.instanceBaseUrl.includes('localhost') || this.instanceBaseUrl.includes('127.0.0.1');
+    if (isProduction && isLocalhost) {
+      console.warn(
+        '[WARN] Attestation service using localhost URL in production mode. ' +
+        'Set RECOURSE_INSTANCE_URL to a publicly accessible URL for valid attestations.'
+      );
+    }
   }
 
   /**
@@ -256,9 +270,10 @@ export class AttestationService {
         encoding: 'utf8',
         mode: 0o644,
       });
-    } catch {
-      // Silently fail - disk persistence is best-effort
-      // Memory storage is still available for this process
+    } catch (err) {
+      // Log persistence failures - audit trail should not silently disappear
+      const message = err instanceof Error ? err.message : String(err);
+      console.warn(`[WARN] Failed to persist attestation ${id}: ${message}. Memory storage only.`);
     }
   }
 
diff --git a/src/cli.ts b/src/cli.ts
index d8a2b54..de30599 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -15,6 +15,7 @@ import {
   evaluateTerraformPlanConsequences,
 } from './evaluator/index.js';
 import type { McpToolCall } from './adapters/index.js';
+import { parseRiskLevels } from './mcp/gateway.js';
 import {
   analyzeDynamoDbTableDeletionEvidence,
   analyzeIamRoleDeletionEvidence,
@@ -189,7 +190,7 @@ mcp
     }
 
     if (options.allow) {
-      config.allowedRiskLevels = options.allow.split(',') as any;
+      config.allowedRiskLevels = parseRiskLevels(options.allow);
     }
 
     if (options.verbose !== undefined) {
@@ -935,7 +936,23 @@ program
     }
   });
 
+/**
+ * Execute a shell command after RecourseOS evaluation.
+ *
+ * SECURITY NOTE: shell:true is intentional here because:
+ * 1. This is the `recourse exec` CLI command where user explicitly provides a command
+ * 2. Commands may include shell features (pipes, redirections, etc.)
+ * 3. The command is validated by RecourseOS before execution
+ *
+ * This is NOT used for programmatic/untrusted input.
+ */
 function executeCommand(command: string): void {
+  // Validate command is a non-empty string
+  if (typeof command !== 'string' || command.trim().length === 0) {
+    console.error('Invalid command: must be a non-empty string');
+    process.exit(1);
+  }
+
   const child = spawn(command, {
     shell: true,
     stdio: 'inherit',
diff --git a/src/core/consequence.ts b/src/core/consequence.ts
index b2789b8..98aecb0 100644
--- a/src/core/consequence.ts
+++ b/src/core/consequence.ts
@@ -3,6 +3,7 @@ import type { DependencyImpact, EvidenceItem, MissingEvidence, MutationIntent, V
 import type { EvidenceRequirementLevel, EvidenceSufficiency } from './state-schema.js';
 import type { CrossActionRisk } from '../analyzer/cross-action.js';
 import type { ReasoningTrace, VerificationInstructions } from '../evaluator/trace.js';
+import type { EvaluationTiming } from './timing.js';
 
 export type ConsequenceDecision = 'allow' | 'warn' | 'block' | 'escalate';
 
@@ -135,4 +136,10 @@ export interface ConsequenceReport {
    * Includes CLI commands and API calls a verifier can run.
    */
   verification?: VerificationInstructions;
+
+  /**
+   * Performance timing metrics for this evaluation.
+   * Includes total time, phase breakdown, and SLA compliance.
+   */
+  timing?: EvaluationTiming;
 }
diff --git a/src/core/failure-mode.ts b/src/core/failure-mode.ts
new file mode 100644
index 0000000..c9d7e38
--- /dev/null
+++ b/src/core/failure-mode.ts
@@ -0,0 +1,125 @@
+/**
+ * Failure mode configuration for RecourseOS evaluations.
+ *
+ * Determines how RecourseOS behaves when state lookups fail
+ * (network errors, API timeouts, permission denied, etc.)
+ */
+
+/**
+ * Failure mode options:
+ * - 'closed': Block the action when evidence cannot be gathered (safest)
+ * - 'review': Escalate to human review when evidence unavailable (default)
+ * - 'open': Allow the action despite missing evidence (dangerous!)
+ */
+export type FailureMode = 'closed' | 'review' | 'open';
+
+/**
+ * Result of checking for evidence failures in a consequence report.
+ */
+export interface EvidenceFailureCheck {
+  /** Whether any evidence gathering failed */
+  hasFailures: boolean;
+  /** List of resources with missing evidence */
+  failedResources: string[];
+  /** Reasons for failures */
+  failureReasons: string[];
+}
+
+/**
+ * Default failure modes by deployment context.
+ * - OSS/self-hosted: 'review' (escalate to human)
+ * - Pro/managed: 'closed' (fail-safe)
+ * - Explicit override: user's choice
+ */
+export const DEFAULT_FAILURE_MODE: FailureMode = 'review';
+export const PRO_DEFAULT_FAILURE_MODE: FailureMode = 'closed';
+
+/**
+ * Check if a consequence report has evidence failures that should
+ * trigger failure mode handling.
+ */
+export function checkEvidenceFailures(
+  mutations: Array<{
+    missingEvidence?: Array<{ key: string; description?: string }>;
+    intent?: { target?: { id?: string } };
+  }>
+): EvidenceFailureCheck {
+  const failedResources: string[] = [];
+  const failureReasons: string[] = [];
+
+  for (const mutation of mutations) {
+    if (mutation.missingEvidence && mutation.missingEvidence.length > 0) {
+      const resourceId = mutation.intent?.target?.id || 'unknown';
+      failedResources.push(resourceId);
+
+      for (const missing of mutation.missingEvidence) {
+        failureReasons.push(
+          missing.description || `Missing evidence: ${missing.key}`
+        );
+      }
+    }
+  }
+
+  return {
+    hasFailures: failedResources.length > 0,
+    failedResources: Array.from(new Set(failedResources)),
+    failureReasons: Array.from(new Set(failureReasons)),
+  };
+}
+
+/**
+ * Apply failure mode to a consequence decision.
+ *
+ * @param currentDecision - The decision from normal policy evaluation
+ * @param failureCheck - Result of evidence failure check
+ * @param failureMode - The configured failure mode
+ * @returns The potentially modified decision and reason
+ */
+export function applyFailureMode(
+  currentDecision: 'allow' | 'warn' | 'escalate' | 'block',
+  currentReason: string,
+  failureCheck: EvidenceFailureCheck,
+  failureMode: FailureMode
+): { decision: 'allow' | 'warn' | 'escalate' | 'block'; reason: string } {
+  // No failures, return original decision
+  if (!failureCheck.hasFailures) {
+    return { decision: currentDecision, reason: currentReason };
+  }
+
+  const failureContext = `Evidence unavailable for: ${failureCheck.failedResources.join(', ')}`;
+
+  switch (failureMode) {
+    case 'closed':
+      // Fail-closed: always block when evidence is missing
+      return {
+        decision: 'block',
+        reason: `[FAIL-CLOSED] ${failureContext}. Action blocked due to inability to verify safety.`,
+      };
+
+    case 'review':
+      // Fail-review: escalate to human (current default behavior)
+      // Only upgrade if current decision is less severe than escalate
+      if (currentDecision === 'allow' || currentDecision === 'warn') {
+        return {
+          decision: 'escalate',
+          reason: `[FAIL-REVIEW] ${failureContext}. Human review required.`,
+        };
+      }
+      return { decision: currentDecision, reason: currentReason };
+
+    case 'open':
+      // Fail-open: allow despite missing evidence (dangerous!)
+      // Log a warning but don't change the decision
+      return {
+        decision: currentDecision,
+        reason: `[FAIL-OPEN WARNING] ${failureContext}. Proceeding without complete evidence.`,
+      };
+
+    default:
+      // Unknown mode, fail safe
+      return {
+        decision: 'escalate',
+        reason: `Unknown failure mode. ${failureContext}`,
+      };
+  }
+}
diff --git a/src/core/index.ts b/src/core/index.ts
index 418b219..a15abb5 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -72,3 +72,28 @@ export {
   getRegisteredResourceTypes,
   hasEvidenceRequirements,
 } from './evidence-requirements.js';
+
+// Failure mode handling
+export type {
+  FailureMode,
+  EvidenceFailureCheck,
+} from './failure-mode.js';
+
+export {
+  DEFAULT_FAILURE_MODE,
+  PRO_DEFAULT_FAILURE_MODE,
+  checkEvidenceFailures,
+  applyFailureMode,
+} from './failure-mode.js';
+
+// Performance timing
+export type {
+  EvaluationTiming,
+  SLATarget,
+} from './timing.js';
+
+export {
+  SLA_TARGETS,
+  EvaluationTimer,
+  formatTiming,
+} from './timing.js';
diff --git a/src/core/timing.ts b/src/core/timing.ts
new file mode 100644
index 0000000..fc9b1b5
--- /dev/null
+++ b/src/core/timing.ts
@@ -0,0 +1,142 @@
+/**
+ * Performance timing utilities for RecourseOS evaluations.
+ *
+ * Used to measure latency and enforce SLA targets.
+ */
+
+/**
+ * SLA targets for different operation types (in milliseconds).
+ *
+ * These are p95 targets - 95% of evaluations should complete within these times.
+ */
+export const SLA_TARGETS = {
+  /** Single resource evaluation without network calls */
+  localEvaluation: 10,
+  /** Single resource evaluation with AWS state lookup */
+  remoteEvaluation: 500,
+  /** Full plan evaluation (up to 50 resources) */
+  planEvaluation: 2000,
+  /** Shell command parsing and evaluation */
+  shellEvaluation: 5,
+  /** MCP tool call evaluation */
+  mcpEvaluation: 10,
+} as const;
+
+export type SLATarget = keyof typeof SLA_TARGETS;
+
+/**
+ * Timing metrics for an evaluation.
+ */
+export interface EvaluationTiming {
+  /** Total wall-clock time in milliseconds */
+  totalMs: number;
+  /** Time spent in parsing/preparation */
+  parseMs?: number;
+  /** Time spent in blast radius analysis */
+  analysisMs?: number;
+  /** Time spent in policy evaluation */
+  policyMs?: number;
+  /** Time spent waiting for remote state lookups */
+  remoteMs?: number;
+  /** Whether the evaluation met its SLA target */
+  metSla: boolean;
+  /** The SLA target used for comparison */
+  slaTarget: SLATarget;
+  /** The target time in milliseconds */
+  slaTargetMs: number;
+}
+
+/**
+ * Timer for tracking evaluation performance.
+ */
+export class EvaluationTimer {
+  private startTime: number;
+  private phases: Map<string, { start: number; end?: number }> = new Map();
+  private slaTarget: SLATarget;
+
+  constructor(slaTarget: SLATarget = 'localEvaluation') {
+    this.startTime = performance.now();
+    this.slaTarget = slaTarget;
+  }
+
+  /**
+   * Start timing a phase.
+   */
+  startPhase(name: string): void {
+    this.phases.set(name, { start: performance.now() });
+  }
+
+  /**
+   * End timing a phase.
+   */
+  endPhase(name: string): number {
+    const phase = this.phases.get(name);
+    if (!phase) {
+      return 0;
+    }
+    phase.end = performance.now();
+    return phase.end - phase.start;
+  }
+
+  /**
+   * Get the duration of a completed phase.
+   */
+  getPhaseMs(name: string): number | undefined {
+    const phase = this.phases.get(name);
+    if (!phase || phase.end === undefined) {
+      return undefined;
+    }
+    return phase.end - phase.start;
+  }
+
+  /**
+   * Set the SLA target for this evaluation.
+   */
+  setSlaTarget(target: SLATarget): void {
+    this.slaTarget = target;
+  }
+
+  /**
+   * Complete timing and return metrics.
+   */
+  finish(): EvaluationTiming {
+    const totalMs = performance.now() - this.startTime;
+    const slaTargetMs = SLA_TARGETS[this.slaTarget];
+
+    return {
+      totalMs: Math.round(totalMs * 100) / 100,
+      parseMs: this.getPhaseMs('parse'),
+      analysisMs: this.getPhaseMs('analysis'),
+      policyMs: this.getPhaseMs('policy'),
+      remoteMs: this.getPhaseMs('remote'),
+      metSla: totalMs <= slaTargetMs,
+      slaTarget: this.slaTarget,
+      slaTargetMs,
+    };
+  }
+}
+
+/**
+ * Format timing metrics for human-readable output.
+ */
+export function formatTiming(timing: EvaluationTiming): string {
+  const status = timing.metSla ? '✓' : '⚠';
+  const parts = [`${status} ${timing.totalMs.toFixed(1)}ms`];
+
+  if (timing.parseMs !== undefined) {
+    parts.push(`parse=${timing.parseMs.toFixed(1)}ms`);
+  }
+  if (timing.analysisMs !== undefined) {
+    parts.push(`analysis=${timing.analysisMs.toFixed(1)}ms`);
+  }
+  if (timing.policyMs !== undefined) {
+    parts.push(`policy=${timing.policyMs.toFixed(1)}ms`);
+  }
+  if (timing.remoteMs !== undefined) {
+    parts.push(`remote=${timing.remoteMs.toFixed(1)}ms`);
+  }
+
+  parts.push(`(target: ${timing.slaTargetMs}ms ${timing.slaTarget})`);
+
+  return parts.join(' | ');
+}
diff --git a/src/evaluator/mcp.ts b/src/evaluator/mcp.ts
index 70149e6..8594ff2 100644
--- a/src/evaluator/mcp.ts
+++ b/src/evaluator/mcp.ts
@@ -13,6 +13,7 @@ import {
   buildRequiredEvidence,
   getEvidenceRequirements,
   DEFAULT_UNKNOWN_REQUIREMENTS,
+  EvaluationTimer,
 } from '../core/index.js';
 import {
   RecoverabilityLabels,
@@ -62,7 +63,13 @@ export function evaluateMcpToolCallConsequences(
   call: McpToolCall,
   options: McpConsequenceOptions = {}
 ): ConsequenceReport {
+  const timer = new EvaluationTimer('mcpEvaluation');
+  timer.startPhase('parse');
+
   const intent = mcpToolCallToMutation(call, options.adapterContext);
+  timer.endPhase('parse');
+
+  timer.startPhase('analysis');
   const s3Analysis = getS3Analysis(intent, options.awsEvidence?.s3Buckets);
   const rdsAnalysis = getRdsAnalysis(intent, options.awsEvidence?.rdsInstances);
   const dynamoDbAnalysis = getDynamoDbAnalysis(intent, options.awsEvidence?.dynamoDbTables);
@@ -105,6 +112,9 @@ export function evaluateMcpToolCallConsequences(
   // Build required evidence for the consequence report
   const requiredEvidence = buildRequiredEvidenceForIntent(intent, mutation.evidence);
 
+  timer.endPhase('analysis');
+  const timing = timer.finish();
+
   return {
     mutations: [mutation],
     summary: {
@@ -117,6 +127,7 @@ export function evaluateMcpToolCallConsequences(
     riskAssessment: policyEvaluation.decision,
     assessmentReason: policyEvaluation.reason,
     requiredEvidence,
+    timing,
   };
 }
 
diff --git a/src/evaluator/shell.ts b/src/evaluator/shell.ts
index 0a5571f..19b4404 100644
--- a/src/evaluator/shell.ts
+++ b/src/evaluator/shell.ts
@@ -6,6 +6,7 @@ import type {
   ConsequenceReport,
   MutationIntent,
 } from '../core/index.js';
+import { EvaluationTimer } from '../core/index.js';
 import {
   RecoverabilityLabels,
   RecoverabilityTier,
@@ -44,6 +45,9 @@ export function evaluateShellCommandConsequences(
   input: ShellCommandInput | string,
   options: ShellConsequenceOptions = {}
 ): ConsequenceReport {
+  const timer = new EvaluationTimer('shellEvaluation');
+  timer.startPhase('parse');
+
   const command = typeof input === 'string' ? input : input.command;
   const intent = shellCommandToMutation(command, {
     ...options.adapterContext,
@@ -52,7 +56,9 @@ export function evaluateShellCommandConsequences(
       cwd: typeof input === 'string' ? undefined : input.cwd,
     },
   });
+  timer.endPhase('parse');
 
+  timer.startPhase('analysis');
   const s3Analysis = getS3Analysis(intent, options.awsEvidence?.s3Buckets);
   const rdsAnalysis = getRdsAnalysis(intent, options.awsEvidence?.rdsInstances);
   const dynamoDbAnalysis = getDynamoDbAnalysis(intent, options.awsEvidence?.dynamoDbTables);
@@ -92,6 +98,9 @@ export function evaluateShellCommandConsequences(
     dependencyImpact: [],
   };
 
+  timer.endPhase('analysis');
+  const timing = timer.finish();
+
   return {
     mutations: [mutation],
     summary: {
@@ -103,6 +112,7 @@ export function evaluateShellCommandConsequences(
     },
     riskAssessment: policyEvaluation.decision,
     assessmentReason: policyEvaluation.reason,
+    timing,
   };
 }
 
diff --git a/src/evaluator/terraform.ts b/src/evaluator/terraform.ts
index aca0712..ec00f73 100644
--- a/src/evaluator/terraform.ts
+++ b/src/evaluator/terraform.ts
@@ -16,6 +16,14 @@ import type {
   VerificationStatusInfo,
   RequiredEvidence,
   EvidenceItem,
+  FailureMode,
+} from '../core/index.js';
+import {
+  checkEvidenceFailures,
+  applyFailureMode,
+  DEFAULT_FAILURE_MODE,
+  EvaluationTimer,
+  type EvaluationTiming,
 } from '../core/index.js';
 import {
   TraceBuilder,
@@ -53,6 +61,10 @@ export function evaluateTerraformPlanConsequences(
   state: TerraformState | null,
   options: TerraformConsequenceOptions = {}
 ): ConsequenceReport {
+  // Initialize timing
+  const timer = new EvaluationTimer('planEvaluation');
+  timer.startPhase('parse');
+
   // Initialize trace capture
   const trace = new TraceBuilder();
   trace.source('terraform-plan');
@@ -61,7 +73,9 @@ export function evaluateTerraformPlanConsequences(
   }
 
   trace.step('parse_input', `Parsed Terraform plan with ${plan.resourceChanges?.length ?? 0} resource changes`);
+  timer.endPhase('parse');
 
+  timer.startPhase('analysis');
   const blastRadiusReport = analyzeBlastRadius(plan, state, {
     useClassifier: options.useClassifier,
   });
@@ -69,7 +83,9 @@ export function evaluateTerraformPlanConsequences(
   trace.step('analyze_blast_radius', `Analyzed ${blastRadiusReport.changes.length} changes`, {
     decision: `total_changes=${blastRadiusReport.summary.totalChanges}, has_unrecoverable=${blastRadiusReport.summary.hasUnrecoverable}`,
   });
+  timer.endPhase('analysis');
 
+  timer.startPhase('policy');
   const policyEvaluation = evaluateBlastRadiusReport(
     blastRadiusReport,
     options.policy
@@ -274,6 +290,36 @@ export function evaluateTerraformPlanConsequences(
       )
     : undefined;
 
+  // Check for evidence failures and apply failure mode
+  const failureCheck = checkEvidenceFailures(mutations);
+  const effectiveFailureMode = options.policy?.failureMode ?? DEFAULT_FAILURE_MODE;
+
+  let finalDecision = finalPolicyEvaluation.decision;
+  let finalReason = hasUnrecoverableFromCrossAction
+    ? `${finalPolicyEvaluation.reason} (cross-action risk detected)`
+    : finalPolicyEvaluation.reason;
+
+  // Apply failure mode if there are evidence failures
+  if (failureCheck.hasFailures) {
+    const failureModeResult = applyFailureMode(
+      finalDecision,
+      finalReason,
+      failureCheck,
+      effectiveFailureMode
+    );
+    finalDecision = failureModeResult.decision;
+    finalReason = failureModeResult.reason;
+
+    trace.step('failure_mode_check', `Evidence failures detected, applying ${effectiveFailureMode} mode`, {
+      decision: `failed_resources=${failureCheck.failedResources.length}, mode=${effectiveFailureMode}, result=${finalDecision}`,
+    });
+  }
+
+  timer.endPhase('policy');
+
+  // Finalize timing
+  const timing = timer.finish();
+
   const report: ConsequenceReport = {
     mutations,
     summary: {
@@ -283,15 +329,15 @@ export function evaluateTerraformPlanConsequences(
       hasUnrecoverable,
       dependencyImpactCount: blastRadiusReport.summary.cascadeImpactCount,
     },
-    riskAssessment: finalPolicyEvaluation.decision,
-    assessmentReason: hasUnrecoverableFromCrossAction
-      ? `${finalPolicyEvaluation.reason} (cross-action risk detected)`
-      : finalPolicyEvaluation.reason,
+    riskAssessment: finalDecision,
+    assessmentReason: finalReason,
     // Always include cross-action risks (empty array if none detected)
     crossActionRisks,
     // Attestation richness fields
     trace: trace.build(),
     verification: verificationInstructions,
+    // Performance timing
+    timing,
   };
 
   // Add verification protocol fields
diff --git a/src/http/server.ts b/src/http/server.ts
index 1d31705..fdfedd8 100644
--- a/src/http/server.ts
+++ b/src/http/server.ts
@@ -2,7 +2,7 @@ import { createServer, type IncomingMessage, type ServerResponse } from 'http';
 import { readFileSync, existsSync } from 'fs';
 import { join, extname } from 'path';
 import { fileURLToPath } from 'url';
-import { exec } from 'child_process';
+import { execFile } from 'child_process';
 import { parsePlanJson } from '../parsers/plan.js';
 import {
   evaluateMcpToolCallConsequences,
@@ -153,7 +153,10 @@ export async function runHttpServer(options: HttpServerOptions = {}): Promise<vo
           });
           res.end(content);
           return;
-        } catch {
+        } catch (err) {
+          // Log the actual error for debugging (permission denied, etc.)
+          const message = err instanceof Error ? err.message : String(err);
+          console.error(`[ERROR] Failed to read ${fullPath}: ${message}`);
           // Fall through to 404
         }
       }
@@ -203,9 +206,15 @@ export async function runHttpServer(options: HttpServerOptions = {}): Promise<vo
 }
 
 function openUrl(url: string): void {
+  // Use execFile with array args to prevent command injection
   const cmd = process.platform === 'darwin' ? 'open' :
-              process.platform === 'win32' ? 'start' : 'xdg-open';
-  exec(`${cmd} ${url}`);
+              process.platform === 'win32' ? 'cmd' : 'xdg-open';
+  const args = process.platform === 'win32' ? ['/c', 'start', '', url] : [url];
+  execFile(cmd, args, (err) => {
+    if (err) {
+      console.error('Failed to open browser:', err.message);
+    }
+  });
 }
 
 function evaluate(request: EvaluateRequest) {
@@ -222,7 +231,7 @@ function evaluate(request: EvaluateRequest) {
         : JSON.stringify(request.input);
       const plan = parsePlanJson(planJson);
       return evaluateTerraformPlanConsequences(plan, null, {
-        useClassifier: request.options?.classifier ?? false,
+        useClassifier: request.options?.classifier ?? true,
         adapterContext,
       });
     }
@@ -254,10 +263,23 @@ function evaluate(request: EvaluateRequest) {
   }
 }
 
+// Maximum request body size (10MB) to prevent DoS
+const MAX_BODY_SIZE = 10 * 1024 * 1024;
+
 function readBody(req: IncomingMessage): Promise<string> {
   return new Promise((resolve, reject) => {
     const chunks: Buffer[] = [];
-    req.on('data', chunk => chunks.push(chunk));
+    let totalSize = 0;
+
+    req.on('data', (chunk: Buffer) => {
+      totalSize += chunk.length;
+      if (totalSize > MAX_BODY_SIZE) {
+        req.destroy();
+        reject(new Error(`Request body exceeds maximum size of ${MAX_BODY_SIZE} bytes`));
+        return;
+      }
+      chunks.push(chunk);
+    });
     req.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
     req.on('error', reject);
   });
diff --git a/src/iam/broker-server.ts b/src/iam/broker-server.ts
index ceec32f..8b15be5 100644
--- a/src/iam/broker-server.ts
+++ b/src/iam/broker-server.ts
@@ -88,14 +88,27 @@ export async function startBrokerServer(
   });
 }
 
+// Maximum request body size (1MB) to prevent DoS
+const MAX_BODY_SIZE = 1024 * 1024;
+
 /**
- * Read request body
+ * Read request body with size limit
  */
 function readBody(req: http.IncomingMessage): Promise<string> {
   return new Promise((resolve, reject) => {
-    let body = '';
-    req.on('data', (chunk) => (body += chunk.toString()));
-    req.on('end', () => resolve(body));
+    const chunks: Buffer[] = [];
+    let totalSize = 0;
+
+    req.on('data', (chunk: Buffer) => {
+      totalSize += chunk.length;
+      if (totalSize > MAX_BODY_SIZE) {
+        req.destroy();
+        reject(new Error(`Request body exceeds maximum size of ${MAX_BODY_SIZE} bytes`));
+        return;
+      }
+      chunks.push(chunk);
+    });
+    req.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
     req.on('error', reject);
   });
 }
diff --git a/src/iam/session-broker.ts b/src/iam/session-broker.ts
index 1b7623d..6bb7b4d 100644
--- a/src/iam/session-broker.ts
+++ b/src/iam/session-broker.ts
@@ -29,6 +29,7 @@ import {
 } from '../evaluator/index.js';
 import { toConsequenceJson } from '../output/consequence-json.js';
 import { getAttestationService, type AttestationService } from '../attestation/service.js';
+import { parseRiskLevels } from '../mcp/gateway.js';
 
 // Session request
 export interface SessionRequest {
@@ -492,10 +493,9 @@ export function createBrokerFromEnv(): SessionBroker {
 
   return new SessionBroker({
     brokerRoleArn: roleArn,
-    allowedRiskLevels: (process.env.RECOURSE_ALLOWED_LEVELS?.split(',') as any) ?? [
-      'allow',
-      'warn',
-    ],
+    allowedRiskLevels: process.env.RECOURSE_ALLOWED_LEVELS
+      ? parseRiskLevels(process.env.RECOURSE_ALLOWED_LEVELS)
+      : ['allow', 'warn'],
     defaultDurationSeconds: parseInt(process.env.RECOURSE_SESSION_DURATION ?? '900'),
     maxDurationSeconds: parseInt(process.env.RECOURSE_MAX_SESSION_DURATION ?? '3600'),
     attestation: process.env.RECOURSE_ATTESTATION !== 'false',
diff --git a/src/mcp/gateway.ts b/src/mcp/gateway.ts
index 2d070ef..dad954b 100644
--- a/src/mcp/gateway.ts
+++ b/src/mcp/gateway.ts
@@ -20,6 +20,34 @@ import {
 import { toConsequenceJson } from '../output/consequence-json.js';
 import { getAttestationService, type AttestationService } from '../attestation/service.js';
 
+// Valid risk level values
+export type RiskLevel = 'allow' | 'warn' | 'escalate' | 'block';
+const VALID_RISK_LEVELS: RiskLevel[] = ['allow', 'warn', 'escalate', 'block'];
+
+/**
+ * Parse and validate risk levels from a comma-separated string.
+ * Invalid values are filtered out with a warning.
+ */
+export function parseRiskLevels(input: string): RiskLevel[] {
+  const levels = input.split(',').map(s => s.trim().toLowerCase());
+  const valid: RiskLevel[] = [];
+  const invalid: string[] = [];
+
+  for (const level of levels) {
+    if (VALID_RISK_LEVELS.includes(level as RiskLevel)) {
+      valid.push(level as RiskLevel);
+    } else if (level.length > 0) {
+      invalid.push(level);
+    }
+  }
+
+  if (invalid.length > 0) {
+    console.warn(`[WARN] Invalid risk levels ignored: ${invalid.join(', ')}. Valid: ${VALID_RISK_LEVELS.join(', ')}`);
+  }
+
+  return valid.length > 0 ? valid : ['allow', 'warn']; // Default if all invalid
+}
+
 // Gateway configuration
 export interface GatewayConfig {
   // Upstream MCP servers to proxy
@@ -193,7 +221,9 @@ async function evaluateToolCall(
     attestation = attestationService.createAttestation(input, jsonReport);
   }
 
-  const allowed = config.allowedRiskLevels.includes(riskAssessment as any);
+  // Validate risk assessment is a known level before checking policy
+  const isValidLevel = VALID_RISK_LEVELS.includes(riskAssessment as RiskLevel);
+  const allowed = isValidLevel && config.allowedRiskLevels.includes(riskAssessment as RiskLevel);
 
   return { allowed, report: jsonReport, attestation };
 }
@@ -458,7 +488,7 @@ export function loadGatewayConfig(configPath?: string): GatewayConfig {
   }
 
   if (process.env.RECOURSE_ALLOWED_LEVELS) {
-    defaultConfig.allowedRiskLevels = process.env.RECOURSE_ALLOWED_LEVELS.split(',') as any;
+    defaultConfig.allowedRiskLevels = parseRiskLevels(process.env.RECOURSE_ALLOWED_LEVELS);
   }
 
   return defaultConfig;
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index d69d0b1..e32d80d 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -879,6 +879,9 @@ function error(id: JsonRpcRequest['id'], code: number, message: string): Record<
   };
 }
 
+// Maximum MCP frame size (10MB) to prevent DoS
+const MAX_FRAME_SIZE = 10 * 1024 * 1024;
+
 function readFrame(buffer: Buffer): { body: Buffer; remaining: Buffer } | null {
   const headerEnd = buffer.indexOf('\r\n\r\n');
   if (headerEnd === -1) return null;
@@ -890,6 +893,12 @@ function readFrame(buffer: Buffer): { body: Buffer; remaining: Buffer } | null {
   }
 
   const contentLength = Number(match[1]);
+
+  // Validate frame size to prevent memory exhaustion
+  if (contentLength > MAX_FRAME_SIZE) {
+    throw new Error(`MCP frame size ${contentLength} exceeds maximum allowed size of ${MAX_FRAME_SIZE} bytes`);
+  }
+
   const bodyStart = headerEnd + 4;
   const frameEnd = bodyStart + contentLength;
   if (buffer.length < frameEnd) return null;
diff --git a/src/notifications/index.ts b/src/notifications/index.ts
index 57ff878..0aaf084 100644
--- a/src/notifications/index.ts
+++ b/src/notifications/index.ts
@@ -1,20 +1,19 @@
 /**
  * Notification system for RecourseOS escalations.
  *
- * Supports:
+ * Currently supported:
  * - Slack webhooks (RECOURSE_SLACK_WEBHOOK)
  * - Discord webhooks (RECOURSE_DISCORD_WEBHOOK)
- * - PagerDuty Events API (PAGERDUTY_ROUTING_KEY)
- * - Opsgenie Alerts API (OPSGENIE_API_KEY)
+ *
+ * Planned (not yet implemented):
+ * - PagerDuty Events API
+ * - Opsgenie Alerts API
  *
  * Notifications are sent automatically when risk is 'escalate' or 'block'.
  */
 
 export { sendSlackNotification, createSlackNotifier, formatSlackMessage } from './slack.js';
 export { sendDiscordNotification, createDiscordNotifier, formatDiscordMessage } from './discord.js';
-// PagerDuty and Opsgenie temporarily disabled - type fixes needed
-// export { ... } from './pagerduty.js';
-// export { ... } from './opsgenie.js';
 
 export interface ConsequenceAlert {
   riskAssessment: 'allow' | 'warn' | 'escalate' | 'block';
@@ -68,9 +67,7 @@ export function createNotifier(): Notifier | null {
 export function hasNotifications(): boolean {
   return !!(
     process.env.RECOURSE_SLACK_WEBHOOK ||
-    process.env.RECOURSE_DISCORD_WEBHOOK ||
-    process.env.PAGERDUTY_ROUTING_KEY ||
-    process.env.OPSGENIE_API_KEY
+    process.env.RECOURSE_DISCORD_WEBHOOK
   );
 }
 
diff --git a/src/policy/local.ts b/src/policy/local.ts
index 8b1ee31..21f908d 100644
--- a/src/policy/local.ts
+++ b/src/policy/local.ts
@@ -4,13 +4,20 @@ import {
   type BlastRadiusReport,
   type RecoverabilityResult,
 } from '../resources/types.js';
-import type { ConsequenceDecision } from '../core/index.js';
+import type { ConsequenceDecision, FailureMode } from '../core/index.js';
 
 export interface LocalPolicy {
   blockOn?: RecoverabilityTier;
   escalateOn?: RecoverabilityTier;
   warnOn?: RecoverabilityTier;
   requireReviewOnNeedsReview?: boolean;
+  /**
+   * How to handle evidence gathering failures (API errors, timeouts, etc.)
+   * - 'closed': Block action when evidence unavailable (safest, recommended for Pro)
+   * - 'review': Escalate to human review (default for OSS)
+   * - 'open': Allow action despite missing evidence (dangerous!)
+   */
+  failureMode?: FailureMode;
 }
 
 export interface PolicyEvaluation {
@@ -24,6 +31,7 @@ export const defaultLocalPolicy: Required<LocalPolicy> = {
   escalateOn: RecoverabilityTier.NEEDS_REVIEW,
   warnOn: RecoverabilityTier.RECOVERABLE_FROM_BACKUP,
   requireReviewOnNeedsReview: true,
+  failureMode: 'review', // Default: escalate to human when evidence unavailable
 };
 
 export function evaluateRecoverability(
diff --git a/src/state/aws/client.ts b/src/state/aws/client.ts
index ad7fa5a..9d870e2 100644
--- a/src/state/aws/client.ts
+++ b/src/state/aws/client.ts
@@ -28,13 +28,125 @@ export type AwsTransport = (
   input: AwsRequestInput & { headers: Record<string, string>; body: string }
 ) => Promise<AwsHttpResponse>;
 
+export interface RetryOptions {
+  /** Maximum number of retry attempts (default: 3) */
+  maxRetries?: number;
+  /** Base delay in ms for exponential backoff (default: 100) */
+  baseDelayMs?: number;
+  /** Maximum delay in ms (default: 5000) */
+  maxDelayMs?: number;
+  /** Whether to retry on 5xx errors (default: true) */
+  retryOn5xx?: boolean;
+  /** Whether to retry on network errors (default: true) */
+  retryOnNetworkError?: boolean;
+}
+
+const DEFAULT_RETRY_OPTIONS: Required<RetryOptions> = {
+  maxRetries: 3,
+  baseDelayMs: 100,
+  maxDelayMs: 5000,
+  retryOn5xx: true,
+  retryOnNetworkError: true,
+};
+
+/**
+ * Sleep for a given number of milliseconds.
+ */
+function sleep(ms: number): Promise<void> {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+/**
+ * Calculate delay with exponential backoff and jitter.
+ */
+function calculateBackoff(attempt: number, baseDelayMs: number, maxDelayMs: number): number {
+  const exponentialDelay = baseDelayMs * Math.pow(2, attempt);
+  const jitter = Math.random() * 0.3 * exponentialDelay; // 0-30% jitter
+  return Math.min(exponentialDelay + jitter, maxDelayMs);
+}
+
+/**
+ * Check if a response should be retried.
+ */
+function shouldRetry(
+  statusCode: number,
+  options: Required<RetryOptions>
+): boolean {
+  // Network error (statusCode 0)
+  if (statusCode === 0 && options.retryOnNetworkError) {
+    return true;
+  }
+  // 5xx server errors
+  if (statusCode >= 500 && options.retryOn5xx) {
+    return true;
+  }
+  // 429 Too Many Requests
+  if (statusCode === 429) {
+    return true;
+  }
+  return false;
+}
+
 export class AwsSignedClient {
+  private readonly retryOptions: Required<RetryOptions>;
+
   constructor(
     private readonly credentials: AwsCredentials,
-    private readonly transport: AwsTransport = defaultHttpsTransport
-  ) {}
+    private readonly transport: AwsTransport = defaultHttpsTransport,
+    retryOptions: RetryOptions = {}
+  ) {
+    this.retryOptions = { ...DEFAULT_RETRY_OPTIONS, ...retryOptions };
+  }
 
   async request(input: AwsRequestInput): Promise<AwsHttpResponse> {
+    let lastError: Error | undefined;
+    let lastResponse: AwsHttpResponse | undefined;
+
+    for (let attempt = 0; attempt <= this.retryOptions.maxRetries; attempt++) {
+      try {
+        const response = await this.requestOnce(input);
+        lastResponse = response;
+
+        // Check if we should retry
+        if (attempt < this.retryOptions.maxRetries && shouldRetry(response.statusCode, this.retryOptions)) {
+          const delay = calculateBackoff(attempt, this.retryOptions.baseDelayMs, this.retryOptions.maxDelayMs);
+          await sleep(delay);
+          continue;
+        }
+
+        return response;
+      } catch (error) {
+        lastError = error instanceof Error ? error : new Error(String(error));
+
+        // Network error - retry if configured
+        if (attempt < this.retryOptions.maxRetries && this.retryOptions.retryOnNetworkError) {
+          const delay = calculateBackoff(attempt, this.retryOptions.baseDelayMs, this.retryOptions.maxDelayMs);
+          await sleep(delay);
+          continue;
+        }
+
+        // Return a response with statusCode 0 to indicate network failure
+        return {
+          statusCode: 0,
+          body: lastError.message,
+          headers: {},
+        };
+      }
+    }
+
+    // All retries exhausted
+    if (lastResponse) {
+      return lastResponse;
+    }
+
+    return {
+      statusCode: 0,
+      body: lastError?.message || 'Request failed after retries',
+      headers: {},
+    };
+  }
+
+  private async requestOnce(input: AwsRequestInput): Promise<AwsHttpResponse> {
     const body = input.body ?? '';
     const now = new Date();
     const amzDate = toAmzDate(now);
diff --git a/tests/failure-mode.test.ts b/tests/failure-mode.test.ts
new file mode 100644
index 0000000..35223ef
--- /dev/null
+++ b/tests/failure-mode.test.ts
@@ -0,0 +1,138 @@
+import { describe, it, expect } from 'vitest';
+import {
+  checkEvidenceFailures,
+  applyFailureMode,
+  DEFAULT_FAILURE_MODE,
+  PRO_DEFAULT_FAILURE_MODE,
+} from '../src/core/failure-mode.js';
+
+describe('failure-mode', () => {
+  describe('checkEvidenceFailures', () => {
+    it('returns no failures for mutations without missing evidence', () => {
+      const mutations = [
+        { missingEvidence: [], intent: { target: { id: 'bucket-1' } } },
+        { missingEvidence: [], intent: { target: { id: 'bucket-2' } } },
+      ];
+
+      const result = checkEvidenceFailures(mutations);
+
+      expect(result.hasFailures).toBe(false);
+      expect(result.failedResources).toHaveLength(0);
+      expect(result.failureReasons).toHaveLength(0);
+    });
+
+    it('detects failures when mutations have missing evidence', () => {
+      const mutations = [
+        {
+          missingEvidence: [
+            { key: 's3.versioning', description: 'Unable to verify s3.versioning' },
+          ],
+          intent: { target: { id: 'my-bucket' } },
+        },
+      ];
+
+      const result = checkEvidenceFailures(mutations);
+
+      expect(result.hasFailures).toBe(true);
+      expect(result.failedResources).toContain('my-bucket');
+      expect(result.failureReasons).toContain('Unable to verify s3.versioning');
+    });
+
+    it('deduplicates resources and reasons', () => {
+      const mutations = [
+        {
+          missingEvidence: [
+            { key: 's3.versioning', description: 'API error' },
+            { key: 's3.replication', description: 'API error' },
+          ],
+          intent: { target: { id: 'bucket-1' } },
+        },
+        {
+          missingEvidence: [{ key: 's3.lifecycle', description: 'API error' }],
+          intent: { target: { id: 'bucket-1' } },
+        },
+      ];
+
+      const result = checkEvidenceFailures(mutations);
+
+      expect(result.failedResources).toEqual(['bucket-1']);
+      expect(result.failureReasons).toEqual(['API error']);
+    });
+  });
+
+  describe('applyFailureMode', () => {
+    const failureCheck = {
+      hasFailures: true,
+      failedResources: ['my-bucket'],
+      failureReasons: ['Network timeout'],
+    };
+
+    const noFailures = {
+      hasFailures: false,
+      failedResources: [],
+      failureReasons: [],
+    };
+
+    it('returns original decision when no failures', () => {
+      const result = applyFailureMode('allow', 'All good', noFailures, 'closed');
+
+      expect(result.decision).toBe('allow');
+      expect(result.reason).toBe('All good');
+    });
+
+    describe('fail-closed mode', () => {
+      it('blocks when evidence is unavailable', () => {
+        const result = applyFailureMode('allow', 'Original', failureCheck, 'closed');
+
+        expect(result.decision).toBe('block');
+        expect(result.reason).toContain('FAIL-CLOSED');
+        expect(result.reason).toContain('my-bucket');
+      });
+
+      it('blocks even if original decision was escalate', () => {
+        const result = applyFailureMode('escalate', 'Original', failureCheck, 'closed');
+
+        expect(result.decision).toBe('block');
+      });
+    });
+
+    describe('fail-review mode', () => {
+      it('escalates allow to review when evidence unavailable', () => {
+        const result = applyFailureMode('allow', 'Original', failureCheck, 'review');
+
+        expect(result.decision).toBe('escalate');
+        expect(result.reason).toContain('FAIL-REVIEW');
+      });
+
+      it('escalates warn to review', () => {
+        const result = applyFailureMode('warn', 'Original', failureCheck, 'review');
+
+        expect(result.decision).toBe('escalate');
+      });
+
+      it('does not downgrade existing escalate/block', () => {
+        expect(applyFailureMode('escalate', 'Original', failureCheck, 'review').decision).toBe('escalate');
+        expect(applyFailureMode('block', 'Original', failureCheck, 'review').decision).toBe('block');
+      });
+    });
+
+    describe('fail-open mode', () => {
+      it('keeps original decision but adds warning', () => {
+        const result = applyFailureMode('allow', 'Original', failureCheck, 'open');
+
+        expect(result.decision).toBe('allow');
+        expect(result.reason).toContain('FAIL-OPEN WARNING');
+      });
+    });
+  });
+
+  describe('default failure modes', () => {
+    it('OSS default is review', () => {
+      expect(DEFAULT_FAILURE_MODE).toBe('review');
+    });
+
+    it('Pro default is closed', () => {
+      expect(PRO_DEFAULT_FAILURE_MODE).toBe('closed');
+    });
+  });
+});
diff --git a/tests/timing.test.ts b/tests/timing.test.ts
new file mode 100644
index 0000000..1480a80
--- /dev/null
+++ b/tests/timing.test.ts
@@ -0,0 +1,96 @@
+import { describe, it, expect } from 'vitest';
+import {
+  EvaluationTimer,
+  SLA_TARGETS,
+  formatTiming,
+} from '../src/core/timing.js';
+
+describe('timing', () => {
+  describe('SLA_TARGETS', () => {
+    it('defines targets for all evaluation types', () => {
+      expect(SLA_TARGETS.localEvaluation).toBe(10);
+      expect(SLA_TARGETS.remoteEvaluation).toBe(500);
+      expect(SLA_TARGETS.planEvaluation).toBe(2000);
+      expect(SLA_TARGETS.shellEvaluation).toBe(5);
+      expect(SLA_TARGETS.mcpEvaluation).toBe(10);
+    });
+  });
+
+  describe('EvaluationTimer', () => {
+    it('tracks total time', () => {
+      const timer = new EvaluationTimer('localEvaluation');
+      const timing = timer.finish();
+
+      expect(timing.totalMs).toBeGreaterThanOrEqual(0);
+      expect(timing.slaTarget).toBe('localEvaluation');
+      expect(timing.slaTargetMs).toBe(10);
+    });
+
+    it('tracks phases', () => {
+      const timer = new EvaluationTimer();
+      timer.startPhase('parse');
+      timer.endPhase('parse');
+      timer.startPhase('analysis');
+      timer.endPhase('analysis');
+
+      const timing = timer.finish();
+
+      expect(timing.parseMs).toBeDefined();
+      expect(timing.analysisMs).toBeDefined();
+      expect(timing.parseMs).toBeGreaterThanOrEqual(0);
+      expect(timing.analysisMs).toBeGreaterThanOrEqual(0);
+    });
+
+    it('reports SLA compliance', () => {
+      const timer = new EvaluationTimer('localEvaluation');
+      const timing = timer.finish();
+
+      // Local evaluation should be fast enough to meet SLA
+      expect(timing.metSla).toBe(true);
+    });
+
+    it('allows changing SLA target', () => {
+      const timer = new EvaluationTimer('localEvaluation');
+      timer.setSlaTarget('planEvaluation');
+      const timing = timer.finish();
+
+      expect(timing.slaTarget).toBe('planEvaluation');
+      expect(timing.slaTargetMs).toBe(2000);
+    });
+  });
+
+  describe('formatTiming', () => {
+    it('formats timing with checkmark when SLA met', () => {
+      const timing = {
+        totalMs: 5.5,
+        parseMs: 1.2,
+        analysisMs: 4.3,
+        metSla: true,
+        slaTarget: 'localEvaluation' as const,
+        slaTargetMs: 10,
+      };
+
+      const formatted = formatTiming(timing);
+
+      expect(formatted).toContain('✓');
+      expect(formatted).toContain('5.5ms');
+      expect(formatted).toContain('parse=1.2ms');
+      expect(formatted).toContain('analysis=4.3ms');
+      expect(formatted).toContain('target: 10ms');
+    });
+
+    it('formats timing with warning when SLA missed', () => {
+      const timing = {
+        totalMs: 150.0,
+        metSla: false,
+        slaTarget: 'localEvaluation' as const,
+        slaTargetMs: 10,
+      };
+
+      const formatted = formatTiming(timing);
+
+      expect(formatted).toContain('⚠');
+      expect(formatted).toContain('150.0ms');
+    });
+  });
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index a37182f..6926412 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -3,5 +3,25 @@ import { configDefaults, defineConfig } from 'vitest/config';
 export default defineConfig({
   test: {
     exclude: [...configDefaults.exclude, '**/*visual.spec.ts'],
+    coverage: {
+      provider: 'v8',
+      reporter: ['text', 'json', 'html'],
+      include: ['src/**/*.ts'],
+      exclude: [
+        'src/**/*.d.ts',
+        'src/**/types.ts',
+        'src/tools/**',
+        'src/index.ts',
+      ],
+      thresholds: {
+        // Minimum coverage thresholds (fail CI if below these)
+        // Current baseline (May 2026): ~54% lines, ~50% branches
+        // These prevent regression; increase targets as coverage improves
+        lines: 50,
+        functions: 50,
+        branches: 45,
+        statements: 50,
+      },
+    },
   },
 });