diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index baac125..69d3cfc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,9 @@ jobs: - name: Test run: npm run test:all + - name: Test Coverage + run: npm run test:coverage + - name: Install Playwright Chromium run: npx playwright install --with-deps chromium diff --git a/docs/multi-agent-scaling-roadmap.md b/docs/multi-agent-scaling-roadmap.md new file mode 100644 index 0000000..3ac49ac --- /dev/null +++ b/docs/multi-agent-scaling-roadmap.md @@ -0,0 +1,315 @@ +# Multi-Agent Scaling Roadmap + +**Feature: RecourseOS Pro — Multi-Agent Support (10+ concurrent agents)** + +This document outlines the technical roadmap for scaling RecourseOS to support 10+ concurrent AI agents. This is a premium/enterprise feature. + +--- + +## Current State + +| Metric | Current Capacity | Target (Pro) | +|--------|------------------|--------------| +| Concurrent evaluations | 3/sec | 50+/sec | +| Max agents | 1-3 | 25 (Pro), 100+ (Enterprise) | +| State lookup latency | 200-500ms | <50ms | +| Request latency under load | >5s | <500ms | + +**Bottlenecks identified:** +- Synchronous blast-radius evaluation blocks event loop +- No connection pooling for AWS/GCP/Azure API calls +- No state caching — every evaluation re-fetches +- In-memory attestations don't scale across processes +- No request queuing or backpressure + +--- + +## Phase 1: Core Parallelization (Week 1-2) + +### 1.1 Async Evaluation Pipeline +**Priority: Critical** + +Move blast-radius analysis off the main thread. 
+ +``` +src/evaluator/terraform.ts +src/evaluator/shell.ts +src/evaluator/mcp.ts +``` + +**Changes:** +- [ ] Wrap `evaluateBlastRadius()` in Worker thread pool +- [ ] Use `piscina` or `workerpool` for thread management +- [ ] Max workers = CPU cores - 1 +- [ ] Fallback to async queue if workers exhausted + +**Impact:** 3 evals/sec → 15-20 evals/sec per process + +### 1.2 Request Queue with Backpressure +**Priority: Critical** + +``` +src/mcp/server.ts +src/http/server.ts +``` + +**Changes:** +- [ ] Add `p-queue` with concurrency limit +- [ ] Implement circuit breaker for downstream failures +- [ ] Add queue depth metrics +- [ ] Return 429 when queue exceeds threshold + +**Config:** +```typescript +{ + maxConcurrency: 10, + maxQueueSize: 100, + queueTimeout: 30000 +} +``` + +--- + +## Phase 2: Connection & Caching (Week 2-3) + +### 2.1 AWS SDK v3 Migration +**Priority: High** + +Replace custom `AwsSignedClient` with official SDK. + +``` +src/state/aws/*.ts +``` + +**Changes:** +- [ ] Migrate to `@aws-sdk/client-*` packages +- [ ] Enable HTTP/2 multiplexing +- [ ] Configure keep-alive connection pooling +- [ ] Add retry with exponential backoff + +**Impact:** 50 connections → 5-10 reused connections + +### 2.2 State Cache Layer +**Priority: High** + +Add Redis-backed cache for state lookups. 
+ +``` +NEW: src/cache/state-cache.ts +NEW: src/cache/redis-client.ts +``` + +**Cache strategy:** +| Resource Type | TTL | Cache Key | +|--------------|-----|-----------| +| S3 bucket | 60s | `s3:{region}:{bucket}` | +| RDS instance | 30s | `rds:{region}:{instanceId}` | +| EC2 instance | 30s | `ec2:{region}:{instanceId}` | +| IAM role | 300s | `iam:{accountId}:{roleName}` | + +**Changes:** +- [ ] Add `ioredis` dependency +- [ ] Implement cache-aside pattern +- [ ] Add cache hit/miss metrics +- [ ] Support cache invalidation via MCP tool + +**Impact:** ~70% cache hit rate → 2-3x throughput + +### 2.3 GCP/Azure Parity +**Priority: Medium** + +Apply same optimizations to other cloud providers. + +``` +src/state/gcp/*.ts +src/state/azure/*.ts +``` + +--- + +## Phase 3: Distributed Architecture (Week 3-4) + +### 3.1 Redis-Backed Attestations +**Priority: High** + +Replace in-memory + file attestation storage. + +``` +src/attestation/service.ts +NEW: src/attestation/redis-store.ts +``` + +**Changes:** +- [ ] Implement `AttestationStore` interface +- [ ] Add Redis implementation with TTL +- [ ] Support cross-process attestation lookups +- [ ] Add attestation replication for HA + +**Schema:** +``` +attestation:{id} → JSON(Attestation) +attestation:by-resource:{resourceId} → SET(attestationIds) +``` + +### 3.2 Horizontal Scaling +**Priority: Medium** + +Support multi-process deployment. + +``` +NEW: src/cluster.ts +``` + +**Changes:** +- [ ] Add Node.js cluster mode support +- [ ] Sticky sessions for MCP connections +- [ ] Shared Redis for cross-process state +- [ ] PM2 ecosystem config + +**Deployment options:** +- Single node: 4-8 workers via cluster +- Multi-node: Load balancer + Redis + +### 3.3 Connection Multiplexing +**Priority: Medium** + +Single MCP connection serving multiple agents. 
+ +``` +src/mcp/server.ts +NEW: src/mcp/multiplexer.ts +``` + +**Changes:** +- [ ] Add agent ID to request context +- [ ] Route responses to correct agent +- [ ] Per-agent rate limiting +- [ ] Agent isolation (no cross-contamination) + +--- + +## Phase 4: Observability & Hardening (Week 4-5) + +### 4.1 Metrics & Monitoring +**Priority: High** + +``` +NEW: src/metrics/prometheus.ts +``` + +**Metrics to track:** +- `recourse_evaluation_duration_seconds` (histogram) +- `recourse_queue_depth` (gauge) +- `recourse_cache_hit_ratio` (gauge) +- `recourse_active_agents` (gauge) +- `recourse_state_lookup_duration_seconds` (histogram) + +### 4.2 Rate Limiting +**Priority: High** + +Per-agent and global rate limits. + +``` +NEW: src/ratelimit/index.ts +``` + +**Tiers:** +| Plan | Evals/min | Agents | State lookups/min | +|------|-----------|--------|-------------------| +| Free | 10 | 1 | 100 | +| Pro | 500 | 25 | 5000 | +| Enterprise | Unlimited | 100+ | Unlimited | + +### 4.3 Graceful Degradation +**Priority: Medium** + +- [ ] Timeout slow evaluations (30s max) +- [ ] Fallback to cached state on API failures +- [ ] Circuit breaker for cloud provider APIs +- [ ] Health check endpoints + +--- + +## Phase 5: Enterprise Features (Week 5-6) + +### 5.1 Multi-Tenancy +**Priority: Enterprise** + +``` +NEW: src/tenant/index.ts +``` + +- [ ] Tenant isolation +- [ ] Per-tenant Redis namespaces +- [ ] Tenant-specific rate limits +- [ ] Usage metering per tenant + +### 5.2 Streaming Evaluations +**Priority: Enterprise** + +For large Terraform plans (1000+ resources). 
+ +- [ ] Stream changes instead of buffering +- [ ] Incremental blast-radius updates +- [ ] Progress callbacks to agents + +### 5.3 Audit Logging +**Priority: Enterprise** + +- [ ] Structured audit log for all evaluations +- [ ] S3/CloudWatch export +- [ ] Compliance reporting + +--- + +## Dependencies + +```json +{ + "piscina": "^4.0.0", + "p-queue": "^7.0.0", + "ioredis": "^5.3.0", + "@aws-sdk/client-s3": "^3.500.0", + "@aws-sdk/client-rds": "^3.500.0", + "@aws-sdk/client-ec2": "^3.500.0", + "prom-client": "^15.0.0" +} +``` + +--- + +## Milestones + +| Milestone | Target | Deliverable | +|-----------|--------|-------------| +| M1: Parallel Eval | Week 2 | 15+ evals/sec single process | +| M2: Cached State | Week 3 | <50ms state lookups | +| M3: Multi-Process | Week 4 | 50+ evals/sec cluster | +| M4: Pro Beta | Week 5 | 25 agent support | +| M5: Enterprise GA | Week 6 | 100+ agent support | + +--- + +## Pricing Considerations + +| Feature | Free | Pro | Enterprise | +|---------|------|-----|------------| +| Concurrent agents | 1 | 25 | 100+ | +| Evaluations/month | 1,000 | 50,000 | Unlimited | +| State caching | No | Yes | Yes | +| Redis (managed) | No | Included | Dedicated | +| SLA | None | 99.5% | 99.9% | +| Support | Community | Email | Dedicated | + +--- + +## Open Questions + +1. **Redis hosting**: Managed (Upstash/Redis Cloud) vs self-hosted? +2. **Pricing model**: Per-agent vs per-evaluation vs flat tier? +3. **Agent authentication**: API keys per agent or per org? +4. **Data residency**: Regional Redis deployments needed? 
+ +--- + +*Last updated: 2026-05-10* diff --git a/package.json b/package.json index d580cf6..27c3bea 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "pack:check": "npm pack --dry-run --cache .npm-cache", "dev": "tsc --watch", "test": "vitest", + "test:coverage": "vitest run --coverage", "test:cli": "npm run build && vitest --run tests/cli-scenarios.test.ts", "test:all": "npm run build && vitest --run && vitest --run tests/cli-scenarios.test.ts", "test:aws-live": "RUN_AWS_LIVE_TESTS=1 vitest --run tests/aws-live.test.ts", diff --git a/src/analyzer/blast-radius.ts b/src/analyzer/blast-radius.ts index 611bd8b..ae18d10 100644 --- a/src/analyzer/blast-radius.ts +++ b/src/analyzer/blast-radius.ts @@ -24,7 +24,7 @@ export function analyzeBlastRadius( state: TerraformState | null, options: AnalyzeOptions = {} ): BlastRadiusReport { - const { includeNonDestructive = true, useClassifier = false } = options; + const { includeNonDestructive = true, useClassifier = true } = options; // Get changes to analyze let changes: ResourceChange[]; diff --git a/src/analyzer/cross-action-patterns.ts b/src/analyzer/cross-action-patterns.ts index e52fd9f..80e0233 100644 --- a/src/analyzer/cross-action-patterns.ts +++ b/src/analyzer/cross-action-patterns.ts @@ -113,8 +113,10 @@ function detectBackupRelationship( } } - // TODO: State lookup for additional relationships - // TODO: Naming convention detection (low confidence) + // Future enhancements (not blocking for v1): + // - State lookup: Query live state for snapshot->instance relationships not in plan + // - Naming convention: Detect patterns like "mydb-snapshot" -> "mydb" (low confidence) + // These would improve detection but explicit_reference covers the critical cases. 
return null; } diff --git a/src/attestation/service.ts b/src/attestation/service.ts index efebe9a..3d04d6e 100644 --- a/src/attestation/service.ts +++ b/src/attestation/service.ts @@ -95,10 +95,24 @@ export class AttestationService { constructor(config: AttestationServiceConfig = {}) { this.configDir = config.configDir ?? join(homedir(), '.recourse'); this.instanceId = config.instanceId ?? 'recourse-local'; - this.instanceBaseUrl = config.instanceBaseUrl ?? 'http://localhost:3001'; this.evaluatorVersion = config.evaluatorVersion ?? '1.0.0'; this.registry = createRegistry(); this.attestationsDir = join(this.configDir, 'attestations'); + + // Resolve instance base URL with production validation + const envUrl = process.env.RECOURSE_INSTANCE_URL; + const defaultUrl = 'http://localhost:3001'; + this.instanceBaseUrl = config.instanceBaseUrl ?? envUrl ?? defaultUrl; + + // Warn if using localhost in production mode + const isProduction = process.env.NODE_ENV === 'production'; + const isLocalhost = this.instanceBaseUrl.includes('localhost') || this.instanceBaseUrl.includes('127.0.0.1'); + if (isProduction && isLocalhost) { + console.warn( + '[WARN] Attestation service using localhost URL in production mode. ' + + 'Set RECOURSE_INSTANCE_URL to a publicly accessible URL for valid attestations.' + ); + } } /** @@ -256,9 +270,10 @@ export class AttestationService { encoding: 'utf8', mode: 0o644, }); - } catch { - // Silently fail - disk persistence is best-effort - // Memory storage is still available for this process + } catch (err) { + // Log persistence failures - audit trail should not silently disappear + const message = err instanceof Error ? err.message : String(err); + console.warn(`[WARN] Failed to persist attestation ${id}: ${message}. 
Memory storage only.`); } } diff --git a/src/cli.ts b/src/cli.ts index d8a2b54..de30599 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -15,6 +15,7 @@ import { evaluateTerraformPlanConsequences, } from './evaluator/index.js'; import type { McpToolCall } from './adapters/index.js'; +import { parseRiskLevels } from './mcp/gateway.js'; import { analyzeDynamoDbTableDeletionEvidence, analyzeIamRoleDeletionEvidence, @@ -189,7 +190,7 @@ mcp } if (options.allow) { - config.allowedRiskLevels = options.allow.split(',') as any; + config.allowedRiskLevels = parseRiskLevels(options.allow); } if (options.verbose !== undefined) { @@ -935,7 +936,23 @@ program } }); +/** + * Execute a shell command after RecourseOS evaluation. + * + * SECURITY NOTE: shell:true is intentional here because: + * 1. This is the `recourse exec` CLI command where user explicitly provides a command + * 2. Commands may include shell features (pipes, redirections, etc.) + * 3. The command is validated by RecourseOS before execution + * + * This is NOT used for programmatic/untrusted input. 
+ */ function executeCommand(command: string): void { + // Validate command is a non-empty string + if (typeof command !== 'string' || command.trim().length === 0) { + console.error('Invalid command: must be a non-empty string'); + process.exit(1); + } + const child = spawn(command, { shell: true, stdio: 'inherit', diff --git a/src/core/consequence.ts b/src/core/consequence.ts index b2789b8..98aecb0 100644 --- a/src/core/consequence.ts +++ b/src/core/consequence.ts @@ -3,6 +3,7 @@ import type { DependencyImpact, EvidenceItem, MissingEvidence, MutationIntent, V import type { EvidenceRequirementLevel, EvidenceSufficiency } from './state-schema.js'; import type { CrossActionRisk } from '../analyzer/cross-action.js'; import type { ReasoningTrace, VerificationInstructions } from '../evaluator/trace.js'; +import type { EvaluationTiming } from './timing.js'; export type ConsequenceDecision = 'allow' | 'warn' | 'block' | 'escalate'; @@ -135,4 +136,10 @@ export interface ConsequenceReport { * Includes CLI commands and API calls a verifier can run. */ verification?: VerificationInstructions; + + /** + * Performance timing metrics for this evaluation. + * Includes total time, phase breakdown, and SLA compliance. + */ + timing?: EvaluationTiming; } diff --git a/src/core/failure-mode.ts b/src/core/failure-mode.ts new file mode 100644 index 0000000..c9d7e38 --- /dev/null +++ b/src/core/failure-mode.ts @@ -0,0 +1,125 @@ +/** + * Failure mode configuration for RecourseOS evaluations. + * + * Determines how RecourseOS behaves when state lookups fail + * (network errors, API timeouts, permission denied, etc.) + */ + +/** + * Failure mode options: + * - 'closed': Block the action when evidence cannot be gathered (safest) + * - 'review': Escalate to human review when evidence unavailable (default) + * - 'open': Allow the action despite missing evidence (dangerous!) 
/**
 * How an evaluation reacts when evidence about a resource cannot be gathered:
 * - 'closed': block the action (safest)
 * - 'review': escalate to human review (default)
 * - 'open': allow the action despite missing evidence (dangerous!)
 */
export type FailureMode = 'closed' | 'review' | 'open';

/**
 * Result of checking for evidence failures in a consequence report.
 */
export interface EvidenceFailureCheck {
  /** Whether any evidence gathering failed */
  hasFailures: boolean;
  /** List of resources with missing evidence (de-duplicated, first-seen order) */
  failedResources: string[];
  /** Reasons for failures (de-duplicated, first-seen order) */
  failureReasons: string[];
}

/**
 * Default failure modes by deployment context.
 * - OSS/self-hosted: 'review' (escalate to human)
 * - Pro/managed: 'closed' (fail-safe)
 * - Explicit override: user's choice
 */
export const DEFAULT_FAILURE_MODE: FailureMode = 'review';
export const PRO_DEFAULT_FAILURE_MODE: FailureMode = 'closed';

/**
 * Collect the resources and reasons for which evidence is missing.
 *
 * A mutation counts as a failure when its `missingEvidence` list is non-empty.
 * Its resource id is taken from `intent.target.id`; a missing, empty or
 * whitespace-only id is reported as `'unknown'` so the report never contains
 * blank entries. Each missing item contributes its `description`, or
 * `Missing evidence: <key>` when the description is absent or blank.
 *
 * @param mutations - Mutations to inspect; the array is not modified.
 * @returns Whether any failure was found, plus de-duplicated resource ids and reasons.
 */
export function checkEvidenceFailures(
  mutations: ReadonlyArray<{
    missingEvidence?: ReadonlyArray<{ key: string; description?: string }>;
    intent?: { target?: { id?: string } };
  }>
): EvidenceFailureCheck {
  // Sets give de-duplication while preserving first-seen order.
  const failedResources = new Set<string>();
  const failureReasons = new Set<string>();

  for (const mutation of mutations) {
    const missingItems = mutation.missingEvidence;
    if (!missingItems || missingItems.length === 0) {
      continue;
    }

    const rawId = mutation.intent?.target?.id;
    failedResources.add(rawId !== undefined && rawId.trim().length > 0 ? rawId : 'unknown');

    for (const item of missingItems) {
      const description = item.description;
      failureReasons.add(
        description !== undefined && description.trim().length > 0
          ? description
          : `Missing evidence: ${item.key}`
      );
    }
  }

  return {
    hasFailures: failedResources.size > 0,
    failedResources: [...failedResources],
    failureReasons: [...failureReasons],
  };
}
/**
 * Apply the configured failure mode to a consequence decision.
 *
 * When `failureCheck.hasFailures` is false the inputs are returned untouched.
 * Otherwise:
 * - 'closed': always returns 'block'.
 * - 'review': upgrades 'allow'/'warn' to 'escalate'; 'escalate'/'block' are
 *   returned unchanged together with their original reason.
 * - 'open': never changes the decision, but annotates the reason.
 * - any other value (e.g. from untyped config): never weakens the decision —
 *   'block' stays 'block', everything else becomes 'escalate'.
 *
 * Whenever the reason is rewritten, the original policy reason is appended so
 * the explanation of the underlying assessment is not lost.
 *
 * @param currentDecision - The decision from normal policy evaluation
 * @param currentReason - The reason attached to that decision
 * @param failureCheck - Result of evidence failure check
 * @param failureMode - The configured failure mode
 * @returns The potentially modified decision and reason
 */
export function applyFailureMode(
  currentDecision: 'allow' | 'warn' | 'escalate' | 'block',
  currentReason: string,
  failureCheck: EvidenceFailureCheck,
  failureMode: FailureMode
): { decision: 'allow' | 'warn' | 'escalate' | 'block'; reason: string } {
  // No failures, return original decision
  if (!failureCheck.hasFailures) {
    return { decision: currentDecision, reason: currentReason };
  }

  const failureContext = `Evidence unavailable for: ${failureCheck.failedResources.join(', ')}`;
  // Suffix that keeps the policy's own explanation attached to the rewritten reason.
  const originalAssessment =
    currentReason.trim().length > 0 ? ` Original assessment: ${currentReason}` : '';
  const belowEscalate = currentDecision === 'allow' || currentDecision === 'warn';

  switch (failureMode) {
    case 'closed':
      // Fail-closed: always block when evidence is missing
      return {
        decision: 'block',
        reason: `[FAIL-CLOSED] ${failureContext}. Action blocked due to inability to verify safety.${originalAssessment}`,
      };

    case 'review':
      // Fail-review: only upgrade if the current decision is less severe than escalate
      if (belowEscalate) {
        return {
          decision: 'escalate',
          reason: `[FAIL-REVIEW] ${failureContext}. Human review required.${originalAssessment}`,
        };
      }
      return { decision: currentDecision, reason: currentReason };

    case 'open':
      // Fail-open: the decision is never changed. Only claim that the action
      // proceeds when the decision actually lets it proceed.
      return {
        decision: currentDecision,
        reason: belowEscalate
          ? `[FAIL-OPEN WARNING] ${failureContext}. Proceeding without complete evidence.${originalAssessment}`
          : `[FAIL-OPEN WARNING] ${failureContext}. Decision left unchanged by failure mode.${originalAssessment}`,
      };

    default:
      // Unknown mode: fail safe without weakening an existing block.
      return {
        decision: currentDecision === 'block' ? 'block' : 'escalate',
        reason: `Unknown failure mode. ${failureContext}`,
      };
  }
}

/**
 * SLA targets for different operation types (in milliseconds).
 *
 * These are p95 targets - 95% of evaluations should complete within these times.
 */
export const SLA_TARGETS = {
  /** Single resource evaluation without network calls */
  localEvaluation: 10,
  /** Single resource evaluation with AWS state lookup */
  remoteEvaluation: 500,
  /** Full plan evaluation (up to 50 resources) */
  planEvaluation: 2000,
  /** Shell command parsing and evaluation */
  shellEvaluation: 5,
  /** MCP tool call evaluation */
  mcpEvaluation: 10,
} as const;

/** Name of one of the operation types listed in {@link SLA_TARGETS}. */
export type SLATarget = keyof typeof SLA_TARGETS;
/**
 * Timing metrics for an evaluation.
 */
export interface EvaluationTiming {
  /** Total wall-clock time in milliseconds */
  totalMs: number;
  /** Time spent in parsing/preparation */
  parseMs?: number;
  /** Time spent in blast radius analysis */
  analysisMs?: number;
  /** Time spent in policy evaluation */
  policyMs?: number;
  /** Time spent waiting for remote state lookups */
  remoteMs?: number;
  /** Whether the evaluation met its SLA target */
  metSla: boolean;
  /** The SLA target used for comparison */
  slaTarget: SLATarget;
  /** The target time in milliseconds */
  slaTargetMs: number;
}

/** Start/end timestamps (from `performance.now()`) of one named phase. */
interface PhaseSpan {
  start: number;
  /** Unset while the phase is still running. */
  end?: number;
}

/**
 * Timer for tracking evaluation performance.
 *
 * The clock starts at construction. Phases are tracked by name; `finish()`
 * reports the phases named 'parse', 'analysis', 'policy' and 'remote'.
 */
export class EvaluationTimer {
  private readonly startTime: number;
  private readonly phases = new Map<string, PhaseSpan>();
  private slaTarget: SLATarget;

  /**
   * @param slaTarget - SLA bucket the total time is compared against.
   */
  constructor(slaTarget: SLATarget = 'localEvaluation') {
    this.startTime = performance.now();
    this.slaTarget = slaTarget;
  }

  /**
   * Start timing a phase. Starting a name again replaces its earlier span.
   */
  startPhase(name: string): void {
    this.phases.set(name, { start: performance.now() });
  }

  /**
   * End timing a phase.
   *
   * @returns The phase duration in milliseconds, or 0 if the phase was never started.
   */
  endPhase(name: string): number {
    const span = this.phases.get(name);
    if (span === undefined) {
      return 0;
    }
    span.end = performance.now();
    return span.end - span.start;
  }

  /**
   * Get the duration of a completed phase.
   *
   * @returns Milliseconds, or `undefined` when the phase is unknown or still running.
   */
  getPhaseMs(name: string): number | undefined {
    const span = this.phases.get(name);
    if (span === undefined || span.end === undefined) {
      return undefined;
    }
    return span.end - span.start;
  }

  /**
   * Set the SLA target for this evaluation.
   */
  setSlaTarget(target: SLATarget): void {
    this.slaTarget = target;
  }

  /**
   * Complete timing and return metrics.
   *
   * `totalMs` is rounded to two decimals; the SLA comparison uses the
   * unrounded elapsed time.
   */
  finish(): EvaluationTiming {
    const elapsedMs = performance.now() - this.startTime;
    const slaTargetMs = SLA_TARGETS[this.slaTarget];

    return {
      totalMs: Math.round(elapsedMs * 100) / 100,
      parseMs: this.getPhaseMs('parse'),
      analysisMs: this.getPhaseMs('analysis'),
      policyMs: this.getPhaseMs('policy'),
      remoteMs: this.getPhaseMs('remote'),
      metSla: elapsedMs <= slaTargetMs,
      slaTarget: this.slaTarget,
      slaTargetMs,
    };
  }
}

/**
 * Format timing metrics for human-readable output.
 *
 * Produces `<status> <total>ms | parse=… | analysis=… | policy=… | remote=… | (target: …)`,
 * omitting any phase that was not measured.
 */
export function formatTiming(timing: EvaluationTiming): string {
  const status = timing.metSla ? '✓' : '⚠';
  const parts = [`${status} ${timing.totalMs.toFixed(1)}ms`];

  // Label / value pairs in the fixed order they are printed.
  const phaseColumns: ReadonlyArray<readonly [string, number | undefined]> = [
    ['parse', timing.parseMs],
    ['analysis', timing.analysisMs],
    ['policy', timing.policyMs],
    ['remote', timing.remoteMs],
  ];
  for (const [label, ms] of phaseColumns) {
    if (ms !== undefined) {
      parts.push(`${label}=${ms.toFixed(1)}ms`);
    }
  }

  parts.push(`(target: ${timing.slaTargetMs}ms ${timing.slaTarget})`);

  return parts.join(' | ');
}
options.awsEvidence?.rdsInstances); const dynamoDbAnalysis = getDynamoDbAnalysis(intent, options.awsEvidence?.dynamoDbTables); @@ -105,6 +112,9 @@ export function evaluateMcpToolCallConsequences( // Build required evidence for the consequence report const requiredEvidence = buildRequiredEvidenceForIntent(intent, mutation.evidence); + timer.endPhase('analysis'); + const timing = timer.finish(); + return { mutations: [mutation], summary: { @@ -117,6 +127,7 @@ export function evaluateMcpToolCallConsequences( riskAssessment: policyEvaluation.decision, assessmentReason: policyEvaluation.reason, requiredEvidence, + timing, }; } diff --git a/src/evaluator/shell.ts b/src/evaluator/shell.ts index 0a5571f..19b4404 100644 --- a/src/evaluator/shell.ts +++ b/src/evaluator/shell.ts @@ -6,6 +6,7 @@ import type { ConsequenceReport, MutationIntent, } from '../core/index.js'; +import { EvaluationTimer } from '../core/index.js'; import { RecoverabilityLabels, RecoverabilityTier, @@ -44,6 +45,9 @@ export function evaluateShellCommandConsequences( input: ShellCommandInput | string, options: ShellConsequenceOptions = {} ): ConsequenceReport { + const timer = new EvaluationTimer('shellEvaluation'); + timer.startPhase('parse'); + const command = typeof input === 'string' ? input : input.command; const intent = shellCommandToMutation(command, { ...options.adapterContext, @@ -52,7 +56,9 @@ export function evaluateShellCommandConsequences( cwd: typeof input === 'string' ? 
undefined : input.cwd, }, }); + timer.endPhase('parse'); + timer.startPhase('analysis'); const s3Analysis = getS3Analysis(intent, options.awsEvidence?.s3Buckets); const rdsAnalysis = getRdsAnalysis(intent, options.awsEvidence?.rdsInstances); const dynamoDbAnalysis = getDynamoDbAnalysis(intent, options.awsEvidence?.dynamoDbTables); @@ -92,6 +98,9 @@ export function evaluateShellCommandConsequences( dependencyImpact: [], }; + timer.endPhase('analysis'); + const timing = timer.finish(); + return { mutations: [mutation], summary: { @@ -103,6 +112,7 @@ export function evaluateShellCommandConsequences( }, riskAssessment: policyEvaluation.decision, assessmentReason: policyEvaluation.reason, + timing, }; } diff --git a/src/evaluator/terraform.ts b/src/evaluator/terraform.ts index aca0712..ec00f73 100644 --- a/src/evaluator/terraform.ts +++ b/src/evaluator/terraform.ts @@ -16,6 +16,14 @@ import type { VerificationStatusInfo, RequiredEvidence, EvidenceItem, + FailureMode, +} from '../core/index.js'; +import { + checkEvidenceFailures, + applyFailureMode, + DEFAULT_FAILURE_MODE, + EvaluationTimer, + type EvaluationTiming, } from '../core/index.js'; import { TraceBuilder, @@ -53,6 +61,10 @@ export function evaluateTerraformPlanConsequences( state: TerraformState | null, options: TerraformConsequenceOptions = {} ): ConsequenceReport { + // Initialize timing + const timer = new EvaluationTimer('planEvaluation'); + timer.startPhase('parse'); + // Initialize trace capture const trace = new TraceBuilder(); trace.source('terraform-plan'); @@ -61,7 +73,9 @@ export function evaluateTerraformPlanConsequences( } trace.step('parse_input', `Parsed Terraform plan with ${plan.resourceChanges?.length ?? 
0} resource changes`); + timer.endPhase('parse'); + timer.startPhase('analysis'); const blastRadiusReport = analyzeBlastRadius(plan, state, { useClassifier: options.useClassifier, }); @@ -69,7 +83,9 @@ export function evaluateTerraformPlanConsequences( trace.step('analyze_blast_radius', `Analyzed ${blastRadiusReport.changes.length} changes`, { decision: `total_changes=${blastRadiusReport.summary.totalChanges}, has_unrecoverable=${blastRadiusReport.summary.hasUnrecoverable}`, }); + timer.endPhase('analysis'); + timer.startPhase('policy'); const policyEvaluation = evaluateBlastRadiusReport( blastRadiusReport, options.policy @@ -274,6 +290,36 @@ export function evaluateTerraformPlanConsequences( ) : undefined; + // Check for evidence failures and apply failure mode + const failureCheck = checkEvidenceFailures(mutations); + const effectiveFailureMode = options.policy?.failureMode ?? DEFAULT_FAILURE_MODE; + + let finalDecision = finalPolicyEvaluation.decision; + let finalReason = hasUnrecoverableFromCrossAction + ? 
`${finalPolicyEvaluation.reason} (cross-action risk detected)` + : finalPolicyEvaluation.reason; + + // Apply failure mode if there are evidence failures + if (failureCheck.hasFailures) { + const failureModeResult = applyFailureMode( + finalDecision, + finalReason, + failureCheck, + effectiveFailureMode + ); + finalDecision = failureModeResult.decision; + finalReason = failureModeResult.reason; + + trace.step('failure_mode_check', `Evidence failures detected, applying ${effectiveFailureMode} mode`, { + decision: `failed_resources=${failureCheck.failedResources.length}, mode=${effectiveFailureMode}, result=${finalDecision}`, + }); + } + + timer.endPhase('policy'); + + // Finalize timing + const timing = timer.finish(); + const report: ConsequenceReport = { mutations, summary: { @@ -283,15 +329,15 @@ export function evaluateTerraformPlanConsequences( hasUnrecoverable, dependencyImpactCount: blastRadiusReport.summary.cascadeImpactCount, }, - riskAssessment: finalPolicyEvaluation.decision, - assessmentReason: hasUnrecoverableFromCrossAction - ? 
`${finalPolicyEvaluation.reason} (cross-action risk detected)` - : finalPolicyEvaluation.reason, + riskAssessment: finalDecision, + assessmentReason: finalReason, // Always include cross-action risks (empty array if none detected) crossActionRisks, // Attestation richness fields trace: trace.build(), verification: verificationInstructions, + // Performance timing + timing, }; // Add verification protocol fields diff --git a/src/http/server.ts b/src/http/server.ts index 1d31705..fdfedd8 100644 --- a/src/http/server.ts +++ b/src/http/server.ts @@ -2,7 +2,7 @@ import { createServer, type IncomingMessage, type ServerResponse } from 'http'; import { readFileSync, existsSync } from 'fs'; import { join, extname } from 'path'; import { fileURLToPath } from 'url'; -import { exec } from 'child_process'; +import { execFile } from 'child_process'; import { parsePlanJson } from '../parsers/plan.js'; import { evaluateMcpToolCallConsequences, @@ -153,7 +153,10 @@ export async function runHttpServer(options: HttpServerOptions = {}): Promise { + if (err) { + console.error('Failed to open browser:', err.message); + } + }); } function evaluate(request: EvaluateRequest) { @@ -222,7 +231,7 @@ function evaluate(request: EvaluateRequest) { : JSON.stringify(request.input); const plan = parsePlanJson(planJson); return evaluateTerraformPlanConsequences(plan, null, { - useClassifier: request.options?.classifier ?? false, + useClassifier: request.options?.classifier ?? 
true, adapterContext, }); } @@ -254,10 +263,23 @@ function evaluate(request: EvaluateRequest) { } } +// Maximum request body size (10MB) to prevent DoS +const MAX_BODY_SIZE = 10 * 1024 * 1024; + function readBody(req: IncomingMessage): Promise { return new Promise((resolve, reject) => { const chunks: Buffer[] = []; - req.on('data', chunk => chunks.push(chunk)); + let totalSize = 0; + + req.on('data', (chunk: Buffer) => { + totalSize += chunk.length; + if (totalSize > MAX_BODY_SIZE) { + req.destroy(); + reject(new Error(`Request body exceeds maximum size of ${MAX_BODY_SIZE} bytes`)); + return; + } + chunks.push(chunk); + }); req.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))); req.on('error', reject); }); diff --git a/src/iam/broker-server.ts b/src/iam/broker-server.ts index ceec32f..8b15be5 100644 --- a/src/iam/broker-server.ts +++ b/src/iam/broker-server.ts @@ -88,14 +88,27 @@ export async function startBrokerServer( }); } +// Maximum request body size (1MB) to prevent DoS +const MAX_BODY_SIZE = 1024 * 1024; + /** - * Read request body + * Read request body with size limit */ function readBody(req: http.IncomingMessage): Promise { return new Promise((resolve, reject) => { - let body = ''; - req.on('data', (chunk) => (body += chunk.toString())); - req.on('end', () => resolve(body)); + const chunks: Buffer[] = []; + let totalSize = 0; + + req.on('data', (chunk: Buffer) => { + totalSize += chunk.length; + if (totalSize > MAX_BODY_SIZE) { + req.destroy(); + reject(new Error(`Request body exceeds maximum size of ${MAX_BODY_SIZE} bytes`)); + return; + } + chunks.push(chunk); + }); + req.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))); req.on('error', reject); }); } diff --git a/src/iam/session-broker.ts b/src/iam/session-broker.ts index 1b7623d..6bb7b4d 100644 --- a/src/iam/session-broker.ts +++ b/src/iam/session-broker.ts @@ -29,6 +29,7 @@ import { } from '../evaluator/index.js'; import { toConsequenceJson } from 
// Valid risk level values
export type RiskLevel = 'allow' | 'warn' | 'escalate' | 'block';
const VALID_RISK_LEVELS: readonly RiskLevel[] = ['allow', 'warn', 'escalate', 'block'];

/** Runtime check that narrows an arbitrary string to a known risk level. */
function isRiskLevel(value: string): value is RiskLevel {
  return (VALID_RISK_LEVELS as readonly string[]).includes(value);
}

/**
 * Parse and validate risk levels from a comma-separated string.
 *
 * Entries are trimmed and lower-cased. Empty entries are skipped silently,
 * unknown entries are dropped with a single console warning, and repeated
 * levels are returned once (first occurrence wins).
 *
 * @param input - Comma-separated levels, e.g. `"allow, warn"`.
 * @returns The recognised levels in input order, or `['allow', 'warn']` when
 *   the input contains no recognised level at all.
 */
export function parseRiskLevels(input: string): RiskLevel[] {
  const accepted = new Set<RiskLevel>();
  const rejected: string[] = [];

  for (const rawEntry of input.split(',')) {
    const level = rawEntry.trim().toLowerCase();
    if (isRiskLevel(level)) {
      accepted.add(level);
    } else if (level.length > 0) {
      rejected.push(level);
    }
  }

  if (rejected.length > 0) {
    console.warn(`[WARN] Invalid risk levels ignored: ${rejected.join(', ')}. Valid: ${VALID_RISK_LEVELS.join(', ')}`);
  }

  return accepted.size > 0 ? [...accepted] : ['allow', 'warn']; // Default if nothing valid
}
(buffer.length < frameEnd) return null; diff --git a/src/notifications/index.ts b/src/notifications/index.ts index 57ff878..0aaf084 100644 --- a/src/notifications/index.ts +++ b/src/notifications/index.ts @@ -1,20 +1,19 @@ /** * Notification system for RecourseOS escalations. * - * Supports: + * Currently supported: * - Slack webhooks (RECOURSE_SLACK_WEBHOOK) * - Discord webhooks (RECOURSE_DISCORD_WEBHOOK) - * - PagerDuty Events API (PAGERDUTY_ROUTING_KEY) - * - Opsgenie Alerts API (OPSGENIE_API_KEY) + * + * Planned (not yet implemented): + * - PagerDuty Events API + * - Opsgenie Alerts API * * Notifications are sent automatically when risk is 'escalate' or 'block'. */ export { sendSlackNotification, createSlackNotifier, formatSlackMessage } from './slack.js'; export { sendDiscordNotification, createDiscordNotifier, formatDiscordMessage } from './discord.js'; -// PagerDuty and Opsgenie temporarily disabled - type fixes needed -// export { ... } from './pagerduty.js'; -// export { ... } from './opsgenie.js'; export interface ConsequenceAlert { riskAssessment: 'allow' | 'warn' | 'escalate' | 'block'; @@ -68,9 +67,7 @@ export function createNotifier(): Notifier | null { export function hasNotifications(): boolean { return !!( process.env.RECOURSE_SLACK_WEBHOOK || - process.env.RECOURSE_DISCORD_WEBHOOK || - process.env.PAGERDUTY_ROUTING_KEY || - process.env.OPSGENIE_API_KEY + process.env.RECOURSE_DISCORD_WEBHOOK ); } diff --git a/src/policy/local.ts b/src/policy/local.ts index 8b1ee31..21f908d 100644 --- a/src/policy/local.ts +++ b/src/policy/local.ts @@ -4,13 +4,20 @@ import { type BlastRadiusReport, type RecoverabilityResult, } from '../resources/types.js'; -import type { ConsequenceDecision } from '../core/index.js'; +import type { ConsequenceDecision, FailureMode } from '../core/index.js'; export interface LocalPolicy { blockOn?: RecoverabilityTier; escalateOn?: RecoverabilityTier; warnOn?: RecoverabilityTier; requireReviewOnNeedsReview?: boolean; + /** + * How to 
handle evidence gathering failures (API errors, timeouts, etc.) + * - 'closed': Block action when evidence unavailable (safest, recommended for Pro) + * - 'review': Escalate to human review (default for OSS) + * - 'open': Allow action despite missing evidence (dangerous!) + */ + failureMode?: FailureMode; } export interface PolicyEvaluation { @@ -24,6 +31,7 @@ export const defaultLocalPolicy: Required = { escalateOn: RecoverabilityTier.NEEDS_REVIEW, warnOn: RecoverabilityTier.RECOVERABLE_FROM_BACKUP, requireReviewOnNeedsReview: true, + failureMode: 'review', // Default: escalate to human when evidence unavailable }; export function evaluateRecoverability( diff --git a/src/state/aws/client.ts b/src/state/aws/client.ts index ad7fa5a..9d870e2 100644 --- a/src/state/aws/client.ts +++ b/src/state/aws/client.ts @@ -28,13 +28,125 @@ export type AwsTransport = ( input: AwsRequestInput & { headers: Record; body: string } ) => Promise; +export interface RetryOptions { + /** Maximum number of retry attempts (default: 3) */ + maxRetries?: number; + /** Base delay in ms for exponential backoff (default: 100) */ + baseDelayMs?: number; + /** Maximum delay in ms (default: 5000) */ + maxDelayMs?: number; + /** Whether to retry on 5xx errors (default: true) */ + retryOn5xx?: boolean; + /** Whether to retry on network errors (default: true) */ + retryOnNetworkError?: boolean; +} + +const DEFAULT_RETRY_OPTIONS: Required = { + maxRetries: 3, + baseDelayMs: 100, + maxDelayMs: 5000, + retryOn5xx: true, + retryOnNetworkError: true, +}; + +/** + * Sleep for a given number of milliseconds. + */ +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** + * Calculate delay with exponential backoff and jitter. 
/**
 * Send a signed request, retrying transient failures with exponential
 * backoff and jitter.
 *
 * A response is retried when `shouldRetry` accepts its status code. An
 * exception thrown by `requestOnce` is retried when `retryOnNetworkError`
 * is set. At least one attempt is always made, followed by at most
 * `maxRetries` retries.
 *
 * This method does not throw for transport failures. When the final attempt
 * throws, the error is reported as a response with `statusCode: 0` and the
 * error message as `body`, so callers must check `statusCode`.
 *
 * @param input - The request to sign and send.
 * @returns The last response received, or a synthetic `statusCode: 0`
 *   response when the last attempt failed with an exception.
 */
async request(input: AwsRequestInput): Promise<AwsHttpResponse> {
  const { baseDelayMs, maxDelayMs, retryOnNetworkError } = this.retryOptions;

  // The constructor spreads caller options over the defaults, so an explicit
  // `maxRetries: undefined` overwrites the default. A NaN or negative budget
  // would likewise make the loop bound false immediately, skipping the request.
  // Normalise the budget so the first attempt always happens.
  const configuredRetries = this.retryOptions.maxRetries ?? DEFAULT_RETRY_OPTIONS.maxRetries;
  const maxRetries = configuredRetries > 0 ? configuredRetries : 0;

  for (let attempt = 0; ; attempt++) {
    let outcome: AwsHttpResponse;
    let retryable: boolean;

    try {
      outcome = await this.requestOnce(input);
      retryable = shouldRetry(outcome.statusCode, this.retryOptions);
    } catch (error) {
      // Surface the transport failure as statusCode 0 instead of throwing.
      const message = error instanceof Error ? error.message : String(error);
      outcome = { statusCode: 0, body: message, headers: {} };
      retryable = retryOnNetworkError;
    }

    const retriesLeft = attempt < maxRetries;
    if (!retryable || !retriesLeft) {
      return outcome;
    }

    await sleep(calculateBackoff(attempt, baseDelayMs, maxDelayMs));
  }
}
checkEvidenceFailures(mutations); + + expect(result.hasFailures).toBe(true); + expect(result.failedResources).toContain('my-bucket'); + expect(result.failureReasons).toContain('Unable to verify s3.versioning'); + }); + + it('deduplicates resources and reasons', () => { + const mutations = [ + { + missingEvidence: [ + { key: 's3.versioning', description: 'API error' }, + { key: 's3.replication', description: 'API error' }, + ], + intent: { target: { id: 'bucket-1' } }, + }, + { + missingEvidence: [{ key: 's3.lifecycle', description: 'API error' }], + intent: { target: { id: 'bucket-1' } }, + }, + ]; + + const result = checkEvidenceFailures(mutations); + + expect(result.failedResources).toEqual(['bucket-1']); + expect(result.failureReasons).toEqual(['API error']); + }); + }); + + describe('applyFailureMode', () => { + const failureCheck = { + hasFailures: true, + failedResources: ['my-bucket'], + failureReasons: ['Network timeout'], + }; + + const noFailures = { + hasFailures: false, + failedResources: [], + failureReasons: [], + }; + + it('returns original decision when no failures', () => { + const result = applyFailureMode('allow', 'All good', noFailures, 'closed'); + + expect(result.decision).toBe('allow'); + expect(result.reason).toBe('All good'); + }); + + describe('fail-closed mode', () => { + it('blocks when evidence is unavailable', () => { + const result = applyFailureMode('allow', 'Original', failureCheck, 'closed'); + + expect(result.decision).toBe('block'); + expect(result.reason).toContain('FAIL-CLOSED'); + expect(result.reason).toContain('my-bucket'); + }); + + it('blocks even if original decision was escalate', () => { + const result = applyFailureMode('escalate', 'Original', failureCheck, 'closed'); + + expect(result.decision).toBe('block'); + }); + }); + + describe('fail-review mode', () => { + it('escalates allow to review when evidence unavailable', () => { + const result = applyFailureMode('allow', 'Original', failureCheck, 'review'); + + 
expect(result.decision).toBe('escalate'); + expect(result.reason).toContain('FAIL-REVIEW'); + }); + + it('escalates warn to review', () => { + const result = applyFailureMode('warn', 'Original', failureCheck, 'review'); + + expect(result.decision).toBe('escalate'); + }); + + it('does not downgrade existing escalate/block', () => { + expect(applyFailureMode('escalate', 'Original', failureCheck, 'review').decision).toBe('escalate'); + expect(applyFailureMode('block', 'Original', failureCheck, 'review').decision).toBe('block'); + }); + }); + + describe('fail-open mode', () => { + it('keeps original decision but adds warning', () => { + const result = applyFailureMode('allow', 'Original', failureCheck, 'open'); + + expect(result.decision).toBe('allow'); + expect(result.reason).toContain('FAIL-OPEN WARNING'); + }); + }); + }); + + describe('default failure modes', () => { + it('OSS default is review', () => { + expect(DEFAULT_FAILURE_MODE).toBe('review'); + }); + + it('Pro default is closed', () => { + expect(PRO_DEFAULT_FAILURE_MODE).toBe('closed'); + }); + }); +}); diff --git a/tests/timing.test.ts b/tests/timing.test.ts new file mode 100644 index 0000000..1480a80 --- /dev/null +++ b/tests/timing.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect } from 'vitest'; +import { + EvaluationTimer, + SLA_TARGETS, + formatTiming, +} from '../src/core/timing.js'; + +describe('timing', () => { + describe('SLA_TARGETS', () => { + it('defines targets for all evaluation types', () => { + expect(SLA_TARGETS.localEvaluation).toBe(10); + expect(SLA_TARGETS.remoteEvaluation).toBe(500); + expect(SLA_TARGETS.planEvaluation).toBe(2000); + expect(SLA_TARGETS.shellEvaluation).toBe(5); + expect(SLA_TARGETS.mcpEvaluation).toBe(10); + }); + }); + + describe('EvaluationTimer', () => { + it('tracks total time', () => { + const timer = new EvaluationTimer('localEvaluation'); + const timing = timer.finish(); + + expect(timing.totalMs).toBeGreaterThanOrEqual(0); + 
expect(timing.slaTarget).toBe('localEvaluation'); + expect(timing.slaTargetMs).toBe(10); + }); + + it('tracks phases', () => { + const timer = new EvaluationTimer(); + timer.startPhase('parse'); + timer.endPhase('parse'); + timer.startPhase('analysis'); + timer.endPhase('analysis'); + + const timing = timer.finish(); + + expect(timing.parseMs).toBeDefined(); + expect(timing.analysisMs).toBeDefined(); + expect(timing.parseMs).toBeGreaterThanOrEqual(0); + expect(timing.analysisMs).toBeGreaterThanOrEqual(0); + }); + + it('reports SLA compliance', () => { + const timer = new EvaluationTimer('localEvaluation'); + const timing = timer.finish(); + + // Local evaluation should be fast enough to meet SLA + expect(timing.metSla).toBe(true); + }); + + it('allows changing SLA target', () => { + const timer = new EvaluationTimer('localEvaluation'); + timer.setSlaTarget('planEvaluation'); + const timing = timer.finish(); + + expect(timing.slaTarget).toBe('planEvaluation'); + expect(timing.slaTargetMs).toBe(2000); + }); + }); + + describe('formatTiming', () => { + it('formats timing with checkmark when SLA met', () => { + const timing = { + totalMs: 5.5, + parseMs: 1.2, + analysisMs: 4.3, + metSla: true, + slaTarget: 'localEvaluation' as const, + slaTargetMs: 10, + }; + + const formatted = formatTiming(timing); + + expect(formatted).toContain('✓'); + expect(formatted).toContain('5.5ms'); + expect(formatted).toContain('parse=1.2ms'); + expect(formatted).toContain('analysis=4.3ms'); + expect(formatted).toContain('target: 10ms'); + }); + + it('formats timing with warning when SLA missed', () => { + const timing = { + totalMs: 150.0, + metSla: false, + slaTarget: 'localEvaluation' as const, + slaTargetMs: 10, + }; + + const formatted = formatTiming(timing); + + expect(formatted).toContain('⚠'); + expect(formatted).toContain('150.0ms'); + }); + }); +}); diff --git a/vitest.config.ts b/vitest.config.ts index a37182f..6926412 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -3,5 
+3,25 @@ import { configDefaults, defineConfig } from 'vitest/config'; export default defineConfig({ test: { exclude: [...configDefaults.exclude, '**/*visual.spec.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html'], + include: ['src/**/*.ts'], + exclude: [ + 'src/**/*.d.ts', + 'src/**/types.ts', + 'src/tools/**', + 'src/index.ts', + ], + thresholds: { + // Minimum coverage thresholds (fail CI if below these) + // Current baseline (May 2026): ~54% lines, ~50% branches + // These prevent regression; increase targets as coverage improves + lines: 50, + functions: 50, + branches: 45, + statements: 50, + }, + }, }, });