From 06d19f74bb73b46f666e6b9b1aa59a5240c4e401 Mon Sep 17 00:00:00 2001 From: Jessie Hermosillo Date: Mon, 11 May 2026 12:10:31 -0400 Subject: [PATCH 1/3] feat: Add fail-closed mode for API errors (#52) Implements configurable failure mode for RecourseOS evaluations: - 'closed': Block action when evidence unavailable (safest) - 'review': Escalate to human review (default for OSS) - 'open': Allow despite missing evidence (dangerous) Changes: - Add FailureMode type and utilities in src/core/failure-mode.ts - Add failureMode option to LocalPolicy - Apply failure mode logic in terraform evaluator - Add retry with exponential backoff to AWS client (3 retries, jitter) - Add comprehensive tests for failure mode behavior Pro/Enterprise deployments should use 'closed' mode by default. OSS/self-hosted uses 'review' mode to escalate to human. Co-Authored-By: Claude Opus 4.5 --- docs/multi-agent-scaling-roadmap.md | 315 ++++++++++++++++++++++++++++ src/core/failure-mode.ts | 125 +++++++++++ src/core/index.ts | 13 ++ src/evaluator/terraform.ts | 37 +++- src/policy/local.ts | 10 +- src/state/aws/client.ts | 116 +++++++++- tests/failure-mode.test.ts | 138 ++++++++++++ 7 files changed, 747 insertions(+), 7 deletions(-) create mode 100644 docs/multi-agent-scaling-roadmap.md create mode 100644 src/core/failure-mode.ts create mode 100644 tests/failure-mode.test.ts diff --git a/docs/multi-agent-scaling-roadmap.md b/docs/multi-agent-scaling-roadmap.md new file mode 100644 index 0000000..3ac49ac --- /dev/null +++ b/docs/multi-agent-scaling-roadmap.md @@ -0,0 +1,315 @@ +# Multi-Agent Scaling Roadmap + +**Feature: RecourseOS Pro — Multi-Agent Support (10+ concurrent agents)** + +This document outlines the technical roadmap for scaling RecourseOS to support 10+ concurrent AI agents. This is a premium/enterprise feature. + +--- + +## Current State + +| Metric | Current Capacity | Target (Pro) | +|--------|------------------|--------------| +| Concurrent evaluations | 3/sec | 50+/sec | +| Max agents | 1-3 | 100+ | +| State lookup latency | 200-500ms | <50ms | +| Request latency under load | >5s | <500ms | + +**Bottlenecks identified:** +- Synchronous blast-radius evaluation blocks event loop +- No connection pooling for AWS/GCP/Azure API calls +- No state caching — every evaluation re-fetches +- In-memory attestations don't scale across processes +- No request queuing or backpressure + +--- + +## Phase 1: Core Parallelization (Week 1-2) + +### 1.1 Async Evaluation Pipeline +**Priority: Critical** + +Move blast-radius analysis off the main thread. + +``` +src/evaluator/terraform.ts +src/evaluator/shell.ts +src/evaluator/mcp.ts +``` + +**Changes:** +- [ ] Wrap `evaluateBlastRadius()` in Worker thread pool +- [ ] Use `piscina` or `workerpool` for thread management +- [ ] Max workers = CPU cores - 1 +- [ ] Fallback to async queue if workers exhausted + +**Impact:** 3 evals/sec → 15-20 evals/sec per process + +### 1.2 Request Queue with Backpressure +**Priority: Critical** + +``` +src/mcp/server.ts +src/http/server.ts +``` + +**Changes:** +- [ ] Add `p-queue` with concurrency limit +- [ ] Implement circuit breaker for downstream failures +- [ ] Add queue depth metrics +- [ ] Return 429 when queue exceeds threshold + +**Config:** +```typescript +{ + maxConcurrency: 10, + maxQueueSize: 100, + queueTimeout: 30000 +} +``` + +--- + +## Phase 2: Connection & Caching (Week 2-3) + +### 2.1 AWS SDK v3 Migration +**Priority: High** + +Replace custom `AwsSignedClient` with official SDK. + +``` +src/state/aws/*.ts +``` + +**Changes:** +- [ ] Migrate to `@aws-sdk/client-*` packages +- [ ] Enable HTTP/2 multiplexing +- [ ] Configure keep-alive connection pooling +- [ ] Add retry with exponential backoff + +**Impact:** 50 connections → 5-10 reused connections + +### 2.2 State Cache Layer +**Priority: High** + +Add Redis-backed cache for state lookups. + +``` +NEW: src/cache/state-cache.ts +NEW: src/cache/redis-client.ts +``` + +**Cache strategy:** +| Resource Type | TTL | Cache Key | +|--------------|-----|-----------| +| S3 bucket | 60s | `s3:{region}:{bucket}` | +| RDS instance | 30s | `rds:{region}:{instanceId}` | +| EC2 instance | 30s | `ec2:{region}:{instanceId}` | +| IAM role | 300s | `iam:{accountId}:{roleName}` | + +**Changes:** +- [ ] Add `ioredis` dependency +- [ ] Implement cache-aside pattern +- [ ] Add cache hit/miss metrics +- [ ] Support cache invalidation via MCP tool + +**Impact:** ~70% cache hit rate → 2-3x throughput + +### 2.3 GCP/Azure Parity +**Priority: Medium** + +Apply same optimizations to other cloud providers. + +``` +src/state/gcp/*.ts +src/state/azure/*.ts +``` + +--- + +## Phase 3: Distributed Architecture (Week 3-4) + +### 3.1 Redis-Backed Attestations +**Priority: High** + +Replace in-memory + file attestation storage. + +``` +src/attestation/service.ts +NEW: src/attestation/redis-store.ts +``` + +**Changes:** +- [ ] Implement `AttestationStore` interface +- [ ] Add Redis implementation with TTL +- [ ] Support cross-process attestation lookups +- [ ] Add attestation replication for HA + +**Schema:** +``` +attestation:{id} → JSON(Attestation) +attestation:by-resource:{resourceId} → SET(attestationIds) +``` + +### 3.2 Horizontal Scaling +**Priority: Medium** + +Support multi-process deployment. + +``` +NEW: src/cluster.ts +``` + +**Changes:** +- [ ] Add Node.js cluster mode support +- [ ] Sticky sessions for MCP connections +- [ ] Shared Redis for cross-process state +- [ ] PM2 ecosystem config + +**Deployment options:** +- Single node: 4-8 workers via cluster +- Multi-node: Load balancer + Redis + +### 3.3 Connection Multiplexing +**Priority: Medium** + +Single MCP connection serving multiple agents. + +``` +src/mcp/server.ts +NEW: src/mcp/multiplexer.ts +``` + +**Changes:** +- [ ] Add agent ID to request context +- [ ] Route responses to correct agent +- [ ] Per-agent rate limiting +- [ ] Agent isolation (no cross-contamination) + +--- + +## Phase 4: Observability & Hardening (Week 4-5) + +### 4.1 Metrics & Monitoring +**Priority: High** + +``` +NEW: src/metrics/prometheus.ts +``` + +**Metrics to track:** +- `recourse_evaluation_duration_seconds` (histogram) +- `recourse_queue_depth` (gauge) +- `recourse_cache_hit_ratio` (gauge) +- `recourse_active_agents` (gauge) +- `recourse_state_lookup_duration_seconds` (histogram) + +### 4.2 Rate Limiting +**Priority: High** + +Per-agent and global rate limits. + +``` +NEW: src/ratelimit/index.ts +``` + +**Tiers:** +| Plan | Evals/min | Agents | State lookups/min | +|------|-----------|--------|-------------------| +| Free | 10 | 1 | 100 | +| Pro | 500 | 25 | 5000 | +| Enterprise | Unlimited | 100+ | Unlimited | + +### 4.3 Graceful Degradation +**Priority: Medium** + +- [ ] Timeout slow evaluations (30s max) +- [ ] Fallback to cached state on API failures +- [ ] Circuit breaker for cloud provider APIs +- [ ] Health check endpoints + +--- + +## Phase 5: Enterprise Features (Week 5-6) + +### 5.1 Multi-Tenancy +**Priority: Enterprise** + +``` +NEW: src/tenant/index.ts +``` + +- [ ] Tenant isolation +- [ ] Per-tenant Redis namespaces +- [ ] Tenant-specific rate limits +- [ ] Usage metering per tenant + +### 5.2 Streaming Evaluations +**Priority: Enterprise** + +For large Terraform plans (1000+ resources). + +- [ ] Stream changes instead of buffering +- [ ] Incremental blast-radius updates +- [ ] Progress callbacks to agents + +### 5.3 Audit Logging +**Priority: Enterprise** + +- [ ] Structured audit log for all evaluations +- [ ] S3/CloudWatch export +- [ ] Compliance reporting + +--- + +## Dependencies + +```json +{ + "piscina": "^4.0.0", + "p-queue": "^7.0.0", + "ioredis": "^5.3.0", + "@aws-sdk/client-s3": "^3.500.0", + "@aws-sdk/client-rds": "^3.500.0", + "@aws-sdk/client-ec2": "^3.500.0", + "prom-client": "^15.0.0" +} +``` + +--- + +## Milestones + +| Milestone | Target | Deliverable | +|-----------|--------|-------------| +| M1: Parallel Eval | Week 2 | 15+ evals/sec single process | +| M2: Cached State | Week 3 | <50ms state lookups | +| M3: Multi-Process | Week 4 | 50+ evals/sec cluster | +| M4: Pro Beta | Week 5 | 25 agent support | +| M5: Enterprise GA | Week 6 | 100+ agent support | + +--- + +## Pricing Considerations + +| Feature | Free | Pro | Enterprise | +|---------|------|-----|------------| +| Concurrent agents | 1 | 25 | 100+ | +| Evaluations/month | 1,000 | 50,000 | Unlimited | +| State caching | No | Yes | Yes | +| Redis (managed) | No | Included | Dedicated | +| SLA | None | 99.5% | 99.9% | +| Support | Community | Email | Dedicated | + +--- + +## Open Questions + +1. **Redis hosting**: Managed (Upstash/Redis Cloud) vs self-hosted? +2. **Pricing model**: Per-agent vs per-evaluation vs flat tier? +3. **Agent authentication**: API keys per agent or per org? +4. **Data residency**: Regional Redis deployments needed? + +--- + +*Last updated: 2026-05-10* diff --git a/src/core/failure-mode.ts b/src/core/failure-mode.ts new file mode 100644 index 0000000..c9d7e38 --- /dev/null +++ b/src/core/failure-mode.ts @@ -0,0 +1,125 @@ +/** + * Failure mode configuration for RecourseOS evaluations. + * + * Determines how RecourseOS behaves when state lookups fail + * (network errors, API timeouts, permission denied, etc.) + */ + +/** + * Failure mode options: + * - 'closed': Block the action when evidence cannot be gathered (safest) + * - 'review': Escalate to human review when evidence unavailable (default) + * - 'open': Allow the action despite missing evidence (dangerous!) + */ +export type FailureMode = 'closed' | 'review' | 'open'; + +/** + * Result of checking for evidence failures in a consequence report. + */ +export interface EvidenceFailureCheck { + /** Whether any evidence gathering failed */ + hasFailures: boolean; + /** List of resources with missing evidence */ + failedResources: string[]; + /** Reasons for failures */ + failureReasons: string[]; +} + +/** + * Default failure modes by deployment context. + * - OSS/self-hosted: 'review' (escalate to human) + * - Pro/managed: 'closed' (fail-safe) + * - Explicit override: user's choice + */ +export const DEFAULT_FAILURE_MODE: FailureMode = 'review'; +export const PRO_DEFAULT_FAILURE_MODE: FailureMode = 'closed'; + +/** + * Check if a consequence report has evidence failures that should + * trigger failure mode handling. + */ +export function checkEvidenceFailures( + mutations: Array<{ + missingEvidence?: Array<{ key: string; description?: string }>; + intent?: { target?: { id?: string } }; + }> +): EvidenceFailureCheck { + const failedResources: string[] = []; + const failureReasons: string[] = []; + + for (const mutation of mutations) { + if (mutation.missingEvidence && mutation.missingEvidence.length > 0) { + const resourceId = mutation.intent?.target?.id || 'unknown'; + failedResources.push(resourceId); + + for (const missing of mutation.missingEvidence) { + failureReasons.push( + missing.description || `Missing evidence: ${missing.key}` + ); + } + } + } + + return { + hasFailures: failedResources.length > 0, + failedResources: Array.from(new Set(failedResources)), + failureReasons: Array.from(new Set(failureReasons)), + }; +} + +/** + * Apply failure mode to a consequence decision. + * + * @param currentDecision - The decision from normal policy evaluation + * @param failureCheck - Result of evidence failure check + * @param failureMode - The configured failure mode + * @returns The potentially modified decision and reason + */ +export function applyFailureMode( + currentDecision: 'allow' | 'warn' | 'escalate' | 'block', + currentReason: string, + failureCheck: EvidenceFailureCheck, + failureMode: FailureMode +): { decision: 'allow' | 'warn' | 'escalate' | 'block'; reason: string } { + // No failures, return original decision + if (!failureCheck.hasFailures) { + return { decision: currentDecision, reason: currentReason }; + } + + const failureContext = `Evidence unavailable for: ${failureCheck.failedResources.join(', ')}`; + + switch (failureMode) { + case 'closed': + // Fail-closed: always block when evidence is missing + return { + decision: 'block', + reason: `[FAIL-CLOSED] ${failureContext}. Action blocked due to inability to verify safety.`, + }; + + case 'review': + // Fail-review: escalate to human (current default behavior) + // Only upgrade if current decision is less severe than escalate + if (currentDecision === 'allow' || currentDecision === 'warn') { + return { + decision: 'escalate', + reason: `[FAIL-REVIEW] ${failureContext}. Human review required.`, + }; + } + return { decision: currentDecision, reason: currentReason }; + + case 'open': + // Fail-open: allow despite missing evidence (dangerous!) + // Log a warning but don't change the decision + return { + decision: currentDecision, + reason: `[FAIL-OPEN WARNING] ${failureContext}. Proceeding without complete evidence.`, + }; + + default: + // Unknown mode, fail safe + return { + decision: 'escalate', + reason: `Unknown failure mode. ${failureContext}`, + }; + } +} diff --git a/src/core/index.ts b/src/core/index.ts index 418b219..5802e00 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -72,3 +72,16 @@ export { getRegisteredResourceTypes, hasEvidenceRequirements, } from './evidence-requirements.js'; + +// Failure mode handling +export type { + FailureMode, + EvidenceFailureCheck, +} from './failure-mode.js'; + +export { + DEFAULT_FAILURE_MODE, + PRO_DEFAULT_FAILURE_MODE, + checkEvidenceFailures, + applyFailureMode, +} from './failure-mode.js'; diff --git a/src/evaluator/terraform.ts b/src/evaluator/terraform.ts index aca0712..c88eb3d 100644 --- a/src/evaluator/terraform.ts +++ b/src/evaluator/terraform.ts @@ -16,6 +16,12 @@ import type { VerificationStatusInfo, RequiredEvidence, EvidenceItem, + FailureMode, +} from '../core/index.js'; +import { + checkEvidenceFailures, + applyFailureMode, + DEFAULT_FAILURE_MODE, } from '../core/index.js'; import { TraceBuilder, @@ -274,6 +280,31 @@ export function evaluateTerraformPlanConsequences( ) : undefined; + // Check for evidence failures and apply failure mode + const failureCheck = checkEvidenceFailures(mutations); + const effectiveFailureMode = options.policy?.failureMode ?? DEFAULT_FAILURE_MODE; + + let finalDecision = finalPolicyEvaluation.decision; + let finalReason = hasUnrecoverableFromCrossAction + ? `${finalPolicyEvaluation.reason} (cross-action risk detected)` + : finalPolicyEvaluation.reason; + + // Apply failure mode if there are evidence failures + if (failureCheck.hasFailures) { + const failureModeResult = applyFailureMode( + finalDecision, + finalReason, + failureCheck, + effectiveFailureMode + ); + finalDecision = failureModeResult.decision; + finalReason = failureModeResult.reason; + + trace.step('failure_mode_check', `Evidence failures detected, applying ${effectiveFailureMode} mode`, { + decision: `failed_resources=${failureCheck.failedResources.length}, mode=${effectiveFailureMode}, result=${finalDecision}`, + }); + } + const report: ConsequenceReport = { mutations, summary: { @@ -283,10 +314,8 @@ export function evaluateTerraformPlanConsequences( hasUnrecoverable, dependencyImpactCount: blastRadiusReport.summary.cascadeImpactCount, }, - riskAssessment: finalPolicyEvaluation.decision, - assessmentReason: hasUnrecoverableFromCrossAction - ? `${finalPolicyEvaluation.reason} (cross-action risk detected)` - : finalPolicyEvaluation.reason, + riskAssessment: finalDecision, + assessmentReason: finalReason, // Always include cross-action risks (empty array if none detected) crossActionRisks, // Attestation richness fields diff --git a/src/policy/local.ts b/src/policy/local.ts index 8b1ee31..21f908d 100644 --- a/src/policy/local.ts +++ b/src/policy/local.ts @@ -4,13 +4,20 @@ import { type BlastRadiusReport, type RecoverabilityResult, } from '../resources/types.js'; -import type { ConsequenceDecision } from '../core/index.js'; +import type { ConsequenceDecision, FailureMode } from '../core/index.js'; export interface LocalPolicy { blockOn?: RecoverabilityTier; escalateOn?: RecoverabilityTier; warnOn?: RecoverabilityTier; requireReviewOnNeedsReview?: boolean; + /** + * How to handle evidence gathering failures (API errors, timeouts, etc.) + * - 'closed': Block action when evidence unavailable (safest, recommended for Pro) + * - 'review': Escalate to human review (default for OSS) + * - 'open': Allow action despite missing evidence (dangerous!) + */ + failureMode?: FailureMode; } export interface PolicyEvaluation { @@ -24,6 +31,7 @@ export const defaultLocalPolicy: Required = { escalateOn: RecoverabilityTier.NEEDS_REVIEW, warnOn: RecoverabilityTier.RECOVERABLE_FROM_BACKUP, requireReviewOnNeedsReview: true, + failureMode: 'review', // Default: escalate to human when evidence unavailable }; export function evaluateRecoverability( diff --git a/src/state/aws/client.ts b/src/state/aws/client.ts index ad7fa5a..9d870e2 100644 --- a/src/state/aws/client.ts +++ b/src/state/aws/client.ts @@ -28,13 +28,125 @@ export type AwsTransport = ( input: AwsRequestInput & { headers: Record; body: string } ) => Promise; +export interface RetryOptions { + /** Maximum number of retry attempts (default: 3) */ + maxRetries?: number; + /** Base delay in ms for exponential backoff (default: 100) */ + baseDelayMs?: number; + /** Maximum delay in ms (default: 5000) */ + maxDelayMs?: number; + /** Whether to retry on 5xx errors (default: true) */ + retryOn5xx?: boolean; + /** Whether to retry on network errors (default: true) */ + retryOnNetworkError?: boolean; +} + +const DEFAULT_RETRY_OPTIONS: Required = { + maxRetries: 3, + baseDelayMs: 100, + maxDelayMs: 5000, + retryOn5xx: true, + retryOnNetworkError: true, +}; + +/** + * Sleep for a given number of milliseconds. + */ +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** + * Calculate delay with exponential backoff and jitter. + */ +function calculateBackoff(attempt: number, baseDelayMs: number, maxDelayMs: number): number { + const exponentialDelay = baseDelayMs * Math.pow(2, attempt); + const jitter = Math.random() * 0.3 * exponentialDelay; // 0-30% jitter + return Math.min(exponentialDelay + jitter, maxDelayMs); +} + +/** + * Check if a response should be retried. + */ +function shouldRetry( + statusCode: number, + options: Required +): boolean { + // Network error (statusCode 0) + if (statusCode === 0 && options.retryOnNetworkError) { + return true; + } + // 5xx server errors + if (statusCode >= 500 && options.retryOn5xx) { + return true; + } + // 429 Too Many Requests + if (statusCode === 429) { + return true; + } + return false; +} + export class AwsSignedClient { + private readonly retryOptions: Required; + constructor( private readonly credentials: AwsCredentials, - private readonly transport: AwsTransport = defaultHttpsTransport - ) {} + private readonly transport: AwsTransport = defaultHttpsTransport, + retryOptions: RetryOptions = {} + ) { + this.retryOptions = { ...DEFAULT_RETRY_OPTIONS, ...retryOptions }; + } async request(input: AwsRequestInput): Promise { + let lastError: Error | undefined; + let lastResponse: AwsHttpResponse | undefined; + + for (let attempt = 0; attempt <= this.retryOptions.maxRetries; attempt++) { + try { + const response = await this.requestOnce(input); + lastResponse = response; + + // Check if we should retry + if (attempt < this.retryOptions.maxRetries && shouldRetry(response.statusCode, this.retryOptions)) { + const delay = calculateBackoff(attempt, this.retryOptions.baseDelayMs, this.retryOptions.maxDelayMs); + await sleep(delay); + continue; + } + + return response; + } catch (error) { + lastError = error instanceof Error ? error : new Error(String(error)); + + // Network error - retry if configured + if (attempt < this.retryOptions.maxRetries && this.retryOptions.retryOnNetworkError) { + const delay = calculateBackoff(attempt, this.retryOptions.baseDelayMs, this.retryOptions.maxDelayMs); + await sleep(delay); + continue; + } + + // Return a response with statusCode 0 to indicate network failure + return { + statusCode: 0, + body: lastError.message, + headers: {}, + }; + } + } + + // All retries exhausted + if (lastResponse) { + return lastResponse; + } + + return { + statusCode: 0, + body: lastError?.message || 'Request failed after retries', + headers: {}, + }; + } + + private async requestOnce(input: AwsRequestInput): Promise { const body = input.body ?? ''; const now = new Date(); const amzDate = toAmzDate(now); diff --git a/tests/failure-mode.test.ts b/tests/failure-mode.test.ts new file mode 100644 index 0000000..35223ef --- /dev/null +++ b/tests/failure-mode.test.ts @@ -0,0 +1,138 @@ +import { describe, it, expect } from 'vitest'; +import { + checkEvidenceFailures, + applyFailureMode, + DEFAULT_FAILURE_MODE, + PRO_DEFAULT_FAILURE_MODE, +} from '../src/core/failure-mode.js'; + +describe('failure-mode', () => { + describe('checkEvidenceFailures', () => { + it('returns no failures for mutations without missing evidence', () => { + const mutations = [ + { missingEvidence: [], intent: { target: { id: 'bucket-1' } } }, + { missingEvidence: [], intent: { target: { id: 'bucket-2' } } }, + ]; + + const result = checkEvidenceFailures(mutations); + + expect(result.hasFailures).toBe(false); + expect(result.failedResources).toHaveLength(0); + expect(result.failureReasons).toHaveLength(0); + }); + + it('detects failures when mutations have missing evidence', () => { + const mutations = [ + { + missingEvidence: [ + { key: 's3.versioning', description: 'Unable to verify s3.versioning' }, + ], + intent: { target: { id: 'my-bucket' } }, + }, + ]; + + const result = checkEvidenceFailures(mutations); + + expect(result.hasFailures).toBe(true); + expect(result.failedResources).toContain('my-bucket'); + expect(result.failureReasons).toContain('Unable to verify s3.versioning'); + }); + + it('deduplicates resources and reasons', () => { + const mutations = [ + { + missingEvidence: [ + { key: 's3.versioning', description: 'API error' }, + { key: 's3.replication', description: 'API error' }, + ], + intent: { target: { id: 'bucket-1' } }, + }, + { + missingEvidence: [{ key: 's3.lifecycle', description: 'API error' }], + intent: { target: { id: 'bucket-1' } }, + }, + ]; + + const result = checkEvidenceFailures(mutations); + + expect(result.failedResources).toEqual(['bucket-1']); + expect(result.failureReasons).toEqual(['API error']); + }); + }); + + describe('applyFailureMode', () => { + const failureCheck = { + hasFailures: true, + failedResources: ['my-bucket'], + failureReasons: ['Network timeout'], + }; + + const noFailures = { + hasFailures: false, + failedResources: [], + failureReasons: [], + }; + + it('returns original decision when no failures', () => { + const result = applyFailureMode('allow', 'All good', noFailures, 'closed'); + + expect(result.decision).toBe('allow'); + expect(result.reason).toBe('All good'); + }); + + describe('fail-closed mode', () => { + it('blocks when evidence is unavailable', () => { + const result = applyFailureMode('allow', 'Original', failureCheck, 'closed'); + + expect(result.decision).toBe('block'); + expect(result.reason).toContain('FAIL-CLOSED'); + expect(result.reason).toContain('my-bucket'); + }); + + it('blocks even if original decision was escalate', () => { + const result = applyFailureMode('escalate', 'Original', failureCheck, 'closed'); + + expect(result.decision).toBe('block'); + }); + }); + + describe('fail-review mode', () => { + it('escalates allow to review when evidence unavailable', () => { + const result = applyFailureMode('allow', 'Original', failureCheck, 'review'); + + expect(result.decision).toBe('escalate'); + expect(result.reason).toContain('FAIL-REVIEW'); + }); + + it('escalates warn to review', () => { + const result = applyFailureMode('warn', 'Original', failureCheck, 'review'); + + expect(result.decision).toBe('escalate'); + }); + + it('does not downgrade existing escalate/block', () => { + expect(applyFailureMode('escalate', 'Original', failureCheck, 'review').decision).toBe('escalate'); + expect(applyFailureMode('block', 'Original', failureCheck, 'review').decision).toBe('block'); + }); + }); + + describe('fail-open mode', () => { + it('keeps original decision but adds warning', () => { + const result = applyFailureMode('allow', 'Original', failureCheck, 'open'); + + expect(result.decision).toBe('allow'); + expect(result.reason).toContain('FAIL-OPEN WARNING'); + }); + }); + }); + + describe('default failure modes', () => { + it('OSS default is review', () => { + expect(DEFAULT_FAILURE_MODE).toBe('review'); + }); + + it('Pro default is closed', () => { + expect(PRO_DEFAULT_FAILURE_MODE).toBe('closed'); + }); + }); +}); From a52c4a9577bd86d5b1f2437d3633ea6dba49a222 Mon Sep 17 00:00:00 2001 From: Jessie Hermosillo Date: Mon, 11 May 2026 12:14:20 -0400 Subject: [PATCH 2/3] feat: Enable classifier by default for unknown resources (#53) Changed useClassifier default from false to true. Unknown resource types now automatically run through the decision tree classifier instead of just using the weak default handler. This provides better safety coverage for resources without explicit handlers. The classifier can be explicitly disabled via options.classifier = false if needed. Co-Authored-By: Claude Opus 4.5 --- src/analyzer/blast-radius.ts | 2 +- src/http/server.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/analyzer/blast-radius.ts b/src/analyzer/blast-radius.ts index 611bd8b..ae18d10 100644 --- a/src/analyzer/blast-radius.ts +++ b/src/analyzer/blast-radius.ts @@ -24,7 +24,7 @@ export function analyzeBlastRadius( state: TerraformState | null, options: AnalyzeOptions = {} ): BlastRadiusReport { - const { includeNonDestructive = true, useClassifier = false } = options; + const { includeNonDestructive = true, useClassifier = true } = options; // Get changes to analyze let changes: ResourceChange[]; diff --git a/src/http/server.ts b/src/http/server.ts index 1d31705..ccfa3e3 100644 --- a/src/http/server.ts +++ b/src/http/server.ts @@ -222,7 +222,7 @@ function evaluate(request: EvaluateRequest) { : JSON.stringify(request.input); const plan = parsePlanJson(planJson); return evaluateTerraformPlanConsequences(plan, null, { - useClassifier: request.options?.classifier ?? false, + useClassifier: request.options?.classifier ?? true, adapterContext, }); } From ec49ba6b8f5f5ef49ccd2493dfdca14b378f13f2 Mon Sep 17 00:00:00 2001 From: Jessie Hermosillo Date: Mon, 11 May 2026 14:37:16 -0400 Subject: [PATCH 3/3] Pre-monetization security hardening and quality fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Security Fixes (Critical) - Fix URL command injection in http/server.ts (exec → execFile) - Add MAX_BODY_SIZE limits to HTTP endpoints (DoS prevention) - Add MAX_FRAME_SIZE validation to MCP server (DoS prevention) ## Security Fixes (High) - Add RECOURSE_INSTANCE_URL env var with production warning for localhost - Log attestation persistence failures instead of silent swallowing ## Code Quality Fixes (Medium) - Add parseRiskLevels() validator to replace unsafe 'as any' casts - Add input validation to shell command execution - Add error logging for file read failures - Document shell:true usage with security rationale ## Features - Add SLA_TARGETS and EvaluationTimer for latency benchmarking - Add timing metrics to all evaluator outputs - Add test:coverage script with 50% threshold gates - Add coverage step to CI workflow ## Cleanup - Convert incomplete TODOs to documented future enhancements - Remove references to unimplemented PagerDuty/Opsgenie integrations - Update notification docs to reflect actual supported channels Fixes: #52, #53, #54, #55, #56 Co-Authored-By: Claude Opus 4.5 --- .github/workflows/ci.yml | 3 + package.json | 1 + src/analyzer/cross-action-patterns.ts | 6 +- src/attestation/service.ts | 23 ++++- src/cli.ts | 19 +++- src/core/consequence.ts | 7 ++ src/core/index.ts | 12 +++ src/core/timing.ts | 142 ++++++++++++++++++++++++++ src/evaluator/mcp.ts | 11 ++ src/evaluator/shell.ts | 10 ++ src/evaluator/terraform.ts | 17 +++ src/http/server.ts | 32 +++++- src/iam/broker-server.ts | 21 +++- src/iam/session-broker.ts | 8 +- src/mcp/gateway.ts | 34 +++++- src/mcp/server.ts | 9 ++ src/notifications/index.ts | 15 ++- tests/timing.test.ts | 96 +++++++++++++++++ vitest.config.ts | 20 ++++ 19 files changed, 455 insertions(+), 31 deletions(-) create mode 100644 src/core/timing.ts create mode 100644 tests/timing.test.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index baac125..69d3cfc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,9 @@ jobs: - name: Test run: npm run test:all + - name: Test Coverage + run: npm run test:coverage + - name: Install Playwright Chromium run: npx playwright install --with-deps chromium diff --git a/package.json b/package.json index d580cf6..27c3bea 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "pack:check": "npm pack --dry-run --cache .npm-cache", "dev": "tsc --watch", "test": "vitest", + "test:coverage": "vitest run --coverage", "test:cli": "npm run build && vitest --run tests/cli-scenarios.test.ts", "test:all": "npm run build && vitest --run && vitest --run tests/cli-scenarios.test.ts", "test:aws-live": "RUN_AWS_LIVE_TESTS=1 vitest --run tests/aws-live.test.ts", diff --git a/src/analyzer/cross-action-patterns.ts b/src/analyzer/cross-action-patterns.ts index e52fd9f..80e0233 100644 --- a/src/analyzer/cross-action-patterns.ts +++ b/src/analyzer/cross-action-patterns.ts @@ -113,8 +113,10 @@ function detectBackupRelationship( } } - // TODO: State lookup for additional relationships - // TODO: Naming convention detection (low confidence) + // Future enhancements (not blocking for v1): + // - State lookup: Query live state for snapshot->instance relationships not in plan + // - Naming convention: Detect patterns like "mydb-snapshot" -> "mydb" (low confidence) + // These would improve detection but explicit_reference covers the critical cases. return null; } diff --git a/src/attestation/service.ts b/src/attestation/service.ts index efebe9a..3d04d6e 100644 --- a/src/attestation/service.ts +++ b/src/attestation/service.ts @@ -95,10 +95,24 @@ export class AttestationService { constructor(config: AttestationServiceConfig = {}) { this.configDir = config.configDir ?? join(homedir(), '.recourse'); this.instanceId = config.instanceId ?? 'recourse-local'; - this.instanceBaseUrl = config.instanceBaseUrl ?? 'http://localhost:3001'; this.evaluatorVersion = config.evaluatorVersion ?? '1.0.0'; this.registry = createRegistry(); this.attestationsDir = join(this.configDir, 'attestations'); + + // Resolve instance base URL with production validation + const envUrl = process.env.RECOURSE_INSTANCE_URL; + const defaultUrl = 'http://localhost:3001'; + this.instanceBaseUrl = config.instanceBaseUrl ?? envUrl ?? defaultUrl; + + // Warn if using localhost in production mode + const isProduction = process.env.NODE_ENV === 'production'; + const isLocalhost = this.instanceBaseUrl.includes('localhost') || this.instanceBaseUrl.includes('127.0.0.1'); + if (isProduction && isLocalhost) { + console.warn( + '[WARN] Attestation service using localhost URL in production mode. ' + + 'Set RECOURSE_INSTANCE_URL to a publicly accessible URL for valid attestations.' + ); + } } /** @@ -256,9 +270,10 @@ export class AttestationService { encoding: 'utf8', mode: 0o644, }); - } catch { - // Silently fail - disk persistence is best-effort - // Memory storage is still available for this process + } catch (err) { + // Log persistence failures - audit trail should not silently disappear + const message = err instanceof Error ? err.message : String(err); + console.warn(`[WARN] Failed to persist attestation ${id}: ${message}. Memory storage only.`); } } diff --git a/src/cli.ts b/src/cli.ts index d8a2b54..de30599 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -15,6 +15,7 @@ import { evaluateTerraformPlanConsequences, } from './evaluator/index.js'; import type { McpToolCall } from './adapters/index.js'; +import { parseRiskLevels } from './mcp/gateway.js'; import { analyzeDynamoDbTableDeletionEvidence, analyzeIamRoleDeletionEvidence, @@ -189,7 +190,7 @@ mcp } if (options.allow) { - config.allowedRiskLevels = options.allow.split(',') as any; + config.allowedRiskLevels = parseRiskLevels(options.allow); } if (options.verbose !== undefined) { @@ -935,7 +936,23 @@ program } }); +/** + * Execute a shell command after RecourseOS evaluation. + * + * SECURITY NOTE: shell:true is intentional here because: + * 1. This is the `recourse exec` CLI command where user explicitly provides a command + * 2. Commands may include shell features (pipes, redirections, etc.) + * 3. The command is validated by RecourseOS before execution + * + * This is NOT used for programmatic/untrusted input. + */ function executeCommand(command: string): void { + // Validate command is a non-empty string + if (typeof command !== 'string' || command.trim().length === 0) { + console.error('Invalid command: must be a non-empty string'); + process.exit(1); + } + const child = spawn(command, { shell: true, stdio: 'inherit', diff --git a/src/core/consequence.ts b/src/core/consequence.ts index b2789b8..98aecb0 100644 --- a/src/core/consequence.ts +++ b/src/core/consequence.ts @@ -3,6 +3,7 @@ import type { DependencyImpact, EvidenceItem, MissingEvidence, MutationIntent, V import type { EvidenceRequirementLevel, EvidenceSufficiency } from './state-schema.js'; import type { CrossActionRisk } from '../analyzer/cross-action.js'; import type { ReasoningTrace, VerificationInstructions } from '../evaluator/trace.js'; +import type { EvaluationTiming } from './timing.js'; export type ConsequenceDecision = 'allow' | 'warn' | 'block' | 'escalate'; @@ -135,4 +136,10 @@ export interface ConsequenceReport { * Includes CLI commands and API calls a verifier can run. */ verification?: VerificationInstructions; + + /** + * Performance timing metrics for this evaluation. + * Includes total time, phase breakdown, and SLA compliance. + */ + timing?: EvaluationTiming; } diff --git a/src/core/index.ts b/src/core/index.ts index 5802e00..a15abb5 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -85,3 +85,15 @@ export { checkEvidenceFailures, applyFailureMode, } from './failure-mode.js'; + +// Performance timing +export type { + EvaluationTiming, + SLATarget, +} from './timing.js'; + +export { + SLA_TARGETS, + EvaluationTimer, + formatTiming, +} from './timing.js'; diff --git a/src/core/timing.ts b/src/core/timing.ts new file mode 100644 index 0000000..fc9b1b5 --- /dev/null +++ b/src/core/timing.ts @@ -0,0 +1,142 @@ +/** + * Performance timing utilities for RecourseOS evaluations. + * + * Used to measure latency and enforce SLA targets. + */ + +/** + * SLA targets for different operation types (in milliseconds). + * + * These are p95 targets - 95% of evaluations should complete within these times. + */ +export const SLA_TARGETS = { + /** Single resource evaluation without network calls */ + localEvaluation: 10, + /** Single resource evaluation with AWS state lookup */ + remoteEvaluation: 500, + /** Full plan evaluation (up to 50 resources) */ + planEvaluation: 2000, + /** Shell command parsing and evaluation */ + shellEvaluation: 5, + /** MCP tool call evaluation */ + mcpEvaluation: 10, +} as const; + +export type SLATarget = keyof typeof SLA_TARGETS; + +/** + * Timing metrics for an evaluation. + */ +export interface EvaluationTiming { + /** Total wall-clock time in milliseconds */ + totalMs: number; + /** Time spent in parsing/preparation */ + parseMs?: number; + /** Time spent in blast radius analysis */ + analysisMs?: number; + /** Time spent in policy evaluation */ + policyMs?: number; + /** Time spent waiting for remote state lookups */ + remoteMs?: number; + /** Whether the evaluation met its SLA target */ + metSla: boolean; + /** The SLA target used for comparison */ + slaTarget: SLATarget; + /** The target time in milliseconds */ + slaTargetMs: number; +} + +/** + * Timer for tracking evaluation performance. + */ +export class EvaluationTimer { + private startTime: number; + private phases: Map = new Map(); + private slaTarget: SLATarget; + + constructor(slaTarget: SLATarget = 'localEvaluation') { + this.startTime = performance.now(); + this.slaTarget = slaTarget; + } + + /** + * Start timing a phase. + */ + startPhase(name: string): void { + this.phases.set(name, { start: performance.now() }); + } + + /** + * End timing a phase. + */ + endPhase(name: string): number { + const phase = this.phases.get(name); + if (!phase) { + return 0; + } + phase.end = performance.now(); + return phase.end - phase.start; + } + + /** + * Get the duration of a completed phase. + */ + getPhaseMs(name: string): number | undefined { + const phase = this.phases.get(name); + if (!phase || phase.end === undefined) { + return undefined; + } + return phase.end - phase.start; + } + + /** + * Set the SLA target for this evaluation. + */ + setSlaTarget(target: SLATarget): void { + this.slaTarget = target; + } + + /** + * Complete timing and return metrics. + */ + finish(): EvaluationTiming { + const totalMs = performance.now() - this.startTime; + const slaTargetMs = SLA_TARGETS[this.slaTarget]; + + return { + totalMs: Math.round(totalMs * 100) / 100, + parseMs: this.getPhaseMs('parse'), + analysisMs: this.getPhaseMs('analysis'), + policyMs: this.getPhaseMs('policy'), + remoteMs: this.getPhaseMs('remote'), + metSla: totalMs <= slaTargetMs, + slaTarget: this.slaTarget, + slaTargetMs, + }; + } +} + +/** + * Format timing metrics for human-readable output. + */ +export function formatTiming(timing: EvaluationTiming): string { + const status = timing.metSla ? '✓' : '⚠'; + const parts = [`${status} ${timing.totalMs.toFixed(1)}ms`]; + + if (timing.parseMs !== undefined) { + parts.push(`parse=${timing.parseMs.toFixed(1)}ms`); + } + if (timing.analysisMs !== undefined) { + parts.push(`analysis=${timing.analysisMs.toFixed(1)}ms`); + } + if (timing.policyMs !== undefined) { + parts.push(`policy=${timing.policyMs.toFixed(1)}ms`); + } + if (timing.remoteMs !== undefined) { + parts.push(`remote=${timing.remoteMs.toFixed(1)}ms`); + } + + parts.push(`(target: ${timing.slaTargetMs}ms ${timing.slaTarget})`); + + return parts.join(' | '); +} diff --git a/src/evaluator/mcp.ts b/src/evaluator/mcp.ts index 70149e6..8594ff2 100644 --- a/src/evaluator/mcp.ts +++ b/src/evaluator/mcp.ts @@ -13,6 +13,7 @@ import { buildRequiredEvidence, getEvidenceRequirements, DEFAULT_UNKNOWN_REQUIREMENTS, + EvaluationTimer, } from '../core/index.js'; import { RecoverabilityLabels, @@ -62,7 +63,13 @@ export function evaluateMcpToolCallConsequences( call: McpToolCall, options: McpConsequenceOptions = {} ): ConsequenceReport { + const timer = new EvaluationTimer('mcpEvaluation'); + timer.startPhase('parse'); + const intent = mcpToolCallToMutation(call, options.adapterContext); + timer.endPhase('parse'); + + timer.startPhase('analysis'); const s3Analysis = getS3Analysis(intent, options.awsEvidence?.s3Buckets); const rdsAnalysis = getRdsAnalysis(intent, options.awsEvidence?.rdsInstances); const dynamoDbAnalysis = getDynamoDbAnalysis(intent, options.awsEvidence?.dynamoDbTables); @@ -105,6 +112,9 @@ export function evaluateMcpToolCallConsequences( // Build required evidence for the consequence report const requiredEvidence = buildRequiredEvidenceForIntent(intent, mutation.evidence); + timer.endPhase('analysis'); + const timing = timer.finish(); + return { mutations: [mutation], summary: { @@ -117,6 +127,7 @@ export function evaluateMcpToolCallConsequences( riskAssessment: policyEvaluation.decision, assessmentReason: policyEvaluation.reason, requiredEvidence, + timing, }; } diff --git a/src/evaluator/shell.ts b/src/evaluator/shell.ts index 0a5571f..19b4404 100644 --- a/src/evaluator/shell.ts +++ b/src/evaluator/shell.ts @@ -6,6 +6,7 @@ import type { ConsequenceReport, MutationIntent, } from '../core/index.js'; +import { EvaluationTimer } from '../core/index.js'; import { RecoverabilityLabels, RecoverabilityTier, @@ -44,6 +45,9 @@ export function evaluateShellCommandConsequences( input: ShellCommandInput | string, options: ShellConsequenceOptions = {} ): ConsequenceReport { + const timer = new EvaluationTimer('shellEvaluation'); + timer.startPhase('parse'); + const command = typeof input === 'string' ? input : input.command; const intent = shellCommandToMutation(command, { ...options.adapterContext, @@ -52,7 +56,9 @@ export function evaluateShellCommandConsequences( cwd: typeof input === 'string' ? undefined : input.cwd, }, }); + timer.endPhase('parse'); + timer.startPhase('analysis'); const s3Analysis = getS3Analysis(intent, options.awsEvidence?.s3Buckets); const rdsAnalysis = getRdsAnalysis(intent, options.awsEvidence?.rdsInstances); const dynamoDbAnalysis = getDynamoDbAnalysis(intent, options.awsEvidence?.dynamoDbTables); @@ -92,6 +98,9 @@ export function evaluateShellCommandConsequences( dependencyImpact: [], }; + timer.endPhase('analysis'); + const timing = timer.finish(); + return { mutations: [mutation], summary: { @@ -103,6 +112,7 @@ export function evaluateShellCommandConsequences( }, riskAssessment: policyEvaluation.decision, assessmentReason: policyEvaluation.reason, + timing, }; } diff --git a/src/evaluator/terraform.ts b/src/evaluator/terraform.ts index c88eb3d..ec00f73 100644 --- a/src/evaluator/terraform.ts +++ b/src/evaluator/terraform.ts @@ -22,6 +22,8 @@ import { checkEvidenceFailures, applyFailureMode, DEFAULT_FAILURE_MODE, + EvaluationTimer, + type EvaluationTiming, } from '../core/index.js'; import { TraceBuilder, @@ -59,6 +61,10 @@ export function evaluateTerraformPlanConsequences( state: TerraformState | null, options: TerraformConsequenceOptions = {} ): ConsequenceReport { + // Initialize timing + const timer = new EvaluationTimer('planEvaluation'); + timer.startPhase('parse'); + // Initialize trace capture const trace = new TraceBuilder(); trace.source('terraform-plan'); @@ -67,7 +73,9 @@ export function evaluateTerraformPlanConsequences( } trace.step('parse_input', `Parsed Terraform plan with ${plan.resourceChanges?.length ?? 0} resource changes`); + timer.endPhase('parse'); + timer.startPhase('analysis'); const blastRadiusReport = analyzeBlastRadius(plan, state, { useClassifier: options.useClassifier, }); @@ -75,7 +83,9 @@ export function evaluateTerraformPlanConsequences( trace.step('analyze_blast_radius', `Analyzed ${blastRadiusReport.changes.length} changes`, { decision: `total_changes=${blastRadiusReport.summary.totalChanges}, has_unrecoverable=${blastRadiusReport.summary.hasUnrecoverable}`, }); + timer.endPhase('analysis'); + timer.startPhase('policy'); const policyEvaluation = evaluateBlastRadiusReport( blastRadiusReport, options.policy @@ -305,6 +315,11 @@ export function evaluateTerraformPlanConsequences( }); } + timer.endPhase('policy'); + + // Finalize timing + const timing = timer.finish(); + const report: ConsequenceReport = { mutations, summary: { @@ -321,6 +336,8 @@ export function evaluateTerraformPlanConsequences( // Attestation richness fields trace: trace.build(), verification: verificationInstructions, + // Performance timing + timing, }; // Add verification protocol fields diff --git a/src/http/server.ts b/src/http/server.ts index ccfa3e3..fdfedd8 100644 --- a/src/http/server.ts +++ b/src/http/server.ts @@ -2,7 +2,7 @@ import { createServer, type IncomingMessage, type ServerResponse } from 'http'; import { readFileSync, existsSync } from 'fs'; import { join, extname } from 'path'; import { fileURLToPath } from 'url'; -import { exec } from 'child_process'; +import { execFile } from 'child_process'; import { parsePlanJson } from '../parsers/plan.js'; import { evaluateMcpToolCallConsequences, @@ -153,7 +153,10 @@ export async function runHttpServer(options: HttpServerOptions = {}): Promise { + if (err) { + console.error('Failed to open browser:', err.message); + } + }); } function evaluate(request: EvaluateRequest) { @@ -254,10 +263,23 @@ function evaluate(request: EvaluateRequest) { } } +// Maximum request body size (10MB) to prevent DoS +const MAX_BODY_SIZE = 10 * 1024 * 1024; + function readBody(req: IncomingMessage): Promise { return new Promise((resolve, reject) => { const chunks: Buffer[] = []; - req.on('data', chunk => chunks.push(chunk)); + let totalSize = 0; + + req.on('data', (chunk: Buffer) => { + totalSize += chunk.length; + if (totalSize > MAX_BODY_SIZE) { + req.destroy(); + reject(new Error(`Request body exceeds maximum size of ${MAX_BODY_SIZE} bytes`)); + return; + } + chunks.push(chunk); + }); req.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))); req.on('error', reject); }); diff --git a/src/iam/broker-server.ts b/src/iam/broker-server.ts index ceec32f..8b15be5 100644 --- a/src/iam/broker-server.ts +++ b/src/iam/broker-server.ts @@ -88,14 +88,27 @@ export async function startBrokerServer( }); } +// Maximum request body size (1MB) to prevent DoS +const MAX_BODY_SIZE = 1024 * 1024; + /** - * Read request body + * Read request body with size limit */ function readBody(req: http.IncomingMessage): Promise { return new Promise((resolve, reject) => { - let body = ''; - req.on('data', (chunk) => (body += chunk.toString())); - req.on('end', () => resolve(body)); + const chunks: Buffer[] = []; + let totalSize = 0; + + req.on('data', (chunk: Buffer) => { + totalSize += chunk.length; + if (totalSize > MAX_BODY_SIZE) { + req.destroy(); + reject(new Error(`Request body exceeds maximum size of ${MAX_BODY_SIZE} bytes`)); + return; + } + chunks.push(chunk); + }); + req.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))); req.on('error', reject); }); } diff --git a/src/iam/session-broker.ts b/src/iam/session-broker.ts index 1b7623d..6bb7b4d 100644 --- a/src/iam/session-broker.ts +++ b/src/iam/session-broker.ts @@ -29,6 +29,7 @@ import { } from '../evaluator/index.js'; import { toConsequenceJson } from '../output/consequence-json.js'; import { getAttestationService, type AttestationService } from '../attestation/service.js'; +import { parseRiskLevels } from '../mcp/gateway.js'; // Session request export interface SessionRequest { @@ -492,10 +493,9 @@ export function createBrokerFromEnv(): SessionBroker { return new SessionBroker({ brokerRoleArn: roleArn, - allowedRiskLevels: (process.env.RECOURSE_ALLOWED_LEVELS?.split(',') as any) ?? [ - 'allow', - 'warn', - ], + allowedRiskLevels: process.env.RECOURSE_ALLOWED_LEVELS + ? parseRiskLevels(process.env.RECOURSE_ALLOWED_LEVELS) + : ['allow', 'warn'], defaultDurationSeconds: parseInt(process.env.RECOURSE_SESSION_DURATION ?? '900'), maxDurationSeconds: parseInt(process.env.RECOURSE_MAX_SESSION_DURATION ?? '3600'), attestation: process.env.RECOURSE_ATTESTATION !== 'false', diff --git a/src/mcp/gateway.ts b/src/mcp/gateway.ts index 2d070ef..dad954b 100644 --- a/src/mcp/gateway.ts +++ b/src/mcp/gateway.ts @@ -20,6 +20,34 @@ import { import { toConsequenceJson } from '../output/consequence-json.js'; import { getAttestationService, type AttestationService } from '../attestation/service.js'; +// Valid risk level values +export type RiskLevel = 'allow' | 'warn' | 'escalate' | 'block'; +const VALID_RISK_LEVELS: RiskLevel[] = ['allow', 'warn', 'escalate', 'block']; + +/** + * Parse and validate risk levels from a comma-separated string. + * Invalid values are filtered out with a warning. + */ +export function parseRiskLevels(input: string): RiskLevel[] { + const levels = input.split(',').map(s => s.trim().toLowerCase()); + const valid: RiskLevel[] = []; + const invalid: string[] = []; + + for (const level of levels) { + if (VALID_RISK_LEVELS.includes(level as RiskLevel)) { + valid.push(level as RiskLevel); + } else if (level.length > 0) { + invalid.push(level); + } + } + + if (invalid.length > 0) { + console.warn(`[WARN] Invalid risk levels ignored: ${invalid.join(', ')}. Valid: ${VALID_RISK_LEVELS.join(', ')}`); + } + + return valid.length > 0 ? valid : ['allow', 'warn']; // Default if all invalid +} + // Gateway configuration export interface GatewayConfig { // Upstream MCP servers to proxy @@ -193,7 +221,9 @@ async function evaluateToolCall( attestation = attestationService.createAttestation(input, jsonReport); } - const allowed = config.allowedRiskLevels.includes(riskAssessment as any); + // Validate risk assessment is a known level before checking policy + const isValidLevel = VALID_RISK_LEVELS.includes(riskAssessment as RiskLevel); + const allowed = isValidLevel && config.allowedRiskLevels.includes(riskAssessment as RiskLevel); return { allowed, report: jsonReport, attestation }; } @@ -458,7 +488,7 @@ export function loadGatewayConfig(configPath?: string): GatewayConfig { } if (process.env.RECOURSE_ALLOWED_LEVELS) { - defaultConfig.allowedRiskLevels = process.env.RECOURSE_ALLOWED_LEVELS.split(',') as any; + defaultConfig.allowedRiskLevels = parseRiskLevels(process.env.RECOURSE_ALLOWED_LEVELS); } return defaultConfig; diff --git a/src/mcp/server.ts b/src/mcp/server.ts index d69d0b1..e32d80d 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -879,6 +879,9 @@ function error(id: JsonRpcRequest['id'], code: number, message: string): Record< }; } +// Maximum MCP frame size (10MB) to prevent DoS +const MAX_FRAME_SIZE = 10 * 1024 * 1024; + function readFrame(buffer: Buffer): { body: Buffer; remaining: Buffer } | null { const headerEnd = buffer.indexOf('\r\n\r\n'); if (headerEnd === -1) return null; @@ -890,6 +893,12 @@ function readFrame(buffer: Buffer): { body: Buffer; remaining: Buffer } | null { } const contentLength = Number(match[1]); + + // Validate frame size to prevent memory exhaustion + if (contentLength > MAX_FRAME_SIZE) { + throw new Error(`MCP frame size ${contentLength} exceeds maximum allowed size of ${MAX_FRAME_SIZE} bytes`); + } + const bodyStart = headerEnd + 4; const frameEnd = bodyStart + contentLength; if (buffer.length < frameEnd) return null; diff --git a/src/notifications/index.ts b/src/notifications/index.ts index 57ff878..0aaf084 100644 --- a/src/notifications/index.ts +++ b/src/notifications/index.ts @@ -1,20 +1,19 @@ /** * Notification system for RecourseOS escalations. * - * Supports: + * Currently supported: * - Slack webhooks (RECOURSE_SLACK_WEBHOOK) * - Discord webhooks (RECOURSE_DISCORD_WEBHOOK) - * - PagerDuty Events API (PAGERDUTY_ROUTING_KEY) - * - Opsgenie Alerts API (OPSGENIE_API_KEY) + * + * Planned (not yet implemented): + * - PagerDuty Events API + * - Opsgenie Alerts API * * Notifications are sent automatically when risk is 'escalate' or 'block'. */ export { sendSlackNotification, createSlackNotifier, formatSlackMessage } from './slack.js'; export { sendDiscordNotification, createDiscordNotifier, formatDiscordMessage } from './discord.js'; -// PagerDuty and Opsgenie temporarily disabled - type fixes needed -// export { ... } from './pagerduty.js'; -// export { ... } from './opsgenie.js'; export interface ConsequenceAlert { riskAssessment: 'allow' | 'warn' | 'escalate' | 'block'; @@ -68,9 +67,7 @@ export function createNotifier(): Notifier | null { export function hasNotifications(): boolean { return !!( process.env.RECOURSE_SLACK_WEBHOOK || - process.env.RECOURSE_DISCORD_WEBHOOK || - process.env.PAGERDUTY_ROUTING_KEY || - process.env.OPSGENIE_API_KEY + process.env.RECOURSE_DISCORD_WEBHOOK ); } diff --git a/tests/timing.test.ts b/tests/timing.test.ts new file mode 100644 index 0000000..1480a80 --- /dev/null +++ b/tests/timing.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect } from 'vitest'; +import { + EvaluationTimer, + SLA_TARGETS, + formatTiming, +} from '../src/core/timing.js'; + +describe('timing', () => { + describe('SLA_TARGETS', () => { + it('defines targets for all evaluation types', () => { + expect(SLA_TARGETS.localEvaluation).toBe(10); + expect(SLA_TARGETS.remoteEvaluation).toBe(500); + expect(SLA_TARGETS.planEvaluation).toBe(2000); + expect(SLA_TARGETS.shellEvaluation).toBe(5); + expect(SLA_TARGETS.mcpEvaluation).toBe(10); + }); + }); + + describe('EvaluationTimer', () => { + it('tracks total time', () => { + const timer = new EvaluationTimer('localEvaluation'); + const timing = timer.finish(); + + expect(timing.totalMs).toBeGreaterThanOrEqual(0); + expect(timing.slaTarget).toBe('localEvaluation'); + expect(timing.slaTargetMs).toBe(10); + }); + + it('tracks phases', () => { + const timer = new EvaluationTimer(); + timer.startPhase('parse'); + timer.endPhase('parse'); + timer.startPhase('analysis'); + timer.endPhase('analysis'); + + const timing = timer.finish(); + + expect(timing.parseMs).toBeDefined(); + expect(timing.analysisMs).toBeDefined(); + expect(timing.parseMs).toBeGreaterThanOrEqual(0); + expect(timing.analysisMs).toBeGreaterThanOrEqual(0); + }); + + it('reports SLA compliance', () => { + const timer = new EvaluationTimer('localEvaluation'); + const timing = timer.finish(); + + // Local evaluation should be fast enough to meet SLA + expect(timing.metSla).toBe(true); + }); + + it('allows changing SLA target', () => { + const timer = new EvaluationTimer('localEvaluation'); + timer.setSlaTarget('planEvaluation'); + const timing = timer.finish(); + + expect(timing.slaTarget).toBe('planEvaluation'); + expect(timing.slaTargetMs).toBe(2000); + }); + }); + + describe('formatTiming', () => { + it('formats timing with checkmark when SLA met', () => { + const timing = { + totalMs: 5.5, + parseMs: 1.2, + analysisMs: 4.3, + metSla: true, + slaTarget: 'localEvaluation' as const, + slaTargetMs: 10, + }; + + const formatted = formatTiming(timing); + + expect(formatted).toContain('✓'); + expect(formatted).toContain('5.5ms'); + expect(formatted).toContain('parse=1.2ms'); + expect(formatted).toContain('analysis=4.3ms'); + expect(formatted).toContain('target: 10ms'); + }); + + it('formats timing with warning when SLA missed', () => { + const timing = { + totalMs: 150.0, + metSla: false, + slaTarget: 'localEvaluation' as const, + slaTargetMs: 10, + }; + + const formatted = formatTiming(timing); + + expect(formatted).toContain('⚠'); + expect(formatted).toContain('150.0ms'); + }); + }); +}); diff --git a/vitest.config.ts b/vitest.config.ts index a37182f..6926412 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -3,5 +3,25 @@ import { configDefaults, defineConfig } from 'vitest/config'; export default defineConfig({ test: { exclude: [...configDefaults.exclude, '**/*visual.spec.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html'], + include: ['src/**/*.ts'], + exclude: [ + 'src/**/*.d.ts', + 'src/**/types.ts', + 'src/tools/**', + 'src/index.ts', + ], + thresholds: { + // Minimum coverage thresholds (fail CI if below these) + // Current baseline (May 2026): ~54% lines, ~50% branches + // These prevent regression; increase targets as coverage improves + lines: 50, + functions: 50, + branches: 45, + statements: 50, + }, + }, }, });