From 18267dbeaaf0c04174c2862af65d77441b4aa7c1 Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Mon, 4 May 2026 07:20:23 +0200 Subject: [PATCH 01/12] update models for anthropic and openai --- .env.example | 4 ++-- packages/core/schemas/eroderc.schema.json | 8 ++++---- .../src/providers/__tests__/provider-factory.test.ts | 12 ++++++------ .../providers/anthropic/__tests__/provider.test.ts | 2 +- packages/core/src/providers/anthropic/models.ts | 4 ++-- .../src/providers/openai/__tests__/provider.test.ts | 2 +- packages/core/src/providers/openai/models.ts | 4 ++-- packages/web/public/schemas/v0/eroderc.schema.json | 8 ++++---- .../src/content/docs/docs/reference/ai-providers.md | 12 ++++++------ 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.env.example b/.env.example index 372c0fbd..c43df56a 100644 --- a/.env.example +++ b/.env.example @@ -16,8 +16,8 @@ ERODE_GITHUB_TOKEN= # Model overrides — FAST for extraction stages (1, 2), ADVANCED for analysis stages (3, 4) # ERODE_GEMINI_FAST_MODEL=gemini-2.5-flash # Default # ERODE_GEMINI_ADVANCED_MODEL=gemini-2.5-pro # Default -# ERODE_ANTHROPIC_FAST_MODEL=claude-haiku-4-5-20251001 -# ERODE_ANTHROPIC_ADVANCED_MODEL=claude-sonnet-4-5-20250929 +# ERODE_ANTHROPIC_FAST_MODEL=claude-haiku-4-5 +# ERODE_ANTHROPIC_ADVANCED_MODEL=claude-sonnet-4-6 # Architecture model format # ERODE_MODEL_FORMAT=likec4 diff --git a/packages/core/schemas/eroderc.schema.json b/packages/core/schemas/eroderc.schema.json index a6944300..d11906d6 100644 --- a/packages/core/schemas/eroderc.schema.json +++ b/packages/core/schemas/eroderc.schema.json @@ -164,11 +164,11 @@ "maximum": 300000 }, "fastModel": { - "default": "claude-haiku-4-5-20251001", + "default": "claude-haiku-4-5", "type": "string" }, "advancedModel": { - "default": "claude-sonnet-4-5-20250929", + "default": "claude-sonnet-4-6", "type": "string" } }, @@ -210,11 +210,11 @@ "maximum": 300000 }, "fastModel": { - "default": "gpt-4.1-mini", + "default": "gpt-5.3-mini", "type": "string" }, "advancedModel": { - "default": "gpt-4.1", + "default": "gpt-5.3", "type": "string" } }, diff --git a/packages/core/src/providers/__tests__/provider-factory.test.ts b/packages/core/src/providers/__tests__/provider-factory.test.ts index d1e980ae..4e466690 100644 --- a/packages/core/src/providers/__tests__/provider-factory.test.ts +++ b/packages/core/src/providers/__tests__/provider-factory.test.ts @@ -35,8 +35,8 @@ const { }, openai: { apiKey: 'test-openai-key', - fastModel: 'gpt-4.1-mini', - advancedModel: 'gpt-4.1', + fastModel: 'gpt-5.3-mini', + advancedModel: 'gpt-5.3', }, anthropic: { apiKey: 'test-anthropic-key', @@ -94,8 +94,8 @@ describe('createAIProvider', () => { mockConfig.gemini.fastModel = 'gemini-flash'; mockConfig.gemini.advancedModel = 'gemini-pro'; mockConfig.openai.apiKey = 'test-openai-key'; - mockConfig.openai.fastModel = 'gpt-4.1-mini'; - mockConfig.openai.advancedModel = 'gpt-4.1'; + mockConfig.openai.fastModel = 'gpt-5.3-mini'; + mockConfig.openai.advancedModel = 'gpt-5.3'; mockConfig.anthropic.apiKey = 'test-anthropic-key'; mockConfig.anthropic.fastModel = 'claude-haiku'; mockConfig.anthropic.advancedModel = 'claude-sonnet'; @@ -122,8 +122,8 @@ describe('createAIProvider', () => { expect(result).toBe(mockOpenAIInstance); expect(OpenAIProvider).toHaveBeenCalledWith({ apiKey: 'test-openai-key', - fastModel: 'gpt-4.1-mini', - advancedModel: 'gpt-4.1', + fastModel: 'gpt-5.3-mini', + advancedModel: 'gpt-5.3', }); }); diff --git a/packages/core/src/providers/anthropic/__tests__/provider.test.ts b/packages/core/src/providers/anthropic/__tests__/provider.test.ts index 62c91d70..80894fa1 100644 --- a/packages/core/src/providers/anthropic/__tests__/provider.test.ts +++ b/packages/core/src/providers/anthropic/__tests__/provider.test.ts @@ -312,7 +312,7 @@ describe('AnthropicProvider', () => { expect(mockCreate).toHaveBeenCalled(); const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; - expect(callArg?.model).toBe('claude-haiku-4-5-20251001'); + expect(callArg?.model).toBe('claude-haiku-4-5'); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/anthropic/models.ts b/packages/core/src/providers/anthropic/models.ts index 227ae62e..d09bbbc5 100644 --- a/packages/core/src/providers/anthropic/models.ts +++ b/packages/core/src/providers/anthropic/models.ts @@ -1,4 +1,4 @@ export const ANTHROPIC_MODELS = { - FAST: 'claude-haiku-4-5-20251001', - ADVANCED: 'claude-sonnet-4-5-20250929', + FAST: 'claude-haiku-4-5', + ADVANCED: 'claude-sonnet-4-6', } as const; diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 863f2425..4fd36ed0 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -310,7 +310,7 @@ describe('OpenAIProvider', () => { expect(mockCreate).toHaveBeenCalled(); const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; - expect(callArg?.model).toBe('gpt-4.1-mini'); + expect(callArg?.model).toBe('gpt-5.3-mini'); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/openai/models.ts b/packages/core/src/providers/openai/models.ts index acf08b56..1626e6ef 100644 --- a/packages/core/src/providers/openai/models.ts +++ b/packages/core/src/providers/openai/models.ts @@ -1,4 +1,4 @@ export const OPENAI_MODELS = { - FAST: 'gpt-4.1-mini', - ADVANCED: 'gpt-4.1', + FAST: 'gpt-5.3-mini', + ADVANCED: 'gpt-5.3', } as const; diff --git a/packages/web/public/schemas/v0/eroderc.schema.json b/packages/web/public/schemas/v0/eroderc.schema.json index a6944300..d11906d6 100644 --- a/packages/web/public/schemas/v0/eroderc.schema.json +++ b/packages/web/public/schemas/v0/eroderc.schema.json @@ -164,11 +164,11 @@ "maximum": 300000 }, "fastModel": { - "default": "claude-haiku-4-5-20251001", + "default": "claude-haiku-4-5", "type": "string" }, "advancedModel": { - "default": "claude-sonnet-4-5-20250929", + "default": "claude-sonnet-4-6", "type": "string" } }, @@ -210,11 +210,11 @@ "maximum": 300000 }, "fastModel": { - "default": "gpt-4.1-mini", + "default": "gpt-5.3-mini", "type": "string" }, "advancedModel": { - "default": "gpt-4.1", + "default": "gpt-5.3", "type": "string" } }, diff --git a/packages/web/src/content/docs/docs/reference/ai-providers.md b/packages/web/src/content/docs/docs/reference/ai-providers.md index dc3259c5..b1ba7d94 100644 --- a/packages/web/src/content/docs/docs/reference/ai-providers.md +++ b/packages/web/src/content/docs/docs/reference/ai-providers.md @@ -35,15 +35,15 @@ Each provider uses two model tiers to balance cost and quality: | Tier | Default model | | -------- | -------------- | -| Fast | `gpt-4.1-mini` | -| Advanced | `gpt-4.1` | +| Fast | `gpt-5.3-mini` | +| Advanced | `gpt-5.3` | ### Anthropic (experimental) -| Tier | Default model | -| -------- | ---------------------------- | -| Fast | `claude-haiku-4-5-20251001` | -| Advanced | `claude-sonnet-4-5-20250929` | +| Tier | Default model | +| -------- | ------------------- | +| Fast | `claude-haiku-4-5` | +| Advanced | `claude-sonnet-4-6` | :::caution Anthropic support is experimental and may not produce consistent results across all codebases. Use Gemini or OpenAI for production workflows. From 9289f902bacdab6967874b8773f04ad88ca56ad5 Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Mon, 4 May 2026 12:07:37 +0200 Subject: [PATCH 02/12] refactor openai provider with new api --- packages/core/schemas/eroderc.schema.json | 4 +- .../__tests__/provider-factory.test.ts | 12 +-- .../openai/__tests__/provider.test.ts | 2 +- packages/core/src/providers/openai/models.ts | 4 +- .../core/src/providers/openai/provider.ts | 86 +++++++++++++------ .../web/public/schemas/v0/eroderc.schema.json | 4 +- .../docs/docs/reference/ai-providers.md | 8 +- 7 files changed, 76 insertions(+), 44 deletions(-) diff --git a/packages/core/schemas/eroderc.schema.json b/packages/core/schemas/eroderc.schema.json index d11906d6..863f4a05 100644 --- a/packages/core/schemas/eroderc.schema.json +++ b/packages/core/schemas/eroderc.schema.json @@ -210,11 +210,11 @@ "maximum": 300000 }, "fastModel": { - "default": "gpt-5.3-mini", + "default": "gpt-5-mini", "type": "string" }, "advancedModel": { - "default": "gpt-5.3", + "default": "gpt-5", "type": "string" } }, diff --git a/packages/core/src/providers/__tests__/provider-factory.test.ts b/packages/core/src/providers/__tests__/provider-factory.test.ts index 4e466690..f62aba03 100644 --- a/packages/core/src/providers/__tests__/provider-factory.test.ts +++ b/packages/core/src/providers/__tests__/provider-factory.test.ts @@ -35,8 +35,8 @@ const { }, openai: { apiKey: 'test-openai-key', - fastModel: 'gpt-5.3-mini', - advancedModel: 'gpt-5.3', + fastModel: 'gpt-5-mini', + advancedModel: 'gpt-5', }, anthropic: { apiKey: 'test-anthropic-key', @@ -94,8 +94,8 @@ describe('createAIProvider', () => { mockConfig.gemini.fastModel = 'gemini-flash'; mockConfig.gemini.advancedModel = 'gemini-pro'; mockConfig.openai.apiKey = 'test-openai-key'; - mockConfig.openai.fastModel = 'gpt-5.3-mini'; - mockConfig.openai.advancedModel = 'gpt-5.3'; + mockConfig.openai.fastModel = 'gpt-5-mini'; + mockConfig.openai.advancedModel = 'gpt-5'; mockConfig.anthropic.apiKey = 'test-anthropic-key'; mockConfig.anthropic.fastModel = 'claude-haiku'; mockConfig.anthropic.advancedModel = 'claude-sonnet'; @@ -122,8 +122,8 @@ describe('createAIProvider', () => { expect(result).toBe(mockOpenAIInstance); expect(OpenAIProvider).toHaveBeenCalledWith({ apiKey: 'test-openai-key', - fastModel: 'gpt-5.3-mini', - advancedModel: 'gpt-5.3', + fastModel: 'gpt-5-mini', + advancedModel: 'gpt-5', }); }); diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 4fd36ed0..7ebafa7e 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -310,7 +310,7 @@ describe('OpenAIProvider', () => { expect(mockCreate).toHaveBeenCalled(); const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; - expect(callArg?.model).toBe('gpt-5.3-mini'); + expect(callArg?.model).toBe('gpt-5-mini'); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/openai/models.ts b/packages/core/src/providers/openai/models.ts index 1626e6ef..edcaa503 100644 --- a/packages/core/src/providers/openai/models.ts +++ b/packages/core/src/providers/openai/models.ts @@ -1,4 +1,4 @@ export const OPENAI_MODELS = { - FAST: 'gpt-5.3-mini', - ADVANCED: 'gpt-5.3', + FAST: 'gpt-5-mini', + ADVANCED: 'gpt-5', } as const; diff --git a/packages/core/src/providers/openai/provider.ts b/packages/core/src/providers/openai/provider.ts index 386f779f..b2bf1ac9 100644 --- a/packages/core/src/providers/openai/provider.ts +++ b/packages/core/src/providers/openai/provider.ts @@ -2,7 +2,7 @@ import OpenAI from 'openai'; import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ErodeError, ErrorCode, ApiError } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; -import type { AnalysisPhase } from '../analysis-phase.js'; +import { AnalysisPhase } from '../analysis-phase.js'; import { OPENAI_MODELS } from './models.js'; export class OpenAIProvider extends BaseProvider { @@ -30,33 +30,39 @@ export class OpenAIProvider extends BaseProvider { maxTokens: number ): Promise { try { - const response = await this.client.chat.completions.create({ + const response = await this.client.responses.create({ model, - max_tokens: maxTokens, - messages: [{ role: 'user', content: prompt }], + input: prompt, + max_output_tokens: maxTokens, + + reasoning: { + effort: getReasoningEffort(phase), + }, }); - const choice = response.choices[0]; - if (!choice) { + if ( + response.status === 'incomplete' && + response.incomplete_details?.reason === 'max_output_tokens' + ) { + // Optional: retry once with higher budget + if (maxTokens < 1000) { + return await this.callModel(model, prompt, phase, maxTokens * 2); + } + throw new ErodeError( - 'OpenAI returned an empty response', + 'Model ran out of tokens before producing output', ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The OpenAI API returned no content', - { model, phase } + 'The AI used all tokens for reasoning. Increase max_output_tokens or reduce reasoning effort.', + { model, phase, maxTokens } ); } - if (choice.finish_reason === 'content_filter') { - throw new ErodeError( - 'OpenAI safety filters blocked the response', - ErrorCode.PROVIDER_SAFETY_BLOCK, - 'Content was blocked by the AI provider safety filters. Try simplifying the input.', - { model, phase } - ); - } + const text = extractText(response); - const text = choice.message.content; if (!text) { + console.error('response'); + console.error(response); + throw new ErodeError( 'OpenAI returned an empty response', ErrorCode.PROVIDER_INVALID_RESPONSE, @@ -65,15 +71,6 @@ export class OpenAIProvider extends BaseProvider { ); } - if (choice.finish_reason === 'length') { - throw new ErodeError( - 'OpenAI response was cut short (max_tokens reached)', - ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The AI response was truncated. The output may be partial.', - { model, phase, maxTokens } - ); - } - return text; } catch (error) { if (error instanceof ErodeError) { @@ -81,5 +78,40 @@ export class OpenAIProvider extends BaseProvider { } throw ApiError.fromOpenAIError(error); } + + function extractText(response: OpenAI.Responses.Response): string { + if (response.output_text.length > 0) { + return response.output_text; + } + + let result = ''; + + for (const item of response.output) { + // ✅ Narrow to message items only + if (item.type !== 'message') continue; + + for (const content of item.content) { + if (content.type === 'output_text') { + result += content.text; + } + } + } + return result; + } + + function getReasoningEffort(phase: AnalysisPhase) { + switch (phase) { + case AnalysisPhase.COMPONENT_RESOLUTION: + return 'low'; + case AnalysisPhase.CHANGE_ANALYSIS: + return 'low'; + case AnalysisPhase.DEPENDENCY_SCAN: + return 'low'; + case AnalysisPhase.MODEL_UPDATE: + return 'medium'; + default: + return 'low'; + } + } } } diff --git a/packages/web/public/schemas/v0/eroderc.schema.json b/packages/web/public/schemas/v0/eroderc.schema.json index d11906d6..863f4a05 100644 --- a/packages/web/public/schemas/v0/eroderc.schema.json +++ b/packages/web/public/schemas/v0/eroderc.schema.json @@ -210,11 +210,11 @@ "maximum": 300000 }, "fastModel": { - "default": "gpt-5.3-mini", + "default": "gpt-5-mini", "type": "string" }, "advancedModel": { - "default": "gpt-5.3", + "default": "gpt-5", "type": "string" } }, diff --git a/packages/web/src/content/docs/docs/reference/ai-providers.md b/packages/web/src/content/docs/docs/reference/ai-providers.md index b1ba7d94..1a6c70c6 100644 --- a/packages/web/src/content/docs/docs/reference/ai-providers.md +++ b/packages/web/src/content/docs/docs/reference/ai-providers.md @@ -33,10 +33,10 @@ Each provider uses two model tiers to balance cost and quality: ### OpenAI -| Tier | Default model | -| -------- | -------------- | -| Fast | `gpt-5.3-mini` | -| Advanced | `gpt-5.3` | +| Tier | Default model | +| -------- | ------------- | +| Fast | `gpt-5-mini` | +| Advanced | `gpt-5` | ### Anthropic (experimental) From 051e2a97f304a9d4d9327d4428164d719dcd882c Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Mon, 4 May 2026 12:08:54 +0200 Subject: [PATCH 03/12] remove adr log --- .claude-adr.log | 2 -- .githooks/post-commit | 28 +--------------------------- 2 files changed, 1 insertion(+), 29 deletions(-) delete mode 100644 .claude-adr.log diff --git a/.claude-adr.log b/.claude-adr.log deleted file mode 100644 index 9b6ce8e5..00000000 --- a/.claude-adr.log +++ /dev/null @@ -1,2 +0,0 @@ -nohup: .githooks/adr-generator.sh: No such file or directory -nohup: .githooks/adr-generator.sh: No such file or directory diff --git a/.githooks/post-commit b/.githooks/post-commit index 72d5128b..9816d41b 100755 --- a/.githooks/post-commit +++ b/.githooks/post-commit @@ -1,29 +1,3 @@ #!/bin/sh - -# Post-commit hook: Auto-generate ADR for architectural changes -# Runs Claude Code headlessly in background to analyze commits - -COMMIT_MSG=$(git log -1 --pretty=%B) - -# Opt-out checks -if echo "$COMMIT_MSG" | grep -qiE "\[(skip-adr|no-adr)\]"; then - exit 0 -fi -[ "$SKIP_ADR" = "1" ] && exit 0 - -# Check for Claude CLI -command -v claude >/dev/null 2>&1 || exit 0 - -# Check for architectural indicators -if echo "$COMMIT_MSG" | grep -qiE "(refactor|architecture|migrate|introduce|domain layer|api version)"; then - echo "" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo " Architectural change detected" - echo " Running ADR generation in background..." - echo " Check .claude-adr.log for output" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - - nohup "$(dirname "$0")/adr-generator.sh" >> .claude-adr.log 2>&1 & -fi - +echo "Post-commit checks passed!" exit 0 From f31d4d5937c5a49063de2fff949a9808013cc84d Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Tue, 5 May 2026 11:56:46 +0200 Subject: [PATCH 04/12] refactor: use intent-based provider generation profiles Move output sizing intent into shared generation profiles. Keep provider-specific token parameters inside each AI provider. Document the provider boundary change and add web lint coverage to CI. --- .../__tests__/generation-profile.test.ts | 38 ++++++ .../anthropic/__tests__/provider.test.ts | 23 +++- .../core/src/providers/anthropic/provider.ts | 15 ++- packages/core/src/providers/base-provider.ts | 50 +++++-- .../gemini/__tests__/provider.test.ts | 18 +++ .../core/src/providers/gemini/provider.ts | 12 +- .../core/src/providers/generation-profile.ts | 23 ++++ .../openai/__tests__/provider.test.ts | 95 +++++++++++-- .../core/src/providers/openai/provider.ts | 126 +++++++++++------- packages/eslint-config/base.js | 2 + packages/eslint-config/index.d.ts | 1 + packages/eslint-config/index.js | 2 +- packages/web/eslint.config.mjs | 6 +- packages/web/package.json | 2 +- ...tent-based-provider-generation-profiles.md | 59 ++++++++ .../docs/docs/contributing/adrs/index.md | 25 ++-- 16 files changed, 410 insertions(+), 87 deletions(-) create mode 100644 packages/core/src/providers/__tests__/generation-profile.test.ts create mode 100644 packages/core/src/providers/generation-profile.ts create mode 100644 packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md diff --git a/packages/core/src/providers/__tests__/generation-profile.test.ts b/packages/core/src/providers/__tests__/generation-profile.test.ts new file mode 100644 index 00000000..d8648b46 --- /dev/null +++ b/packages/core/src/providers/__tests__/generation-profile.test.ts @@ -0,0 +1,38 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { AnalysisPhase } from '../analysis-phase.js'; +import { getGenerationProfileForPhase } from '../generation-profile.js'; + +describe('getGenerationProfileForPhase', () => { + it('uses small low-effort generation for simple phases', () => { + expect(getGenerationProfileForPhase(AnalysisPhase.COMPONENT_RESOLUTION)).toEqual({ + outputSize: 'small', + reasoningEffort: 'low', + }); + expect(getGenerationProfileForPhase(AnalysisPhase.DEPENDENCY_SCAN)).toEqual({ + outputSize: 'small', + reasoningEffort: 'low', + }); + }); + + it('uses medium low-effort generation for drift analysis', () => { + expect(getGenerationProfileForPhase(AnalysisPhase.CHANGE_ANALYSIS)).toEqual({ + outputSize: 'medium', + reasoningEffort: 'low', + }); + }); + + it('uses medium medium-effort generation for model updates', () => { + expect(getGenerationProfileForPhase(AnalysisPhase.MODEL_UPDATE)).toEqual({ + outputSize: 'medium', + reasoningEffort: 'medium', + }); + }); + + it('does not require raw maxTokens in shared stage orchestration', () => { + const source = readFileSync(join(import.meta.dirname, '../base-provider.ts'), 'utf8'); + + expect(source).not.toContain('maxTokens'); + }); +}); diff --git a/packages/core/src/providers/anthropic/__tests__/provider.test.ts b/packages/core/src/providers/anthropic/__tests__/provider.test.ts index 80894fa1..13683c94 100644 --- a/packages/core/src/providers/anthropic/__tests__/provider.test.ts +++ b/packages/core/src/providers/anthropic/__tests__/provider.test.ts @@ -117,6 +117,12 @@ describe('AnthropicProvider', () => { makeStage1Data(['comp.frontend', 'comp.backend']) ); expect(result).toBe('comp.backend'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'claude-haiku-4-5', + max_tokens: 600, + }) + ); }); it('should return null when no component matches', async () => { @@ -167,6 +173,12 @@ describe('AnthropicProvider', () => { expect(result.dependencies).toHaveLength(1); expect(result.dependencies[0]?.dependency).toBe('redis'); expect(result.summary).toBe('Added Redis dependency'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'claude-haiku-4-5', + max_tokens: 600, + }) + ); }); it('should throw on non-JSON response', async () => { @@ -208,6 +220,12 @@ describe('AnthropicProvider', () => { expect(result.metadata).toBe(data.changeRequest); expect(result.component).toBe(data.component); expect(result.dependencyChanges).toBe(data.dependencies); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'claude-sonnet-4-6', + max_tokens: 1500, + }) + ); }); }); @@ -311,8 +329,11 @@ describe('AnthropicProvider', () => { await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); expect(mockCreate).toHaveBeenCalled(); - const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; + const callArg = mockCreate.mock.calls[0]?.[0] as + | { max_tokens?: number; model?: string } + | undefined; expect(callArg?.model).toBe('claude-haiku-4-5'); + expect(callArg?.max_tokens).toBe(1500); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/anthropic/provider.ts b/packages/core/src/providers/anthropic/provider.ts index d958a7e2..edf5078d 100644 --- a/packages/core/src/providers/anthropic/provider.ts +++ b/packages/core/src/providers/anthropic/provider.ts @@ -3,8 +3,15 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; +import type { GenerationProfile, OutputSize } from '../generation-profile.js'; import { ANTHROPIC_MODELS } from './models.js'; +const MAX_TOKENS_BY_OUTPUT_SIZE = { + small: 600, + medium: 1500, + large: 3000, +} satisfies Record; + export class AnthropicProvider extends BaseProvider { private readonly client: Anthropic; @@ -27,12 +34,14 @@ export class AnthropicProvider extends BaseProvider { model: string, prompt: string, phase: AnalysisPhase, - maxTokens: number + generationProfile: GenerationProfile ): Promise { + const outputTokenLimit = MAX_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; + try { const response = await this.client.messages.create({ model, - max_tokens: maxTokens, + max_tokens: outputTokenLimit, messages: [{ role: 'user', content: prompt }], }); @@ -62,7 +71,7 @@ export class AnthropicProvider extends BaseProvider { 'Anthropic response was cut short (max_tokens reached)', ErrorCode.PROVIDER_INVALID_RESPONSE, 'The AI response was truncated. The output may be partial.', - { model, phase, maxTokens } + { model, phase, outputTokenLimit } ); } diff --git a/packages/core/src/providers/base-provider.ts b/packages/core/src/providers/base-provider.ts index 58f38fe1..f76bffdb 100644 --- a/packages/core/src/providers/base-provider.ts +++ b/packages/core/src/providers/base-provider.ts @@ -15,6 +15,7 @@ import { ErodeError, ErrorCode, ApiError } from '../errors.js'; import { withRetry } from '../utils/retry.js'; import { AnalysisPhase } from './analysis-phase.js'; import { CONFIG } from '../utils/config.js'; +import { getGenerationProfileForPhase, type GenerationProfile } from './generation-profile.js'; function debugLog(msg: string, data?: unknown): void { if (CONFIG.debug.verbose) { @@ -22,6 +23,14 @@ function debugLog(msg: string, data?: unknown): void { } } +function formatDuration(startedAt: bigint): string { + const elapsedMs = Number(process.hrtime.bigint() - startedAt) / 1_000_000; + if (elapsedMs < 1000) { + return `${String(Math.round(elapsedMs))}ms`; + } + return `${(elapsedMs / 1000).toFixed(2)}s`; +} + export interface ProviderConfig { apiKey: string; fastModel?: string; @@ -48,26 +57,35 @@ export abstract class BaseProvider implements AIProvider { * @param model - The model identifier to use * @param prompt - The prompt text to send * @param phase - The analysis phase (for error context) - * @param maxTokens - Maximum tokens for the response (some providers may ignore this) + * @param generationProfile - Provider-agnostic output intent * @returns The text content of the model response */ protected abstract callModel( model: string, prompt: string, phase: AnalysisPhase, - maxTokens: number + generationProfile: GenerationProfile ): Promise; async selectComponent(data: ComponentSelectionPromptData): Promise { const prompt = PromptBuilder.buildComponentSelectionPrompt(data); + debugLog('selectComponent using model', this.fastModel); + const startedAt = process.hrtime.bigint(); const responseText = await withRetry( - () => this.callModel(this.fastModel, prompt, AnalysisPhase.COMPONENT_RESOLUTION, 256), + () => + this.callModel( + this.fastModel, + prompt, + AnalysisPhase.COMPONENT_RESOLUTION, + getGenerationProfileForPhase(AnalysisPhase.COMPONENT_RESOLUTION) + ), { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } ); + debugLog('selectComponent completed in', formatDuration(startedAt)); if (!responseText) { return null; @@ -89,7 +107,6 @@ export abstract class BaseProvider implements AIProvider { prompt: PromptBuilder.buildDependencyExtractionPrompt(data), schema: DependencyExtractionResultSchema, schemaName: 'DependencyExtractionResult', - maxTokens: 4096, }); } @@ -100,7 +117,6 @@ export abstract class BaseProvider implements AIProvider { prompt: PromptBuilder.buildDriftAnalysisPrompt(data), schema: DriftAnalysisResponseSchema, schemaName: 'DriftAnalysisResponse', - maxTokens: 8192, }); return { @@ -121,17 +137,23 @@ export abstract class BaseProvider implements AIProvider { linesToInsert, modelFormat, }); - // Estimate tokens: ~4 chars per token, add 20% headroom for inserted lines - const estimatedTokens = Math.ceil(fileContent.length / 4) + linesToInsert.length * 50; - const maxTokens = Math.max(4096, Math.ceil(estimatedTokens * 1.2)); debugLog('patchModel using model', this.fastModel); + const startedAt = process.hrtime.bigint(); return withRetry( - () => this.callModel(this.fastModel, prompt, AnalysisPhase.MODEL_UPDATE, maxTokens), + () => + this.callModel( + this.fastModel, + prompt, + AnalysisPhase.MODEL_UPDATE, + getGenerationProfileForPhase(AnalysisPhase.MODEL_UPDATE) + ), { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } - ); + ).finally(() => { + debugLog('patchModel completed in', formatDuration(startedAt)); + }); } private async executeStage(config: { @@ -140,16 +162,20 @@ export abstract class BaseProvider implements AIProvider { prompt: string; schema: z.ZodType; schemaName: string; - maxTokens: number; + generationProfile?: GenerationProfile; }): Promise { debugLog(`executeStage ${config.phase} using model`, config.model); + const generationProfile = + config.generationProfile ?? getGenerationProfileForPhase(config.phase); + const startedAt = process.hrtime.bigint(); const responseText = await withRetry( - () => this.callModel(config.model, config.prompt, config.phase, config.maxTokens), + () => this.callModel(config.model, config.prompt, config.phase, generationProfile), { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } ); + debugLog(`executeStage ${config.phase} completed in`, formatDuration(startedAt)); const jsonStr = PromptBuilder.extractJson(responseText); if (!jsonStr) { diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index 53b76e16..acf41709 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -121,6 +121,12 @@ describe('GeminiProvider', () => { makeStage1Data(['comp.frontend', 'comp.backend']) ); expect(result).toBe('comp.backend'); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-flash', + config: { maxOutputTokens: 600 }, + }) + ); }); it('should return null when no component matches', async () => { @@ -177,6 +183,12 @@ describe('GeminiProvider', () => { expect(result.dependencies).toHaveLength(1); expect(result.dependencies[0]?.dependency).toBe('redis'); expect(result.summary).toBe('Added Redis dependency'); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-flash', + config: { maxOutputTokens: 600 }, + }) + ); }); it('should throw on non-JSON response', async () => { @@ -224,6 +236,12 @@ describe('GeminiProvider', () => { expect(result.metadata).toBe(data.changeRequest); expect(result.component).toBe(data.component); expect(result.dependencyChanges).toBe(data.dependencies); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-pro', + config: { maxOutputTokens: 1500 }, + }) + ); }); }); diff --git a/packages/core/src/providers/gemini/provider.ts b/packages/core/src/providers/gemini/provider.ts index ef3d9e07..6feeb80f 100644 --- a/packages/core/src/providers/gemini/provider.ts +++ b/packages/core/src/providers/gemini/provider.ts @@ -3,8 +3,15 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; +import type { GenerationProfile, OutputSize } from '../generation-profile.js'; import { GEMINI_MODELS } from './models.js'; +const MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE = { + small: 600, + medium: 1500, + large: 3000, +} satisfies Record; + export class GeminiProvider extends BaseProvider { private readonly client: GoogleGenAI; @@ -27,12 +34,15 @@ export class GeminiProvider extends BaseProvider { model: string, prompt: string, phase: AnalysisPhase, - _maxTokens: number + generationProfile: GenerationProfile ): Promise { + const maxOutputTokens = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; + try { const response = await this.client.models.generateContent({ model, contents: prompt, + config: { maxOutputTokens }, }); const candidate = response.candidates?.[0]; diff --git a/packages/core/src/providers/generation-profile.ts b/packages/core/src/providers/generation-profile.ts new file mode 100644 index 00000000..b193a57f --- /dev/null +++ b/packages/core/src/providers/generation-profile.ts @@ -0,0 +1,23 @@ +import { AnalysisPhase } from './analysis-phase.js'; + +export type OutputSize = 'small' | 'medium' | 'large'; +export type ReasoningEffort = 'low' | 'medium' | 'high'; + +export interface GenerationProfile { + outputSize: OutputSize; + reasoningEffort?: ReasoningEffort; +} + +export function getGenerationProfileForPhase(phase: AnalysisPhase): GenerationProfile { + switch (phase) { + case AnalysisPhase.COMPONENT_RESOLUTION: + case AnalysisPhase.DEPENDENCY_SCAN: + return { outputSize: 'small', reasoningEffort: 'low' }; + case AnalysisPhase.MODEL_UPDATE: + return { outputSize: 'medium', reasoningEffort: 'medium' }; + case AnalysisPhase.CHANGE_ANALYSIS: + return { outputSize: 'medium', reasoningEffort: 'low' }; + default: + return { outputSize: 'small', reasoningEffort: 'low' }; + } +} diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 7ebafa7e..33d0000b 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -6,7 +6,7 @@ const mockCreate = vi.fn(); vi.mock('openai', () => { return { default: class MockOpenAI { - chat = { completions: { create: mockCreate } }; + responses = { create: mockCreate }; }, }; }); @@ -78,9 +78,39 @@ function makePrAnalysisData(): DriftAnalysisPromptData { }; } -function makeOpenAIResponse(content: string | null, finishReason = 'stop') { +function makeOpenAIResponse(content: string) { return { - choices: [{ message: { content }, finish_reason: finishReason }], + status: 'completed', + incomplete_details: null, + output_text: content, + output: [], + }; +} + +function makeOpenAIMessageResponse(content: string) { + return { + status: 'completed', + incomplete_details: null, + output_text: '', + output: [ + { + type: 'reasoning', + content: [{ type: 'output_text', text: 'ignore me' }], + }, + { + type: 'message', + content: [{ type: 'output_text', text: content }], + }, + ], + }; +} + +function makeIncompleteOpenAIResponse(reason: 'max_output_tokens' | 'content_filter') { + return { + status: 'incomplete', + incomplete_details: { reason }, + output_text: '', + output: [], }; } @@ -110,6 +140,13 @@ describe('OpenAIProvider', () => { makeStage1Data(['comp.frontend', 'comp.backend']) ); expect(result).toBe('comp.backend'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gpt-5-mini', + max_output_tokens: 1500, + reasoning: { effort: 'minimal' }, + }) + ); }); it('should return null when no component matches', async () => { @@ -123,7 +160,7 @@ describe('OpenAIProvider', () => { }); it('should throw on empty response', async () => { - mockCreate.mockResolvedValueOnce(makeOpenAIResponse(null)); + mockCreate.mockResolvedValueOnce(makeOpenAIResponse('')); const provider = createProvider(); await expect(provider.selectComponent(makeStage1Data(['comp.frontend']))).rejects.toThrow( @@ -156,6 +193,13 @@ describe('OpenAIProvider', () => { expect(result.dependencies).toHaveLength(1); expect(result.dependencies[0]?.dependency).toBe('redis'); expect(result.summary).toBe('Added Redis dependency'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gpt-5-mini', + max_output_tokens: 1500, + reasoning: { effort: 'minimal' }, + }) + ); }); it('should throw on non-JSON response', async () => { @@ -195,12 +239,19 @@ describe('OpenAIProvider', () => { expect(result.metadata).toBe(data.changeRequest); expect(result.component).toBe(data.component); expect(result.dependencyChanges).toBe(data.dependencies); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gpt-5', + max_output_tokens: 6000, + reasoning: { effort: 'minimal' }, + }) + ); }); }); describe('safety filter handling', () => { it('should throw PROVIDER_SAFETY_BLOCK on content_filter', async () => { - mockCreate.mockResolvedValueOnce(makeOpenAIResponse('blocked', 'content_filter')); + mockCreate.mockResolvedValueOnce(makeIncompleteOpenAIResponse('content_filter')); const provider = createProvider(); try { @@ -214,8 +265,8 @@ describe('OpenAIProvider', () => { }); describe('truncation handling', () => { - it('should throw PROVIDER_INVALID_RESPONSE on length', async () => { - mockCreate.mockResolvedValueOnce(makeOpenAIResponse('partial...', 'length')); + it('should throw PROVIDER_INVALID_RESPONSE on max_output_tokens', async () => { + mockCreate.mockResolvedValueOnce(makeIncompleteOpenAIResponse('max_output_tokens')); const provider = createProvider(); try { @@ -223,11 +274,33 @@ describe('OpenAIProvider', () => { expect.fail('Expected error to be thrown'); } catch (error) { expect(error).toBeInstanceOf(ErodeError); - expect((error as ErodeError).code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('output budget'); } }); }); + describe('response text extraction', () => { + it('should read fallback output text from message items only', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIMessageResponse('comp.api')); + + const provider = createProvider(); + const result = await provider.selectComponent(makeStage1Data(['comp.api'])); + + expect(result).toBe('comp.api'); + }); + + it('should throw on empty output after fallback', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIMessageResponse('')); + + const provider = createProvider(); + await expect(provider.selectComponent(makeStage1Data(['comp.api']))).rejects.toThrow( + ErodeError + ); + }); + }); + describe('retry on rate limit', () => { it('should retry on 429 and eventually succeed', async () => { const rateLimitError = new ApiError('Rate limited', 429); @@ -309,8 +382,12 @@ describe('OpenAIProvider', () => { await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); expect(mockCreate).toHaveBeenCalled(); - const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; + const callArg = mockCreate.mock.calls[0]?.[0] as + | { max_output_tokens?: number; model?: string; reasoning?: { effort?: string } } + | undefined; expect(callArg?.model).toBe('gpt-5-mini'); + expect(callArg?.max_output_tokens).toBe(6000); + expect(callArg?.reasoning?.effort).toBe('medium'); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/openai/provider.ts b/packages/core/src/providers/openai/provider.ts index b2bf1ac9..c0476fec 100644 --- a/packages/core/src/providers/openai/provider.ts +++ b/packages/core/src/providers/openai/provider.ts @@ -2,9 +2,18 @@ import OpenAI from 'openai'; import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ErodeError, ErrorCode, ApiError } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; -import { AnalysisPhase } from '../analysis-phase.js'; +import type { AnalysisPhase } from '../analysis-phase.js'; +import type { GenerationProfile, OutputSize, ReasoningEffort } from '../generation-profile.js'; import { OPENAI_MODELS } from './models.js'; +type OpenAIReasoningEffort = 'minimal' | 'low' | 'medium' | 'high'; + +const MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE = { + small: 1500, + medium: 6000, + large: 10000, +} satisfies Record; + export class OpenAIProvider extends BaseProvider { private readonly client: OpenAI; @@ -27,42 +36,33 @@ export class OpenAIProvider extends BaseProvider { model: string, prompt: string, phase: AnalysisPhase, - maxTokens: number + generationProfile: GenerationProfile ): Promise { + const maxOutputTokens = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; + const reasoningEffort = getOpenAIReasoningEffort(generationProfile.reasoningEffort); + try { const response = await this.client.responses.create({ model, input: prompt, - max_output_tokens: maxTokens, - - reasoning: { - effort: getReasoningEffort(phase), - }, + max_output_tokens: maxOutputTokens, + ...(supportsReasoningEffort(model) ? { reasoning: { effort: reasoningEffort } } : {}), }); - if ( - response.status === 'incomplete' && - response.incomplete_details?.reason === 'max_output_tokens' - ) { - // Optional: retry once with higher budget - if (maxTokens < 1000) { - return await this.callModel(model, prompt, phase, maxTokens * 2); - } - - throw new ErodeError( - 'Model ran out of tokens before producing output', - ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The AI used all tokens for reasoning. Increase max_output_tokens or reduce reasoning effort.', - { model, phase, maxTokens } + if (response.status === 'incomplete') { + handleIncompleteResponse( + response, + model, + phase, + maxOutputTokens, + generationProfile, + reasoningEffort ); } const text = extractText(response); if (!text) { - console.error('response'); - console.error(response); - throw new ErodeError( 'OpenAI returned an empty response', ErrorCode.PROVIDER_INVALID_RESPONSE, @@ -79,38 +79,72 @@ export class OpenAIProvider extends BaseProvider { throw ApiError.fromOpenAIError(error); } + function handleIncompleteResponse( + response: OpenAI.Responses.Response, + incompleteModel: string, + incompletePhase: AnalysisPhase, + incompleteMaxOutputTokens: number, + incompleteGenerationProfile: GenerationProfile, + incompleteReasoningEffort: OpenAIReasoningEffort + ): void { + if (response.incomplete_details?.reason === 'max_output_tokens') { + throw new ErodeError( + 'Model ran out of output budget before producing a complete response', + ErrorCode.PROVIDER_INVALID_RESPONSE, + 'The AI response used the available output budget before completion. Try a smaller change or tune the provider output budget or reasoning effort.', + { + model: incompleteModel, + phase: incompletePhase, + maxOutputTokens: incompleteMaxOutputTokens, + outputSize: incompleteGenerationProfile.outputSize, + reasoningEffort: incompleteGenerationProfile.reasoningEffort, + providerReasoningEffort: incompleteReasoningEffort, + } + ); + } + + if (response.incomplete_details?.reason === 'content_filter') { + throw new ErodeError( + 'OpenAI safety filters blocked the response', + ErrorCode.PROVIDER_SAFETY_BLOCK, + 'Content was blocked by the AI provider safety filters. Try simplifying the input.', + { model: incompleteModel, phase: incompletePhase } + ); + } + } + function extractText(response: OpenAI.Responses.Response): string { if (response.output_text.length > 0) { return response.output_text; } - let result = ''; - - for (const item of response.output) { - // ✅ Narrow to message items only - if (item.type !== 'message') continue; + return response.output + .filter((item) => item.type === 'message') + .flatMap((item) => item.content) + .filter((content) => content.type === 'output_text') + .map((content) => content.text) + .join(''); + } - for (const content of item.content) { - if (content.type === 'output_text') { - result += content.text; - } - } - } - return result; + function supportsReasoningEffort(reasoningModel: string): boolean { + return ['gpt-5', 'o1', 'o3', 'o4'].some((prefix) => { + return reasoningModel === prefix || reasoningModel.startsWith(`${prefix}-`); + }); } - function getReasoningEffort(phase: AnalysisPhase) { - switch (phase) { - case AnalysisPhase.COMPONENT_RESOLUTION: - return 'low'; - case AnalysisPhase.CHANGE_ANALYSIS: - return 'low'; - case AnalysisPhase.DEPENDENCY_SCAN: - return 'low'; - case AnalysisPhase.MODEL_UPDATE: + function getOpenAIReasoningEffort( + reasoningIntent: ReasoningEffort | undefined + ): OpenAIReasoningEffort { + switch (reasoningIntent) { + case 'high': + return 'high'; + case 'medium': return 'medium'; + case 'low': + case undefined: + return 'minimal'; default: - return 'low'; + return 'minimal'; } } } diff --git a/packages/eslint-config/base.js b/packages/eslint-config/base.js index ae243828..fdede397 100644 --- a/packages/eslint-config/base.js +++ b/packages/eslint-config/base.js @@ -43,3 +43,5 @@ export function createBaseConfig(tsconfigRootDir) { export const ignores = { ignores: ['dist/', 'node_modules/'], }; + +export const disableTypeChecked = tseslint.configs.disableTypeChecked; diff --git a/packages/eslint-config/index.d.ts b/packages/eslint-config/index.d.ts index df465f7f..20b029a2 100644 --- a/packages/eslint-config/index.d.ts +++ b/packages/eslint-config/index.d.ts @@ -1,4 +1,5 @@ import type { Linter } from 'eslint'; export function createBaseConfig(tsconfigRootDir: string): Linter.Config[]; +export const disableTypeChecked: Linter.Config; export const ignores: Linter.Config; diff --git a/packages/eslint-config/index.js b/packages/eslint-config/index.js index 3143cf0e..93fa68eb 100644 --- a/packages/eslint-config/index.js +++ b/packages/eslint-config/index.js @@ -1 +1 @@ -export { createBaseConfig, ignores } from './base.js'; +export { createBaseConfig, disableTypeChecked, ignores } from './base.js'; diff --git a/packages/web/eslint.config.mjs b/packages/web/eslint.config.mjs index ea1bb0b0..c57d5569 100644 --- a/packages/web/eslint.config.mjs +++ b/packages/web/eslint.config.mjs @@ -1,10 +1,14 @@ -import { createBaseConfig, ignores } from '@erode-app/eslint-config'; +import { createBaseConfig, disableTypeChecked, ignores } from '@erode-app/eslint-config'; import eslintPluginAstro from 'eslint-plugin-astro'; export default [ ignores, ...createBaseConfig(import.meta.dirname), ...eslintPluginAstro.configs.recommended, + { + files: ['**/*.astro'], + ...disableTypeChecked, + }, { ignores: ['astro.config.mjs', '.astro/', 'src/content/', 'public/architecture/'], }, diff --git a/packages/web/package.json b/packages/web/package.json index 5b79c618..9ecc7261 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -11,7 +11,7 @@ "build": "npm run prebuild:diagrams && npm run generate:skills && astro check && astro build", "preview": "astro preview", "typecheck": "npx tsx scripts/generate-agent-skills.ts --check && astro check --minimumFailingSeverity hint", - "check:ci": "npm run typecheck && astro build", + "check:ci": "npm run lint && npm run typecheck && astro build", "lint": "eslint . --report-unused-disable-directives --max-warnings 0" }, "dependencies": { diff --git a/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md b/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md new file mode 100644 index 00000000..769c145c --- /dev/null +++ b/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md @@ -0,0 +1,59 @@ +--- +title: 'ADR-011: Intent-based provider generation profiles' +description: Shared analysis stages describe generation intent, and providers translate that intent into request budgets. +--- + +**Status:** Accepted\ +**Date:** 2026-05-05\ +**Authors:** Anders Hassis + +## Context + +Erode runs the same analysis stages across Gemini, Anthropic, and OpenAI. The shared +provider flow used raw token counts when it called each model. That made stage code +carry provider-specific budget details. + +OpenAI's Responses API names the limit `max_output_tokens`, and reasoning models can +spend part of that budget before producing visible output. Anthropic and Gemini expose +different request parameters and have different response behavior. The shared pipeline +needs to describe what it wants, not how each provider should size the request. + +## Decision + +Introduce a provider-agnostic generation profile for model calls. A profile describes +the expected output size and reasoning effort. `BaseProvider` maps each analysis phase +to a default profile, and concrete providers translate the profile into SDK-specific +request parameters. + +Keep model tier selection unchanged. Component resolution, dependency scanning, and +model patching use the fast model. Drift analysis uses the advanced model. + +## Rationale + +The analysis stages know the shape of the work. Providers know their API parameters and +model behavior. Keeping those concerns separate makes it easier to tune OpenAI, +Anthropic, and Gemini independently without changing the shared pipeline. + +The profile also makes cost intent visible. Simple extraction work stays on cheaper +models with small outputs and low reasoning effort. Drift analysis keeps the stronger +model, but still defaults to low reasoning effort to keep feedback fast. Model updates +use medium reasoning because they generate a concrete patch. + +## Consequences + +### Positive + +- Provider-specific token parameters stay inside provider implementations. +- OpenAI can use `max_output_tokens` and reasoning effort without leaking those names + into shared analysis code. +- Cost-aware model tier selection remains explicit in `BaseProvider`. + +### Negative + +- Output profile names become a shared contract that providers must translate. +- Large model patch outputs may need provider-local tuning if medium output is too + small for some repositories. + +## Related commits + +- `9289f90` - refactor openai provider with new api diff --git a/packages/web/src/content/docs/docs/contributing/adrs/index.md b/packages/web/src/content/docs/docs/contributing/adrs/index.md index 5502dd44..3b083c63 100644 --- a/packages/web/src/content/docs/docs/contributing/adrs/index.md +++ b/packages/web/src/content/docs/docs/contributing/adrs/index.md @@ -7,18 +7,19 @@ This project tracks significant architectural decisions as ADRs. Each record cap ## Records -| ADR | Decision | Date | Status | -| ----------------------------------------------------------------------- | ----------------------------------- | ---------- | -------- | -| [001](/docs/contributing/adrs/001-multi-stage-analysis-pipeline/) | Multi-stage AI analysis pipeline | 2026-02-24 | Accepted | -| [002](/docs/contributing/adrs/002-provider-agnostic-ai-interface/) | Provider-agnostic AI interface | 2026-02-26 | Accepted | -| [003](/docs/contributing/adrs/003-architecture-model-adapter-system/) | Architecture model adapter system | 2026-02-27 | Accepted | -| [004](/docs/contributing/adrs/004-template-based-prompt-system/) | Template-based prompt system | 2026-02-24 | Accepted | -| [005](/docs/contributing/adrs/005-layered-configuration-with-zod/) | Layered configuration with Zod | 2026-02-25 | Accepted | -| [006](/docs/contributing/adrs/006-structured-error-hierarchy/) | Structured error hierarchy | 2026-02-27 | Accepted | -| [007](/docs/contributing/adrs/007-multi-platform-vcs-abstraction/) | Multi-platform VCS abstraction | 2026-02-26 | Accepted | -| [008](/docs/contributing/adrs/008-monorepo-workspace-structure/) | Monorepo workspace structure | 2026-02-25 | Accepted | -| [009](/docs/contributing/adrs/009-docker-and-github-action-deployment/) | Docker and GitHub Action deployment | 2026-02-24 | Accepted | -| [010](/docs/contributing/adrs/010-local-diff-check-command/) | Local diff check command | 2026-03-07 | Accepted | +| ADR | Decision | Date | Status | +| ----------------------------------------------------------------------------- | ----------------------------------------- | ---------- | -------- | +| [001](/docs/contributing/adrs/001-multi-stage-analysis-pipeline/) | Multi-stage AI analysis pipeline | 2026-02-24 | Accepted | +| [002](/docs/contributing/adrs/002-provider-agnostic-ai-interface/) | Provider-agnostic AI interface | 2026-02-26 | Accepted | +| [003](/docs/contributing/adrs/003-architecture-model-adapter-system/) | Architecture model adapter system | 2026-02-27 | Accepted | +| [004](/docs/contributing/adrs/004-template-based-prompt-system/) | Template-based prompt system | 2026-02-24 | Accepted | +| [005](/docs/contributing/adrs/005-layered-configuration-with-zod/) | Layered configuration with Zod | 2026-02-25 | Accepted | +| [006](/docs/contributing/adrs/006-structured-error-hierarchy/) | Structured error hierarchy | 2026-02-27 | Accepted | +| [007](/docs/contributing/adrs/007-multi-platform-vcs-abstraction/) | Multi-platform VCS abstraction | 2026-02-26 | Accepted | +| [008](/docs/contributing/adrs/008-monorepo-workspace-structure/) | Monorepo workspace structure | 2026-02-25 | Accepted | +| [009](/docs/contributing/adrs/009-docker-and-github-action-deployment/) | Docker and GitHub Action deployment | 2026-02-24 | Accepted | +| [010](/docs/contributing/adrs/010-local-diff-check-command/) | Local diff check command | 2026-03-07 | Accepted | +| [011](/docs/contributing/adrs/011-intent-based-provider-generation-profiles/) | Intent-based provider generation profiles | 2026-05-05 | Accepted | ## Adding a new ADR From b953e7e04b9f6949a3d81605b198de8615f00655 Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Tue, 5 May 2026 17:10:59 +0200 Subject: [PATCH 05/12] fix(core): tune gemini generation controls Disable Gemini thinking for low-reasoning phases. Raise Gemini output budgets and report max-token truncation clearly. --- .../gemini/__tests__/provider.test.ts | 27 ++++++++++++-- .../core/src/providers/gemini/provider.ts | 36 +++++++++++++++---- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index acf41709..53d7e228 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -124,7 +124,7 @@ describe('GeminiProvider', () => { expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemini-2.5-flash', - config: { maxOutputTokens: 600 }, + config: { maxOutputTokens: 1500, thinkingConfig: { thinkingBudget: 0 } }, }) ); }); @@ -186,7 +186,7 @@ describe('GeminiProvider', () => { expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemini-2.5-flash', - config: { maxOutputTokens: 600 }, + config: { maxOutputTokens: 1500, thinkingConfig: { thinkingBudget: 0 } }, }) ); }); @@ -239,7 +239,7 @@ describe('GeminiProvider', () => { expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemini-2.5-pro', - config: { maxOutputTokens: 1500 }, + config: { maxOutputTokens: 3000, thinkingConfig: { thinkingBudget: 0 } }, }) ); }); @@ -264,6 +264,27 @@ describe('GeminiProvider', () => { }); }); + describe('truncation handling', () => { + it('should throw PROVIDER_INVALID_RESPONSE on max tokens', async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: '```json\n{"dependencies": [', + candidates: [{ finishReason: 'MAX_TOKENS' }], + usageMetadata: { promptTokenCount: 500, candidatesTokenCount: 1500 }, + }); + + const provider = createProvider(); + try { + await provider.extractDependencies(makePreprocessingData()); + expect.fail('Expected error to be thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ErodeError); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('output budget'); + } + }); + }); + describe('retry on rate limit', () => { it('should retry on 429 and eventually succeed', async () => { const rateLimitError = new ApiError('Rate limited', 429); diff --git a/packages/core/src/providers/gemini/provider.ts b/packages/core/src/providers/gemini/provider.ts index 6feeb80f..d7b74b7d 100644 --- a/packages/core/src/providers/gemini/provider.ts +++ b/packages/core/src/providers/gemini/provider.ts @@ -1,15 +1,15 @@ -import { FinishReason, GoogleGenAI } from '@google/genai'; +import { FinishReason, GoogleGenAI, ThinkingLevel, type ThinkingConfig } from '@google/genai'; import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; -import type { GenerationProfile, OutputSize } from '../generation-profile.js'; +import type { GenerationProfile, OutputSize, ReasoningEffort } from '../generation-profile.js'; import { GEMINI_MODELS } from './models.js'; const MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE = { - small: 600, - medium: 1500, - large: 3000, + small: 1500, + medium: 3000, + large: 6000, } satisfies Record; export class GeminiProvider extends BaseProvider { @@ -37,12 +37,13 @@ export class GeminiProvider extends BaseProvider { generationProfile: GenerationProfile ): Promise { const maxOutputTokens = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; + const thinkingConfig = getThinkingConfig(generationProfile.reasoningEffort); try { const response = await this.client.models.generateContent({ model, contents: prompt, - config: { maxOutputTokens }, + config: { maxOutputTokens, thinkingConfig }, }); const candidate = response.candidates?.[0]; @@ -55,6 +56,15 @@ export class GeminiProvider extends BaseProvider { ); } + if (candidate?.finishReason === FinishReason.MAX_TOKENS) { + throw new ErodeError( + 'Gemini response was cut short by the output token limit', + ErrorCode.PROVIDER_INVALID_RESPONSE, + 'The Gemini response used the available output budget before completion. Try a smaller change or tune the provider output budget.', + { model, phase, maxOutputTokens, outputSize: generationProfile.outputSize } + ); + } + const text = response.text; if (!text) { throw new ErodeError( @@ -72,5 +82,19 @@ export class GeminiProvider extends BaseProvider { } throw ApiError.fromGeminiError(error); } + + function getThinkingConfig(reasoningIntent: ReasoningEffort | undefined): ThinkingConfig { + switch (reasoningIntent) { + case 'high': + return { thinkingLevel: ThinkingLevel.HIGH }; + case 'medium': + return { thinkingLevel: ThinkingLevel.MEDIUM }; + case 'low': + case undefined: + return { thinkingBudget: 0 }; + default: + return { thinkingBudget: 0 }; + } + } } } From ff18c197fba9d441a0a7e29bf0502d13cf519ab8 Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Tue, 5 May 2026 18:41:19 +0200 Subject: [PATCH 06/12] fix(core): tune provider generation controls --- .../__tests__/generation-profile.test.ts | 15 +++- .../anthropic/__tests__/provider.test.ts | 13 +++- .../core/src/providers/anthropic/provider.ts | 11 ++- packages/core/src/providers/base-provider.ts | 8 +- .../gemini/__tests__/provider.test.ts | 68 ++++++++++++++++- .../core/src/providers/gemini/provider.ts | 76 +++++++++++++++++-- .../core/src/providers/generation-profile.ts | 20 +++++ .../openai/__tests__/provider.test.ts | 11 +++ .../core/src/providers/openai/provider.ts | 11 ++- 9 files changed, 221 insertions(+), 12 deletions(-) diff --git a/packages/core/src/providers/__tests__/generation-profile.test.ts b/packages/core/src/providers/__tests__/generation-profile.test.ts index d8648b46..461dac2e 100644 --- a/packages/core/src/providers/__tests__/generation-profile.test.ts +++ b/packages/core/src/providers/__tests__/generation-profile.test.ts @@ -2,7 +2,10 @@ import { readFileSync } from 'node:fs'; import { join } from 'node:path'; import { describe, expect, it } from 'vitest'; import { AnalysisPhase } from '../analysis-phase.js'; -import { getGenerationProfileForPhase } from '../generation-profile.js'; +import { + getGenerationProfileForModelPatch, + getGenerationProfileForPhase, +} from '../generation-profile.js'; describe('getGenerationProfileForPhase', () => { it('uses small low-effort generation for simple phases', () => { @@ -35,4 +38,14 @@ describe('getGenerationProfileForPhase', () => { expect(source).not.toContain('maxTokens'); }); + + it('adds a dynamic output content hint for model patches', () => { + const profile = getGenerationProfileForModelPatch('x'.repeat(40_000), [' comp.a -> comp.b']); + + expect(profile).toMatchObject({ + outputSize: 'medium', + reasoningEffort: 'medium', + }); + expect(profile.outputContentHint?.characters).toBeGreaterThan(16_384); + }); }); diff --git a/packages/core/src/providers/anthropic/__tests__/provider.test.ts b/packages/core/src/providers/anthropic/__tests__/provider.test.ts index 13683c94..ce1493b2 100644 --- a/packages/core/src/providers/anthropic/__tests__/provider.test.ts +++ b/packages/core/src/providers/anthropic/__tests__/provider.test.ts @@ -333,7 +333,18 @@ describe('AnthropicProvider', () => { | { max_tokens?: number; model?: string } | undefined; expect(callArg?.model).toBe('claude-haiku-4-5'); - expect(callArg?.max_tokens).toBe(1500); + expect(callArg?.max_tokens).toBe(4096); + }); + + it('should increase the output budget for large model files', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockCreate.mockResolvedValueOnce(makeAnthropicResponse(patchedContent)); + + const provider = createProvider(); + await provider.patchModel('x'.repeat(40_000), [' comp.a -> comp.b'], 'likec4'); + + const callArg = mockCreate.mock.calls[0]?.[0] as { max_tokens?: number } | undefined; + expect(callArg?.max_tokens).toBeGreaterThan(4096); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/anthropic/provider.ts b/packages/core/src/providers/anthropic/provider.ts index edf5078d..e800527a 100644 --- a/packages/core/src/providers/anthropic/provider.ts +++ b/packages/core/src/providers/anthropic/provider.ts @@ -36,7 +36,7 @@ export class AnthropicProvider extends BaseProvider { phase: AnalysisPhase, generationProfile: GenerationProfile ): Promise { - const outputTokenLimit = MAX_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; + const outputTokenLimit = getOutputTokenLimit(generationProfile); try { const response = await this.client.messages.create({ @@ -82,5 +82,14 @@ export class AnthropicProvider extends BaseProvider { } throw ApiError.fromAnthropicError(error); } + + function getOutputTokenLimit(profile: GenerationProfile): number { + const profileLimit = MAX_TOKENS_BY_OUTPUT_SIZE[profile.outputSize]; + const hintedLimit = profile.outputContentHint + ? Math.ceil(profile.outputContentHint.characters / 4) + : 0; + + return Math.max(profileLimit, hintedLimit); + } } } diff --git a/packages/core/src/providers/base-provider.ts b/packages/core/src/providers/base-provider.ts index f76bffdb..c787c498 100644 --- a/packages/core/src/providers/base-provider.ts +++ b/packages/core/src/providers/base-provider.ts @@ -15,7 +15,11 @@ import { ErodeError, ErrorCode, ApiError } from '../errors.js'; import { withRetry } from '../utils/retry.js'; import { AnalysisPhase } from './analysis-phase.js'; import { CONFIG } from '../utils/config.js'; -import { getGenerationProfileForPhase, type GenerationProfile } from './generation-profile.js'; +import { + getGenerationProfileForModelPatch, + getGenerationProfileForPhase, + type GenerationProfile, +} from './generation-profile.js'; function debugLog(msg: string, data?: unknown): void { if (CONFIG.debug.verbose) { @@ -145,7 +149,7 @@ export abstract class BaseProvider implements AIProvider { this.fastModel, prompt, AnalysisPhase.MODEL_UPDATE, - getGenerationProfileForPhase(AnalysisPhase.MODEL_UPDATE) + getGenerationProfileForModelPatch(fileContent, linesToInsert) ), { retries: 2, diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index 53d7e228..1c7650dd 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -23,6 +23,11 @@ vi.mock('@google/genai', () => { SPII: 'SPII', MALFORMED_FUNCTION_CALL: 'MALFORMED_FUNCTION_CALL', }, + ThinkingLevel: { + LOW: 'LOW', + MEDIUM: 'MEDIUM', + HIGH: 'HIGH', + }, }; }); @@ -239,7 +244,68 @@ describe('GeminiProvider', () => { expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemini-2.5-pro', - config: { maxOutputTokens: 3000, thinkingConfig: { thinkingBudget: 0 } }, + config: { maxOutputTokens: 3000, thinkingConfig: { thinkingBudget: -1 } }, + }) + ); + }); + }); + + describe('patchModel', () => { + it('should use 2.5 Flash thinking budgets and dynamic output headroom for patching', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = createProvider(); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-flash', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingBudget: -1 } }, + }) + ); + }); + + it('should increase the output budget for large model files', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = createProvider(); + await provider.patchModel('x'.repeat(40_000), [' comp.a -> comp.b'], 'likec4'); + + const callArg = mockGenerateContent.mock.calls[0]?.[0] as + | { config?: { maxOutputTokens?: number } } + | undefined; + expect(callArg?.config?.maxOutputTokens).toBeGreaterThan(4096); + }); + + it('should use thinkingLevel for Gemini 3 style models', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = new GeminiProvider({ + apiKey: 'test-api-key', + fastModel: 'gemini-3-flash-preview', + advancedModel: 'gemini-3-pro-preview', + }); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-3-flash-preview', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingLevel: 'MEDIUM' } }, }) ); }); diff --git a/packages/core/src/providers/gemini/provider.ts b/packages/core/src/providers/gemini/provider.ts index d7b74b7d..219d748b 100644 --- a/packages/core/src/providers/gemini/provider.ts +++ b/packages/core/src/providers/gemini/provider.ts @@ -36,8 +36,8 @@ export class GeminiProvider extends BaseProvider { phase: AnalysisPhase, generationProfile: GenerationProfile ): Promise { - const maxOutputTokens = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; - const thinkingConfig = getThinkingConfig(generationProfile.reasoningEffort); + const maxOutputTokens = getMaxOutputTokens(generationProfile); + const thinkingConfig = getThinkingConfig(model, generationProfile.reasoningEffort); try { const response = await this.client.models.generateContent({ @@ -83,12 +83,42 @@ export class GeminiProvider extends BaseProvider { throw ApiError.fromGeminiError(error); } - function getThinkingConfig(reasoningIntent: ReasoningEffort | undefined): ThinkingConfig { + function getMaxOutputTokens(profile: GenerationProfile): number { + const profileLimit = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[profile.outputSize]; + const hintedLimit = profile.outputContentHint + ? Math.ceil(profile.outputContentHint.characters / 4) + : 0; + + return Math.max(profileLimit, hintedLimit); + } + + function getThinkingConfig( + thinkingModel: string, + reasoningIntent: ReasoningEffort | undefined + ): ThinkingConfig { + if (isGemini25Model(thinkingModel)) { + return getGemini25ThinkingConfig(thinkingModel, reasoningIntent); + } + + if (isGemini3Model(thinkingModel)) { + return getGemini3ThinkingConfig(thinkingModel, reasoningIntent); + } + + return {}; + } + + function getGemini25ThinkingConfig( + thinkingModel: string, + reasoningIntent: ReasoningEffort | undefined + ): ThinkingConfig { + if (isGemini25ProModel(thinkingModel)) { + return { thinkingBudget: -1 }; + } + switch (reasoningIntent) { case 'high': - return { thinkingLevel: ThinkingLevel.HIGH }; case 'medium': - return { thinkingLevel: ThinkingLevel.MEDIUM }; + return { thinkingBudget: -1 }; case 'low': case undefined: return { thinkingBudget: 0 }; @@ -96,5 +126,41 @@ export class GeminiProvider extends BaseProvider { return { thinkingBudget: 0 }; } } + + function getGemini3ThinkingConfig( + thinkingModel: string, + reasoningIntent: ReasoningEffort | undefined + ): ThinkingConfig { + switch (reasoningIntent) { + case 'high': + return { thinkingLevel: ThinkingLevel.HIGH }; + case 'medium': + if (isGemini3ProModel(thinkingModel)) { + return { thinkingLevel: ThinkingLevel.HIGH }; + } + return { thinkingLevel: ThinkingLevel.MEDIUM }; + case 'low': + case undefined: + return { thinkingLevel: ThinkingLevel.LOW }; + default: + return { thinkingLevel: ThinkingLevel.LOW }; + } + } + + function isGemini25Model(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-2.5-'); + } + + function isGemini25ProModel(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-2.5-pro'); + } + + function isGemini3Model(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-3-'); + } + + function isGemini3ProModel(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-3-pro'); + } } } diff --git a/packages/core/src/providers/generation-profile.ts b/packages/core/src/providers/generation-profile.ts index b193a57f..06c95535 100644 --- a/packages/core/src/providers/generation-profile.ts +++ b/packages/core/src/providers/generation-profile.ts @@ -6,6 +6,9 @@ export type ReasoningEffort = 'low' | 'medium' | 'high'; export interface GenerationProfile { outputSize: OutputSize; reasoningEffort?: ReasoningEffort; + outputContentHint?: { + characters: number; + }; } export function getGenerationProfileForPhase(phase: AnalysisPhase): GenerationProfile { @@ -21,3 +24,20 @@ export function getGenerationProfileForPhase(phase: AnalysisPhase): GenerationPr return { outputSize: 'small', reasoningEffort: 'low' }; } } + +export function getGenerationProfileForModelPatch( + fileContent: string, + linesToInsert: string[] +): GenerationProfile { + const insertedLineCharacters = linesToInsert.length * 200; + const estimatedCharacters = Math.ceil((fileContent.length + insertedLineCharacters) * 1.2); + const minimumCharacters = 4096 * 4; + + return { + outputSize: 'medium', + reasoningEffort: 'medium', + outputContentHint: { + characters: Math.max(minimumCharacters, estimatedCharacters), + }, + }; +} diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 33d0000b..1478d991 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -390,6 +390,17 @@ describe('OpenAIProvider', () => { expect(callArg?.reasoning?.effort).toBe('medium'); }); + it('should increase the output budget for large model files', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockCreate.mockResolvedValueOnce(makeOpenAIResponse(patchedContent)); + + const provider = createProvider(); + await provider.patchModel('x'.repeat(40_000), [' comp.a -> comp.b'], 'likec4'); + + const callArg = mockCreate.mock.calls[0]?.[0] as { max_output_tokens?: number } | undefined; + expect(callArg?.max_output_tokens).toBeGreaterThan(6000); + }); + it('should return patched content', async () => { const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; mockCreate.mockResolvedValueOnce(makeOpenAIResponse(patchedContent)); diff --git a/packages/core/src/providers/openai/provider.ts b/packages/core/src/providers/openai/provider.ts index c0476fec..065e6335 100644 --- a/packages/core/src/providers/openai/provider.ts +++ b/packages/core/src/providers/openai/provider.ts @@ -38,7 +38,7 @@ export class OpenAIProvider extends BaseProvider { phase: AnalysisPhase, generationProfile: GenerationProfile ): Promise { - const maxOutputTokens = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[generationProfile.outputSize]; + const maxOutputTokens = getMaxOutputTokens(generationProfile); const reasoningEffort = getOpenAIReasoningEffort(generationProfile.reasoningEffort); try { @@ -147,5 +147,14 @@ export class OpenAIProvider extends BaseProvider { return 'minimal'; } } + + function getMaxOutputTokens(profile: GenerationProfile): number { + const profileLimit = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[profile.outputSize]; + const hintedLimit = profile.outputContentHint + ? Math.ceil(profile.outputContentHint.characters / 4) + : 0; + + return Math.max(profileLimit, hintedLimit); + } } } From 97d51a9f172de0a0d61d8e674eff9a770b71af3d Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Tue, 5 May 2026 18:45:42 +0200 Subject: [PATCH 07/12] fix(core): improve drift model update coverage --- .../analysis/__tests__/prompt-builder.test.ts | 59 +++++++++++++++++++ .../__tests__/section-formatters.test.ts | 18 ++++++ .../src/analysis/prompts/drift-analysis.md | 17 ++++++ .../core/src/analysis/section-formatters.ts | 5 +- 4 files changed, 98 insertions(+), 1 deletion(-) diff --git a/packages/core/src/analysis/__tests__/prompt-builder.test.ts b/packages/core/src/analysis/__tests__/prompt-builder.test.ts index fcb8fe1b..dbbaca6d 100644 --- a/packages/core/src/analysis/__tests__/prompt-builder.test.ts +++ b/packages/core/src/analysis/__tests__/prompt-builder.test.ts @@ -301,6 +301,65 @@ describe('PromptBuilder', () => { expect(result).toContain('REMOVED'); expect(result).toContain('memcached'); }); + + it('should preserve dependency evidence for drift analysis', () => { + const result = PromptBuilder.buildDriftAnalysisPrompt({ + changeRequest: { + number: 1, + title: 'Add service dependency', + description: null, + repository: 'org/repo', + author: { login: 'dev' }, + base: { ref: 'main', sha: 'base' }, + head: { ref: 'feature', sha: 'head' }, + stats: { commits: 1, additions: 10, deletions: 0, files_changed: 2 }, + commits: [{ sha: 'head', message: 'Test', author: 'dev' }], + }, + component: { id: 'api_gateway', name: 'API Gateway', type: 'service', tags: [] }, + dependencies: { + dependencies: [ + { + type: 'added', + file: 'packages/api-gateway/src/index.ts', + dependency: 'Order Service', + description: 'Existing component calls newly introduced service', + code: 'const ORDER_SERVICE = "http://order-service:3005";', + }, + { + type: 'added', + file: 'packages/order-service/src/index.ts', + dependency: 'Product Service', + description: 'Newly introduced service calls existing component', + code: 'const PRODUCT_SERVICE = "http://product-service:3002";', + }, + { + type: 'added', + file: 'packages/product-service/src/index.ts', + dependency: 'User Service', + description: 'Existing component calls existing component', + code: 'const USER_SERVICE = "http://user-service:3001";', + }, + ], + summary: 'Added service relationships', + }, + architectural: { dependencies: [], dependents: [], relationships: [] }, + allRelationships: [], + }); + + expect(result).toContain('Evidence: const ORDER_SERVICE'); + expect(result).toContain('Evidence: const PRODUCT_SERVICE'); + expect(result).toContain('Evidence: const USER_SERVICE'); + }); + + it('should instruct drift analysis to account for every added dependency', () => { + const templateDir = join(dirname(fileURLToPath(import.meta.url)), '..', 'prompts'); + const template = readFileSync(join(templateDir, 'drift-analysis.md'), 'utf-8'); + + expect(template).toContain('For every ADDED dependency'); + expect(template).toContain('Classify each dependency'); + expect(template).toContain('New component plus relationship to add'); + expect(template).toContain('Do not let one dependency that created a new component'); + }); }); describe('buildModelPatchPrompt', () => { diff --git a/packages/core/src/analysis/__tests__/section-formatters.test.ts b/packages/core/src/analysis/__tests__/section-formatters.test.ts index 1d623660..3c3a25ad 100644 --- a/packages/core/src/analysis/__tests__/section-formatters.test.ts +++ b/packages/core/src/analysis/__tests__/section-formatters.test.ts @@ -93,6 +93,24 @@ describe('section-formatters', () => { expect(result).toContain('Added Redis'); }); + it('should include code evidence when present', () => { + const result = formatDependencyChanges({ + dependencies: [ + { + type: 'added', + file: 'src/gateway.ts', + dependency: 'Order Service', + description: 'External order service via HTTP', + code: 'const ORDER_SERVICE = "http://order-service:3005";', + }, + ], + summary: '', + }); + + expect(result).toContain('Evidence: const ORDER_SERVICE'); + expect(result).toContain('http://order-service:3005'); + }); + it('should format modified dependencies', () => { const result = formatDependencyChanges({ dependencies: [ diff --git a/packages/core/src/analysis/prompts/drift-analysis.md b/packages/core/src/analysis/prompts/drift-analysis.md index 5ac93fe7..0c31554b 100644 --- a/packages/core/src/analysis/prompts/drift-analysis.md +++ b/packages/core/src/analysis/prompts/drift-analysis.md @@ -64,6 +64,23 @@ These are ALL relationships currently declared in the architecture model: Cross-reference the dependency changes above against the architecture model and assess: +### Dependency Coverage + +For every ADDED dependency in the DEPENDENCY CHANGES DETECTED section, account for it +explicitly. Classify each dependency as one of: + +- Already declared in the model +- New relationship to add +- New component plus relationship to add +- External package or third-party dependency that should not be modeled +- Ignored with a brief reason + +If an existing modeled component gains a dependency on a newly introduced component, +include both the new component and that relationship. If a newly introduced component +depends on an existing modeled component, include both the new component and that +relationship. Do not let one dependency that created a new component hide other +relationships to or from that component. + ### 1. New Dependencies NOT in Model (Potential Drift) - Are there new dependencies that aren't in the allowed dependencies list? diff --git a/packages/core/src/analysis/section-formatters.ts b/packages/core/src/analysis/section-formatters.ts index 5416bd99..dd54d5cb 100644 --- a/packages/core/src/analysis/section-formatters.ts +++ b/packages/core/src/analysis/section-formatters.ts @@ -57,7 +57,10 @@ export function formatDependencyChanges(dependencies: DependencyExtractionResult if (items && items.length > 0) { section += `**${label} Dependencies:**\n`; section += items - .map((dep) => `- ${dep.dependency} (${dep.file})\n ${dep.description}`) + .map((dep) => { + const evidence = dep.code.trim() ? `\n Evidence: ${dep.code.trim()}` : ''; + return `- ${dep.dependency} (${dep.file})\n ${dep.description}${evidence}`; + }) .join('\n'); section += '\n\n'; } From 8e3821bf6fa48b88938eaf7de8c73539cb6088b3 Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Tue, 5 May 2026 20:13:46 +0200 Subject: [PATCH 08/12] chore: address provider review cleanup --- .env.example | 6 +++++- .../src/providers/__tests__/generation-profile.test.ts | 8 -------- .../adrs/011-intent-based-provider-generation-profiles.md | 4 ++-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.env.example b/.env.example index c43df56a..5bddcfe8 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,12 @@ # Environment variables override values from .eroderc.json. # Use .eroderc.json for project settings and .env for secrets. -# AI provider: "gemini" (default) or "anthropic" +# AI provider: "gemini" (default), "openai", or "anthropic" ERODE_AI_PROVIDER=gemini # API keys (required for the selected provider) ERODE_GEMINI_API_KEY= +ERODE_OPENAI_API_KEY= ERODE_ANTHROPIC_API_KEY= # GitHub / GitLab tokens @@ -16,6 +17,8 @@ ERODE_GITHUB_TOKEN= # Model overrides — FAST for extraction stages (1, 2), ADVANCED for analysis stages (3, 4) # ERODE_GEMINI_FAST_MODEL=gemini-2.5-flash # Default # ERODE_GEMINI_ADVANCED_MODEL=gemini-2.5-pro # Default +# ERODE_OPENAI_FAST_MODEL=gpt-5-mini # Default +# ERODE_OPENAI_ADVANCED_MODEL=gpt-5 # Default # ERODE_ANTHROPIC_FAST_MODEL=claude-haiku-4-5 # ERODE_ANTHROPIC_ADVANCED_MODEL=claude-sonnet-4-6 @@ -29,6 +32,7 @@ ERODE_GITHUB_TOKEN= # Timeouts (ms) # ERODE_GEMINI_TIMEOUT=60000 +# ERODE_OPENAI_TIMEOUT=60000 # ERODE_ANTHROPIC_TIMEOUT=60000 # ERODE_GITHUB_TIMEOUT=30000 diff --git a/packages/core/src/providers/__tests__/generation-profile.test.ts b/packages/core/src/providers/__tests__/generation-profile.test.ts index 461dac2e..c85a96aa 100644 --- a/packages/core/src/providers/__tests__/generation-profile.test.ts +++ b/packages/core/src/providers/__tests__/generation-profile.test.ts @@ -1,5 +1,3 @@ -import { readFileSync } from 'node:fs'; -import { join } from 'node:path'; import { describe, expect, it } from 'vitest'; import { AnalysisPhase } from '../analysis-phase.js'; import { @@ -33,12 +31,6 @@ describe('getGenerationProfileForPhase', () => { }); }); - it('does not require raw maxTokens in shared stage orchestration', () => { - const source = readFileSync(join(import.meta.dirname, '../base-provider.ts'), 'utf8'); - - expect(source).not.toContain('maxTokens'); - }); - it('adds a dynamic output content hint for model patches', () => { const profile = getGenerationProfileForModelPatch('x'.repeat(40_000), [' comp.a -> comp.b']); diff --git a/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md b/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md index 769c145c..b0cbfb19 100644 --- a/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md +++ b/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md @@ -51,8 +51,8 @@ use medium reasoning because they generate a concrete patch. ### Negative - Output profile names become a shared contract that providers must translate. -- Large model patch outputs may need provider-local tuning if medium output is too - small for some repositories. +- Dynamic output hints can still affect provider cost, latency, quota usage, or model + output caps. ## Related commits From 2927b1b133da39d61997247b2a003399c7ed20af Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Wed, 6 May 2026 07:22:55 +0200 Subject: [PATCH 09/12] fix(core): address provider review comments --- .../analysis/__tests__/prompt-builder.test.ts | 7 +-- .../__tests__/section-formatters.test.ts | 27 ++++++++++- .../core/src/analysis/section-formatters.ts | 8 +++- .../anthropic/__tests__/provider.test.ts | 17 +++++++ .../core/src/providers/anthropic/provider.ts | 2 +- .../gemini/__tests__/provider.test.ts | 23 +++++++++ .../core/src/providers/gemini/provider.ts | 4 +- .../openai/__tests__/provider.test.ts | 48 ++++++++++++++++++- .../core/src/providers/openai/provider.ts | 23 ++++++++- 9 files changed, 149 insertions(+), 10 deletions(-) diff --git a/packages/core/src/analysis/__tests__/prompt-builder.test.ts b/packages/core/src/analysis/__tests__/prompt-builder.test.ts index dbbaca6d..7b2c60ec 100644 --- a/packages/core/src/analysis/__tests__/prompt-builder.test.ts +++ b/packages/core/src/analysis/__tests__/prompt-builder.test.ts @@ -346,9 +346,10 @@ describe('PromptBuilder', () => { allRelationships: [], }); - expect(result).toContain('Evidence: const ORDER_SERVICE'); - expect(result).toContain('Evidence: const PRODUCT_SERVICE'); - expect(result).toContain('Evidence: const USER_SERVICE'); + const vars = JSON.parse(result) as DriftAnalysisPromptVars; + expect(vars.dependencyChangesSection).toContain('Evidence:\n const ORDER_SERVICE'); + expect(vars.dependencyChangesSection).toContain('Evidence:\n const PRODUCT_SERVICE'); + expect(vars.dependencyChangesSection).toContain('Evidence:\n const USER_SERVICE'); }); it('should instruct drift analysis to account for every added dependency', () => { diff --git a/packages/core/src/analysis/__tests__/section-formatters.test.ts b/packages/core/src/analysis/__tests__/section-formatters.test.ts index 3c3a25ad..50f15484 100644 --- a/packages/core/src/analysis/__tests__/section-formatters.test.ts +++ b/packages/core/src/analysis/__tests__/section-formatters.test.ts @@ -107,10 +107,35 @@ describe('section-formatters', () => { summary: '', }); - expect(result).toContain('Evidence: const ORDER_SERVICE'); + expect(result).toContain('Evidence:\n const ORDER_SERVICE'); expect(result).toContain('http://order-service:3005'); }); + it('should keep multiline code evidence indented under the dependency bullet', () => { + const result = formatDependencyChanges({ + dependencies: [ + { + type: 'added', + file: 'src/gateway.ts', + dependency: 'Order Service', + description: 'External order service via HTTP', + code: 'const ORDER_SERVICE = "http://order-service:3005";\nawait fetch(ORDER_SERVICE);', + }, + ], + summary: '', + }); + + expect(result).toContain( + [ + '- Order Service (src/gateway.ts)', + ' External order service via HTTP', + ' Evidence:', + ' const ORDER_SERVICE = "http://order-service:3005";', + ' await fetch(ORDER_SERVICE);', + ].join('\n') + ); + }); + it('should format modified dependencies', () => { const result = formatDependencyChanges({ dependencies: [ diff --git a/packages/core/src/analysis/section-formatters.ts b/packages/core/src/analysis/section-formatters.ts index dd54d5cb..750c5d9b 100644 --- a/packages/core/src/analysis/section-formatters.ts +++ b/packages/core/src/analysis/section-formatters.ts @@ -58,7 +58,13 @@ export function formatDependencyChanges(dependencies: DependencyExtractionResult section += `**${label} Dependencies:**\n`; section += items .map((dep) => { - const evidence = dep.code.trim() ? `\n Evidence: ${dep.code.trim()}` : ''; + const trimmedCode = dep.code.trim(); + const evidence = trimmedCode + ? `\n Evidence:\n${trimmedCode + .split('\n') + .map((line) => ` ${line}`) + .join('\n')}` + : ''; return `- ${dep.dependency} (${dep.file})\n ${dep.description}${evidence}`; }) .join('\n'); diff --git a/packages/core/src/providers/anthropic/__tests__/provider.test.ts b/packages/core/src/providers/anthropic/__tests__/provider.test.ts index ce1493b2..3e118533 100644 --- a/packages/core/src/providers/anthropic/__tests__/provider.test.ts +++ b/packages/core/src/providers/anthropic/__tests__/provider.test.ts @@ -248,6 +248,23 @@ describe('AnthropicProvider', () => { }); }); + describe('truncation handling', () => { + it('should explain output budget exhaustion on max_tokens', async () => { + mockCreate.mockResolvedValueOnce(makeAnthropicResponse('partial response', 'max_tokens')); + + const provider = createProvider(); + try { + await provider.selectComponent(makeStage1Data(['comp.api'])); + expect.fail('Expected error to be thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ErodeError); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('output budget'); + } + }); + }); + describe('retry on rate limit', () => { it('should retry on 429 and eventually succeed', async () => { const rateLimitError = new ApiError('Rate limited', 429); diff --git a/packages/core/src/providers/anthropic/provider.ts b/packages/core/src/providers/anthropic/provider.ts index e800527a..df78d918 100644 --- a/packages/core/src/providers/anthropic/provider.ts +++ b/packages/core/src/providers/anthropic/provider.ts @@ -70,7 +70,7 @@ export class AnthropicProvider extends BaseProvider { throw new ErodeError( 'Anthropic response was cut short (max_tokens reached)', ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The AI response was truncated. The output may be partial.', + 'The Anthropic response used the available output budget before completion. Try a smaller change or tune the provider output budget.', { model, phase, outputTokenLimit } ); } diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index 1c7650dd..79df5105 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -309,6 +309,29 @@ describe('GeminiProvider', () => { }) ); }); + + it('should use thinkingLevel for Gemini 3 point-release style models', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = new GeminiProvider({ + apiKey: 'test-api-key', + fastModel: 'gemini-3.5-flash', + advancedModel: 'gemini-3.5-pro', + }); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-3.5-flash', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingLevel: 'MEDIUM' } }, + }) + ); + }); }); describe('safety filter handling', () => { diff --git a/packages/core/src/providers/gemini/provider.ts b/packages/core/src/providers/gemini/provider.ts index 219d748b..edb569bf 100644 --- a/packages/core/src/providers/gemini/provider.ts +++ b/packages/core/src/providers/gemini/provider.ts @@ -156,11 +156,11 @@ export class GeminiProvider extends BaseProvider { } function isGemini3Model(thinkingModel: string): boolean { - return thinkingModel.startsWith('gemini-3-'); + return thinkingModel.startsWith('gemini-3-') || thinkingModel.startsWith('gemini-3.'); } function isGemini3ProModel(thinkingModel: string): boolean { - return thinkingModel.startsWith('gemini-3-pro'); + return thinkingModel.startsWith('gemini-3-pro') || /^gemini-3\.[^-]+-pro/.test(thinkingModel); } } } diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 1478d991..47bd79c8 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -105,7 +105,7 @@ function makeOpenAIMessageResponse(content: string) { }; } -function makeIncompleteOpenAIResponse(reason: 'max_output_tokens' | 'content_filter') { +function makeIncompleteOpenAIResponse(reason: string) { return { status: 'incomplete', incomplete_details: { reason }, @@ -149,6 +149,37 @@ describe('OpenAIProvider', () => { ); }); + it('should omit reasoning for chat-tuned GPT-5 models', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIResponse('comp.backend')); + + const provider = new OpenAIProvider({ + apiKey: 'test-api-key', + fastModel: 'gpt-5-chat-latest', + advancedModel: 'gpt-5', + }); + await provider.selectComponent(makeStage1Data(['comp.backend'])); + + const callArg = mockCreate.mock.calls[0]?.[0] as { reasoning?: unknown } | undefined; + expect(callArg).not.toHaveProperty('reasoning'); + }); + + it('should send reasoning for GPT-5 family models', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIResponse('comp.backend')); + + const provider = new OpenAIProvider({ + apiKey: 'test-api-key', + fastModel: 'gpt-5o-mini', + advancedModel: 'gpt-5', + }); + await provider.selectComponent(makeStage1Data(['comp.backend'])); + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + reasoning: { effort: 'minimal' }, + }) + ); + }); + it('should return null when no component matches', async () => { mockCreate.mockResolvedValueOnce(makeOpenAIResponse('I cannot determine the component')); @@ -279,6 +310,21 @@ describe('OpenAIProvider', () => { expect(erodeError.userMessage).toContain('output budget'); } }); + + it('should throw PROVIDER_INVALID_RESPONSE on unknown incomplete reasons', async () => { + mockCreate.mockResolvedValueOnce(makeIncompleteOpenAIResponse('system_error')); + + const provider = createProvider(); + try { + await provider.selectComponent(makeStage1Data(['comp.api'])); + expect.fail('Expected error to be thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ErodeError); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('unknown provider reason'); + } + }); }); describe('response text extraction', () => { diff --git a/packages/core/src/providers/openai/provider.ts b/packages/core/src/providers/openai/provider.ts index 065e6335..93c8b348 100644 --- a/packages/core/src/providers/openai/provider.ts +++ b/packages/core/src/providers/openai/provider.ts @@ -111,6 +111,19 @@ export class OpenAIProvider extends BaseProvider { { model: incompleteModel, phase: incompletePhase } ); } + + throw new ErodeError( + 'OpenAI returned an incomplete response', + ErrorCode.PROVIDER_INVALID_RESPONSE, + 'The OpenAI response was incomplete for an unknown provider reason. Try again or tune the provider output budget.', + { + model: incompleteModel, + phase: incompletePhase, + reason: response.incomplete_details?.reason, + maxOutputTokens: incompleteMaxOutputTokens, + outputSize: incompleteGenerationProfile.outputSize, + } + ); } function extractText(response: OpenAI.Responses.Response): string { @@ -127,7 +140,15 @@ export class OpenAIProvider extends BaseProvider { } function supportsReasoningEffort(reasoningModel: string): boolean { - return ['gpt-5', 'o1', 'o3', 'o4'].some((prefix) => { + if (reasoningModel.includes('chat')) { + return false; + } + + if (reasoningModel.startsWith('gpt-5')) { + return true; + } + + return ['o1', 'o3', 'o4'].some((prefix) => { return reasoningModel === prefix || reasoningModel.startsWith(`${prefix}-`); }); } From 3be29e234eb913aeffe4082cfee6c65b9ee1c3ca Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Wed, 6 May 2026 07:33:31 +0200 Subject: [PATCH 10/12] fix(core): unwrap fenced model patch responses --- packages/core/src/providers/base-provider.ts | 21 ++++++++++++++++--- .../openai/__tests__/provider.test.ts | 12 +++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/packages/core/src/providers/base-provider.ts b/packages/core/src/providers/base-provider.ts index c787c498..35ba2e43 100644 --- a/packages/core/src/providers/base-provider.ts +++ b/packages/core/src/providers/base-provider.ts @@ -155,9 +155,11 @@ export abstract class BaseProvider implements AIProvider { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } - ).finally(() => { - debugLog('patchModel completed in', formatDuration(startedAt)); - }); + ) + .then((response) => unwrapModelPatchResponse(response)) + .finally(() => { + debugLog('patchModel completed in', formatDuration(startedAt)); + }); } private async executeStage(config: { @@ -203,3 +205,16 @@ export abstract class BaseProvider implements AIProvider { return error instanceof ApiError && (error.isRateLimited || error.isTimeout); } } + +function unwrapModelPatchResponse(response: string): string { + const trimmed = response.trim(); + const lines = trimmed.split(/\r?\n/); + const firstLine = lines[0]; + const lastLine = lines.at(-1); + + if (firstLine?.startsWith('```') && lastLine === '```') { + return lines.slice(1, -1).join('\n'); + } + + return response; +} diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 47bd79c8..a062f204 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -457,6 +457,18 @@ describe('OpenAIProvider', () => { expect(result).toBe(patchedContent); }); + it('should unwrap markdown fences from patched content', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}'; + mockCreate.mockResolvedValueOnce( + makeOpenAIResponse(`\`\`\`likec4\n${patchedContent}\n\`\`\``) + ); + + const provider = createProvider(); + const result = await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(result).toBe(patchedContent); + }); + it('should retry on rate limit', async () => { const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; const rateLimitError = new ApiError('Rate limited', 429); From 936ad1c60018f47fcae83b4b5f221068158f50bb Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Wed, 6 May 2026 07:52:02 +0200 Subject: [PATCH 11/12] fix: address review comments and web lint --- .../__tests__/generation-profile.test.ts | 14 ++++++++++ .../core/src/providers/anthropic/provider.ts | 17 +++++------- .../gemini/__tests__/provider.test.ts | 23 ++++++++++++++++ .../core/src/providers/gemini/provider.ts | 27 +++++++++---------- .../core/src/providers/generation-profile.ts | 12 +++++++++ .../openai/__tests__/provider.test.ts | 8 +++--- .../core/src/providers/openai/provider.ts | 22 +++++++-------- packages/web/package.json | 2 +- 8 files changed, 84 insertions(+), 41 deletions(-) diff --git a/packages/core/src/providers/__tests__/generation-profile.test.ts b/packages/core/src/providers/__tests__/generation-profile.test.ts index c85a96aa..b3268e25 100644 --- a/packages/core/src/providers/__tests__/generation-profile.test.ts +++ b/packages/core/src/providers/__tests__/generation-profile.test.ts @@ -3,6 +3,7 @@ import { AnalysisPhase } from '../analysis-phase.js'; import { getGenerationProfileForModelPatch, getGenerationProfileForPhase, + resolveOutputTokenLimit, } from '../generation-profile.js'; describe('getGenerationProfileForPhase', () => { @@ -40,4 +41,17 @@ describe('getGenerationProfileForPhase', () => { }); expect(profile.outputContentHint?.characters).toBeGreaterThan(16_384); }); + + it('resolves output token limits from profile size and content hints', () => { + expect( + resolveOutputTokenLimit( + { outputSize: 'medium', outputContentHint: { characters: 40_000 } }, + { small: 600, medium: 1500, large: 3000 } + ) + ).toBe(10_000); + + expect( + resolveOutputTokenLimit({ outputSize: 'medium' }, { small: 600, medium: 1500, large: 3000 }) + ).toBe(1500); + }); }); diff --git a/packages/core/src/providers/anthropic/provider.ts b/packages/core/src/providers/anthropic/provider.ts index df78d918..75cd68db 100644 --- a/packages/core/src/providers/anthropic/provider.ts +++ b/packages/core/src/providers/anthropic/provider.ts @@ -3,7 +3,11 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; -import type { GenerationProfile, OutputSize } from '../generation-profile.js'; +import { + resolveOutputTokenLimit, + type GenerationProfile, + type OutputSize, +} from '../generation-profile.js'; import { ANTHROPIC_MODELS } from './models.js'; const MAX_TOKENS_BY_OUTPUT_SIZE = { @@ -36,7 +40,7 @@ export class AnthropicProvider extends BaseProvider { phase: AnalysisPhase, generationProfile: GenerationProfile ): Promise { - const outputTokenLimit = getOutputTokenLimit(generationProfile); + const outputTokenLimit = resolveOutputTokenLimit(generationProfile, MAX_TOKENS_BY_OUTPUT_SIZE); try { const response = await this.client.messages.create({ @@ -82,14 +86,5 @@ export class AnthropicProvider extends BaseProvider { } throw ApiError.fromAnthropicError(error); } - - function getOutputTokenLimit(profile: GenerationProfile): number { - const profileLimit = MAX_TOKENS_BY_OUTPUT_SIZE[profile.outputSize]; - const hintedLimit = profile.outputContentHint - ? Math.ceil(profile.outputContentHint.characters / 4) - : 0; - - return Math.max(profileLimit, hintedLimit); - } } } diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index 79df5105..9ba8bf41 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -332,6 +332,29 @@ describe('GeminiProvider', () => { }) ); }); + + it('should preserve medium thinkingLevel for Gemini 3 point-release Pro models', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = new GeminiProvider({ + apiKey: 'test-api-key', + fastModel: 'gemini-3.1-pro-preview', + advancedModel: 'gemini-3.1-pro-preview', + }); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-3.1-pro-preview', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingLevel: 'MEDIUM' } }, + }) + ); + }); }); describe('safety filter handling', () => { diff --git a/packages/core/src/providers/gemini/provider.ts b/packages/core/src/providers/gemini/provider.ts index edb569bf..83294439 100644 --- a/packages/core/src/providers/gemini/provider.ts +++ b/packages/core/src/providers/gemini/provider.ts @@ -3,7 +3,12 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; -import type { GenerationProfile, OutputSize, ReasoningEffort } from '../generation-profile.js'; +import { + resolveOutputTokenLimit, + type GenerationProfile, + type OutputSize, + type ReasoningEffort, +} from '../generation-profile.js'; import { GEMINI_MODELS } from './models.js'; const MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE = { @@ -36,7 +41,10 @@ export class GeminiProvider extends BaseProvider { phase: AnalysisPhase, generationProfile: GenerationProfile ): Promise { - const maxOutputTokens = getMaxOutputTokens(generationProfile); + const maxOutputTokens = resolveOutputTokenLimit( + generationProfile, + MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE + ); const thinkingConfig = getThinkingConfig(model, generationProfile.reasoningEffort); try { @@ -83,15 +91,6 @@ export class GeminiProvider extends BaseProvider { throw ApiError.fromGeminiError(error); } - function getMaxOutputTokens(profile: GenerationProfile): number { - const profileLimit = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[profile.outputSize]; - const hintedLimit = profile.outputContentHint - ? Math.ceil(profile.outputContentHint.characters / 4) - : 0; - - return Math.max(profileLimit, hintedLimit); - } - function getThinkingConfig( thinkingModel: string, reasoningIntent: ReasoningEffort | undefined @@ -135,7 +134,7 @@ export class GeminiProvider extends BaseProvider { case 'high': return { thinkingLevel: ThinkingLevel.HIGH }; case 'medium': - if (isGemini3ProModel(thinkingModel)) { + if (isLegacyGemini3ProModel(thinkingModel)) { return { thinkingLevel: ThinkingLevel.HIGH }; } return { thinkingLevel: ThinkingLevel.MEDIUM }; @@ -159,8 +158,8 @@ export class GeminiProvider extends BaseProvider { return thinkingModel.startsWith('gemini-3-') || thinkingModel.startsWith('gemini-3.'); } - function isGemini3ProModel(thinkingModel: string): boolean { - return thinkingModel.startsWith('gemini-3-pro') || /^gemini-3\.[^-]+-pro/.test(thinkingModel); + function isLegacyGemini3ProModel(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-3-pro'); } } } diff --git a/packages/core/src/providers/generation-profile.ts b/packages/core/src/providers/generation-profile.ts index 06c95535..56b93e0c 100644 --- a/packages/core/src/providers/generation-profile.ts +++ b/packages/core/src/providers/generation-profile.ts @@ -41,3 +41,15 @@ export function getGenerationProfileForModelPatch( }, }; } + +export function resolveOutputTokenLimit( + profile: GenerationProfile, + sizeTable: Record +): number { + const profileLimit = sizeTable[profile.outputSize]; + const hintedLimit = profile.outputContentHint + ? Math.ceil(profile.outputContentHint.characters / 4) + : 0; + + return Math.max(profileLimit, hintedLimit); +} diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index a062f204..4306c65b 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -144,7 +144,7 @@ describe('OpenAIProvider', () => { expect.objectContaining({ model: 'gpt-5-mini', max_output_tokens: 1500, - reasoning: { effort: 'minimal' }, + reasoning: { effort: 'low' }, }) ); }); @@ -175,7 +175,7 @@ describe('OpenAIProvider', () => { expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ - reasoning: { effort: 'minimal' }, + reasoning: { effort: 'low' }, }) ); }); @@ -228,7 +228,7 @@ describe('OpenAIProvider', () => { expect.objectContaining({ model: 'gpt-5-mini', max_output_tokens: 1500, - reasoning: { effort: 'minimal' }, + reasoning: { effort: 'low' }, }) ); }); @@ -274,7 +274,7 @@ describe('OpenAIProvider', () => { expect.objectContaining({ model: 'gpt-5', max_output_tokens: 6000, - reasoning: { effort: 'minimal' }, + reasoning: { effort: 'low' }, }) ); }); diff --git a/packages/core/src/providers/openai/provider.ts b/packages/core/src/providers/openai/provider.ts index 93c8b348..b2984a64 100644 --- a/packages/core/src/providers/openai/provider.ts +++ b/packages/core/src/providers/openai/provider.ts @@ -3,7 +3,12 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ErodeError, ErrorCode, ApiError } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; -import type { GenerationProfile, OutputSize, ReasoningEffort } from '../generation-profile.js'; +import { + resolveOutputTokenLimit, + type GenerationProfile, + type OutputSize, + type ReasoningEffort, +} from '../generation-profile.js'; import { OPENAI_MODELS } from './models.js'; type OpenAIReasoningEffort = 'minimal' | 'low' | 'medium' | 'high'; @@ -38,7 +43,10 @@ export class OpenAIProvider extends BaseProvider { phase: AnalysisPhase, generationProfile: GenerationProfile ): Promise { - const maxOutputTokens = getMaxOutputTokens(generationProfile); + const maxOutputTokens = resolveOutputTokenLimit( + generationProfile, + MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE + ); const reasoningEffort = getOpenAIReasoningEffort(generationProfile.reasoningEffort); try { @@ -162,20 +170,12 @@ export class OpenAIProvider extends BaseProvider { case 'medium': return 'medium'; case 'low': + return 'low'; case undefined: return 'minimal'; default: return 'minimal'; } } - - function getMaxOutputTokens(profile: GenerationProfile): number { - const profileLimit = MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE[profile.outputSize]; - const hintedLimit = profile.outputContentHint - ? Math.ceil(profile.outputContentHint.characters / 4) - : 0; - - return Math.max(profileLimit, hintedLimit); - } } } diff --git a/packages/web/package.json b/packages/web/package.json index 01d54a20..1e592fbe 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -12,7 +12,7 @@ "preview": "astro preview", "typecheck": "npx tsx scripts/generate-agent-skills.ts --check && astro check --minimumFailingSeverity hint", "check:ci": "npm run lint && npm run typecheck && astro build", - "lint": "eslint . --report-unused-disable-directives --max-warnings 0" + "lint": "astro sync && eslint . --report-unused-disable-directives --max-warnings 0" }, "dependencies": { "@astrojs/starlight": "^0.38.4", From 4271caff7abcdb054742717d062bd35d6dfe0100 Mon Sep 17 00:00:00 2001 From: Anders Hassis Date: Wed, 6 May 2026 08:03:47 +0200 Subject: [PATCH 12/12] fix(core): restore drift analysis output headroom --- .../core/src/providers/__tests__/generation-profile.test.ts | 4 ++-- .../core/src/providers/anthropic/__tests__/provider.test.ts | 2 +- packages/core/src/providers/anthropic/provider.ts | 2 +- packages/core/src/providers/gemini/__tests__/provider.test.ts | 2 +- packages/core/src/providers/generation-profile.ts | 2 +- packages/core/src/providers/openai/__tests__/provider.test.ts | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/core/src/providers/__tests__/generation-profile.test.ts b/packages/core/src/providers/__tests__/generation-profile.test.ts index b3268e25..d9700df3 100644 --- a/packages/core/src/providers/__tests__/generation-profile.test.ts +++ b/packages/core/src/providers/__tests__/generation-profile.test.ts @@ -18,9 +18,9 @@ describe('getGenerationProfileForPhase', () => { }); }); - it('uses medium low-effort generation for drift analysis', () => { + it('uses large low-effort generation for drift analysis', () => { expect(getGenerationProfileForPhase(AnalysisPhase.CHANGE_ANALYSIS)).toEqual({ - outputSize: 'medium', + outputSize: 'large', reasoningEffort: 'low', }); }); diff --git a/packages/core/src/providers/anthropic/__tests__/provider.test.ts b/packages/core/src/providers/anthropic/__tests__/provider.test.ts index 3e118533..e471d7cf 100644 --- a/packages/core/src/providers/anthropic/__tests__/provider.test.ts +++ b/packages/core/src/providers/anthropic/__tests__/provider.test.ts @@ -223,7 +223,7 @@ describe('AnthropicProvider', () => { expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ model: 'claude-sonnet-4-6', - max_tokens: 1500, + max_tokens: 8192, }) ); }); diff --git a/packages/core/src/providers/anthropic/provider.ts b/packages/core/src/providers/anthropic/provider.ts index 75cd68db..81b636a2 100644 --- a/packages/core/src/providers/anthropic/provider.ts +++ b/packages/core/src/providers/anthropic/provider.ts @@ -13,7 +13,7 @@ import { ANTHROPIC_MODELS } from './models.js'; const MAX_TOKENS_BY_OUTPUT_SIZE = { small: 600, medium: 1500, - large: 3000, + large: 8192, } satisfies Record; export class AnthropicProvider extends BaseProvider { diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index 9ba8bf41..14099571 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -244,7 +244,7 @@ describe('GeminiProvider', () => { expect(mockGenerateContent).toHaveBeenCalledWith( expect.objectContaining({ model: 'gemini-2.5-pro', - config: { maxOutputTokens: 3000, thinkingConfig: { thinkingBudget: -1 } }, + config: { maxOutputTokens: 6000, thinkingConfig: { thinkingBudget: -1 } }, }) ); }); diff --git a/packages/core/src/providers/generation-profile.ts b/packages/core/src/providers/generation-profile.ts index 56b93e0c..8c521034 100644 --- a/packages/core/src/providers/generation-profile.ts +++ b/packages/core/src/providers/generation-profile.ts @@ -19,7 +19,7 @@ export function getGenerationProfileForPhase(phase: AnalysisPhase): GenerationPr case AnalysisPhase.MODEL_UPDATE: return { outputSize: 'medium', reasoningEffort: 'medium' }; case AnalysisPhase.CHANGE_ANALYSIS: - return { outputSize: 'medium', reasoningEffort: 'low' }; + return { outputSize: 'large', reasoningEffort: 'low' }; default: return { outputSize: 'small', reasoningEffort: 'low' }; } diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 4306c65b..754f2d97 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -273,7 +273,7 @@ describe('OpenAIProvider', () => { expect(mockCreate).toHaveBeenCalledWith( expect.objectContaining({ model: 'gpt-5', - max_output_tokens: 6000, + max_output_tokens: 10000, reasoning: { effort: 'low' }, }) );