From ee87008bbd6f83ed74642470c1a831a45c853ea8 Mon Sep 17 00:00:00 2001 From: Cali93 <32299095+Cali93@users.noreply.github.com> Date: Mon, 9 Mar 2026 12:58:36 +0200 Subject: [PATCH 1/4] feat(auto-review): add AI-powered auto-approve capability Add opt-in auto-approve that uses Claude to evaluate whether a PR should be approved based on a repo-specific scope prompt. After the review completes, the evaluation considers the diff, changed files, and extracted findings to make an approve/reject decision. New inputs: - auto_approve: enable the feature (default: false) - auto_approve_app_id: GitHub App ID for generating approval token - auto_approve_private_key: GitHub App private key - auto_approve_scope_prompt: repo-specific criteria for Claude Co-Authored-By: Claude Opus 4.6 --- claude/auto-review/action.yml | 64 +++++ .../__tests__/auto-approve-evaluation.test.js | 264 ++++++++++++++++++ .../scripts/auto-approve-evaluation.js | 235 ++++++++++++++++ 3 files changed, 563 insertions(+) create mode 100644 claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js create mode 100644 claude/auto-review/scripts/auto-approve-evaluation.js diff --git a/claude/auto-review/action.yml b/claude/auto-review/action.yml index aa3cc8a..49924b0 100644 --- a/claude/auto-review/action.yml +++ b/claude/auto-review/action.yml @@ -35,6 +35,19 @@ inputs: description: "Force data classification agent regardless of heuristic" required: false default: "false" + auto_approve: + description: "Enable AI-powered auto-approval after review. When enabled, Claude evaluates the diff and review findings against the scope_prompt to decide whether to approve." + required: false + default: "false" + auto_approve_app_id: + description: "GitHub App ID used to generate a token for PR approval. Required when auto_approve is true." + required: false + auto_approve_private_key: + description: "GitHub App private key used to generate a token for PR approval. Required when auto_approve is true." 
+ required: false + auto_approve_scope_prompt: + description: "Instructions that tell Claude when to approve or reject. Provide repo-specific criteria (e.g. 'Only approve Terraform changes that do not destroy databases or remove resources')." + required: false runs: using: "composite" @@ -427,3 +440,54 @@ runs: chmod +x "$SCRIPT_PATH" node "$SCRIPT_PATH" + + # ── Auto-approve (opt-in) ────────────────────────────────────────────── + - name: Evaluate auto-approve + if: inputs.auto_approve == 'true' + shell: bash + env: + GH_TOKEN: ${{ github.token }} + GITHUB_TOKEN: ${{ github.token }} + ANTHROPIC_API_KEY: ${{ inputs.anthropic_api_key }} + AUTO_APPROVE_MODEL: ${{ inputs.model }} + AUTO_APPROVE_SCOPE_PROMPT: ${{ inputs.auto_approve_scope_prompt }} + run: | + SCRIPT_PATH="${{ github.action_path }}/scripts/auto-approve-evaluation.js" + + if [[ ! -f "$SCRIPT_PATH" ]]; then + echo "::error::Auto-approve evaluation script not found at $SCRIPT_PATH" + exit 1 + fi + + chmod +x "$SCRIPT_PATH" + RESULT=$(node "$SCRIPT_PATH") + + APPROVED=$(echo "$RESULT" | jq -r '.approved') + REASON=$(echo "$RESULT" | jq -r '.reason') + + echo "AUTO_APPROVE_DECISION=$APPROVED" >> $GITHUB_ENV + echo "Auto-approve decision: approved=$APPROVED reason=\"$REASON\"" + + # Store reason for the approval body (multiline-safe) + echo "AUTO_APPROVE_REASON<<EOF" >> $GITHUB_ENV + echo "$REASON" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Generate App Token for approval + if: inputs.auto_approve == 'true' && env.AUTO_APPROVE_DECISION == 'true' + id: approve-app-token + uses: actions/create-github-app-token@21cfef757ad23c4e5c14bcfa4b9ca7e524e1d1d3 # v1 + with: + app-id: ${{ inputs.auto_approve_app_id }} + private-key: ${{ inputs.auto_approve_private_key }} + + - name: Approve PR + if: inputs.auto_approve == 'true' && env.AUTO_APPROVE_DECISION == 'true' + shell: bash + env: + GH_TOKEN: ${{ steps.approve-app-token.outputs.token }} + run: | + PR_NUMBER=${{ github.event.pull_request.number ||
github.event.issue.number }} + gh pr review "$PR_NUMBER" --approve --body "✅ **Auto-approved by Claude** + + ${{ env.AUTO_APPROVE_REASON }}" diff --git a/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js b/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js new file mode 100644 index 0000000..ca62321 --- /dev/null +++ b/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js @@ -0,0 +1,264 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; + +// Mock github-utils before importing the module under test +vi.mock("../lib/github-utils.js", async () => { + const actual = await vi.importActual("../lib/github-utils.js"); + return { + ...actual, + loadGitHubContext: vi.fn(() => ({ + repo: { owner: "test-org", repo: "test-repo" }, + issue: { number: 42 }, + payload: {}, + })), + createLogger: () => ({ + log: vi.fn(), + error: vi.fn(), + }), + }; +}); + +// Mock child_process +vi.mock("child_process", () => ({ + spawnSync: vi.fn(), +})); + +// Mock fs +vi.mock("fs", async () => { + const actual = await vi.importActual("fs"); + return { + ...actual, + default: { + ...actual, + existsSync: vi.fn(), + readFileSync: vi.fn(), + }, + existsSync: vi.fn(), + readFileSync: vi.fn(), + }; +}); + +import { spawnSync } from "child_process"; +import fs from "fs"; + +describe("auto-approve-evaluation", () => { + let originalEnv; + let fetchMock; + + beforeEach(() => { + originalEnv = { ...process.env }; + process.env.ANTHROPIC_API_KEY = "test-key"; + process.env.AUTO_APPROVE_MODEL = "claude-sonnet-4-6"; + process.env.AUTO_APPROVE_SCOPE_PROMPT = + "Only approve terraform changes that do not destroy resources."; + + // Mock gh pr diff + spawnSync.mockImplementation((cmd, args) => { + if (args?.includes("--name-only")) { + return { + status: 0, + stdout: "infrastructure/main.tf\n", + stderr: "", + }; + } + return { + status: 0, + stdout: + '--- a/infrastructure/main.tf\n+++ b/infrastructure/main.tf\n@@ -1 +1 
@@\n-old\n+new\n', + stderr: "", + }; + }); + + // Mock fs + fs.existsSync.mockReturnValue(false); + fs.readFileSync.mockReturnValue("[]"); + + // Mock global fetch + fetchMock = vi.fn(); + global.fetch = fetchMock; + }); + + afterEach(() => { + process.env = originalEnv; + vi.restoreAllMocks(); + delete global.fetch; + }); + + it("should approve when Claude says approved", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + content: [ + { + text: '{"approved": true, "reason": "Safe terraform variable change"}', + }, + ], + }), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + await main(); + + const output = logSpy.mock.calls.find((call) => { + try { + const parsed = JSON.parse(call[0]); + return "approved" in parsed; + } catch { + return false; + } + }); + + expect(output).toBeDefined(); + const result = JSON.parse(output[0]); + expect(result.approved).toBe(true); + expect(result.reason).toBe("Safe terraform variable change"); + + logSpy.mockRestore(); + }); + + it("should reject when Claude says not approved", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + content: [ + { + text: '{"approved": false, "reason": "Destructive resource deletion detected"}', + }, + ], + }), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + await main(); + + const output = logSpy.mock.calls.find((call) => { + try { + const parsed = JSON.parse(call[0]); + return "approved" in parsed; + } catch { + return false; + } + }); + + expect(output).toBeDefined(); + const result = JSON.parse(output[0]); + expect(result.approved).toBe(false); + + logSpy.mockRestore(); + }); + + it("should reject when Anthropic API returns an error", async () => { + fetchMock.mockResolvedValue({ + ok: false, + status: 500, + text: 
() => Promise.resolve("Internal server error"), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + // main() throws on API errors — the top-level guard in the file + // catches this and outputs a reject JSON, but since we call main() + // directly we need to handle the throw ourselves. + await expect(main()).rejects.toThrow("Anthropic API 500"); + + logSpy.mockRestore(); + }); + + it("should reject when Claude response is unparseable", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + content: [{ text: "I think this looks good but I'm not sure" }], + }), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + await main(); + + const output = logSpy.mock.calls.find((call) => { + try { + const parsed = JSON.parse(call[0]); + return "approved" in parsed; + } catch { + return false; + } + }); + + expect(output).toBeDefined(); + const result = JSON.parse(output[0]); + expect(result.approved).toBe(false); + expect(result.reason).toContain("Failed to parse"); + + logSpy.mockRestore(); + }); + + it("should include findings from findings.json when available", async () => { + const findings = [ + { + id: "test-finding-1", + severity: "CRITICAL", + description: "SQL injection", + }, + ]; + + fs.existsSync.mockReturnValue(true); + fs.readFileSync.mockReturnValue(JSON.stringify(findings)); + + fetchMock.mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + content: [ + { + text: '{"approved": false, "reason": "CRITICAL finding present"}', + }, + ], + }), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + await main(); + + // Verify the API was called with findings in the message + const fetchCall = fetchMock.mock.calls[0]; + const 
body = JSON.parse(fetchCall[1].body); + const userMessage = body.messages[0].content; + expect(userMessage).toContain("SQL injection"); + + logSpy.mockRestore(); + }); + + it("should send scope prompt in system message", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + content: [{ text: '{"approved": true, "reason": "Looks good"}' }], + }), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + await main(); + + const fetchCall = fetchMock.mock.calls[0]; + const body = JSON.parse(fetchCall[1].body); + expect(body.system).toContain( + "Only approve terraform changes that do not destroy resources." + ); + + logSpy.mockRestore(); + }); +}); diff --git a/claude/auto-review/scripts/auto-approve-evaluation.js b/claude/auto-review/scripts/auto-approve-evaluation.js new file mode 100644 index 0000000..faa642b --- /dev/null +++ b/claude/auto-review/scripts/auto-approve-evaluation.js @@ -0,0 +1,235 @@ +#!/usr/bin/env node + +/** + * AI-powered auto-approve evaluation. + * + * Calls the Anthropic API with the PR diff, review findings, and a + * repo-specific scope prompt. Claude decides whether to approve or reject. + * + * Environment variables: + * ANTHROPIC_API_KEY – Anthropic API key + * AUTO_APPROVE_MODEL – Model to use (default: claude-sonnet-4-6) + * AUTO_APPROVE_SCOPE_PROMPT – Repo-specific approval criteria + * + * Outputs a JSON object to stdout: { approved: boolean, reason: string } + */ + +import fs from "fs"; +import { spawnSync } from "child_process"; +import { loadGitHubContext, createLogger } from "./lib/github-utils.js"; + +const logger = createLogger("auto-approve-evaluation.js"); + +/** + * Fetch the PR diff via GitHub CLI. 
+ * @param {number} prNumber + * @returns {string} + */ +function fetchDiff(prNumber) { + const result = spawnSync("gh", ["pr", "diff", String(prNumber)], { + encoding: "utf8", + stdio: ["pipe", "pipe", "pipe"], + maxBuffer: 10 * 1024 * 1024, + }); + if (result.status !== 0) { + throw new Error(`gh pr diff failed: ${result.stderr?.trim()}`); + } + return result.stdout; +} + +/** + * Fetch the list of changed file names. + * @param {number} prNumber + * @returns {string} + */ +function fetchChangedFiles(prNumber) { + const result = spawnSync( + "gh", + ["pr", "diff", String(prNumber), "--name-only"], + { + encoding: "utf8", + stdio: ["pipe", "pipe", "pipe"], + } + ); + if (result.status !== 0) { + throw new Error(`gh pr diff --name-only failed: ${result.stderr?.trim()}`); + } + return result.stdout.trim(); +} + +/** + * Call the Anthropic Messages API. + * @param {object} params + * @param {string} params.apiKey + * @param {string} params.model + * @param {string} params.system + * @param {string} params.userMessage + * @returns {Promise} assistant text + */ +async function callAnthropic({ apiKey, model, system, userMessage }) { + const response = await fetch("https://api.anthropic.com/v1/messages", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": apiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model, + max_tokens: 1024, + system, + messages: [{ role: "user", content: userMessage }], + }), + }); + + if (!response.ok) { + const body = await response.text(); + throw new Error(`Anthropic API ${response.status}: ${body}`); + } + + const data = await response.json(); + const text = data.content?.[0]?.text; + if (!text) { + throw new Error("Empty response from Anthropic API"); + } + return text; +} + +/** + * Build the system prompt for the evaluation. + */ +function buildSystemPrompt(scopePrompt) { + return `You are a pull request auto-approve evaluator. 
Your job is to decide whether a PR should be automatically approved based on the provided criteria. + +## Approval Scope & Criteria +${scopePrompt || "No specific scope prompt was provided. Default to conservative: only approve trivial, clearly safe changes."} + +## Instructions +1. Review the diff and any findings from the code review. +2. Apply the approval criteria strictly. +3. If there are CRITICAL or HIGH severity findings from the review, you should NOT approve. +4. When in doubt, do NOT approve — a human reviewer can always approve manually. + +Respond with ONLY a JSON object (no markdown fences, no extra text): +{"approved": true or false, "reason": "Brief explanation of your decision"}`; +} + +/** + * Build the user message with diff, findings, and file list. + */ +function buildUserMessage({ diff, changedFiles, findings }) { + // Truncate diff if very large to stay within context limits + const maxDiffLen = 100_000; + const truncatedDiff = + diff.length > maxDiffLen + ? diff.slice(0, maxDiffLen) + "\n\n... 
[diff truncated]" + : diff; + + let message = `## Changed Files\n\`\`\`\n${changedFiles}\n\`\`\`\n\n`; + message += `## Diff\n\`\`\`diff\n${truncatedDiff}\n\`\`\`\n\n`; + + if (findings.length > 0) { + message += `## Review Findings\n\`\`\`json\n${JSON.stringify(findings, null, 2)}\n\`\`\`\n`; + } else { + message += `## Review Findings\nNo issues were found during the code review.\n`; + } + + return message; +} + +// ---- Main execution -------------------------------------------------------- + +export async function main() { + const apiKey = process.env.ANTHROPIC_API_KEY; + if (!apiKey) { + throw new Error("ANTHROPIC_API_KEY environment variable is required"); + } + + const model = process.env.AUTO_APPROVE_MODEL || "claude-sonnet-4-6"; + const scopePrompt = process.env.AUTO_APPROVE_SCOPE_PROMPT || ""; + + const context = loadGitHubContext(); + const prNumber = context.issue.number; + + if (!prNumber) { + logger.error("Could not determine PR number"); + console.log( + JSON.stringify({ approved: false, reason: "Could not determine PR number" }) + ); + return; + } + + logger.log(`Evaluating auto-approve for PR #${prNumber}...`); + + // Gather context + const diff = fetchDiff(prNumber); + const changedFiles = fetchChangedFiles(prNumber); + + let findings = []; + if (fs.existsSync("findings.json")) { + try { + findings = JSON.parse(fs.readFileSync("findings.json", "utf8")); + } catch { + logger.log("Could not parse findings.json, proceeding without findings"); + } + } + + // Call Claude + const system = buildSystemPrompt(scopePrompt); + const userMessage = buildUserMessage({ diff, changedFiles, findings }); + + logger.log(`Calling ${model} for auto-approve evaluation...`); + + const responseText = await callAnthropic({ + apiKey, + model, + system, + userMessage, + }); + + // Parse the response — handle potential markdown fences + let decision; + try { + const jsonMatch = responseText.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + throw new Error("No JSON object found in 
response"); + } + decision = JSON.parse(jsonMatch[0]); + } catch (parseError) { + logger.error( + `Failed to parse Claude response: ${parseError.message}\nRaw response: ${responseText}` + ); + decision = { + approved: false, + reason: "Failed to parse evaluation response — defaulting to reject", + }; + } + + // Ensure proper shape + const result = { + approved: decision.approved === true, + reason: decision.reason || "No reason provided", + }; + + logger.log( + `Decision: ${result.approved ? "APPROVE" : "REJECT"} — ${result.reason}` + ); + + // Output to stdout for the calling step to capture + console.log(JSON.stringify(result)); +} + +if (import.meta.url === `file://${process.argv[1]}`) { + try { + await main(); + } catch (error) { + logger.error(`Error during auto-approve evaluation: ${error.message}`); + console.log( + JSON.stringify({ + approved: false, + reason: `Evaluation error: ${error.message}`, + }) + ); + process.exit(0); + } +} From 1c658d23d4ebe15c94bce7dd5ae1ef4d06353f8b Mon Sep 17 00:00:00 2001 From: Cali93 <32299095+Cali93@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:05:45 +0200 Subject: [PATCH 2/4] docs(auto-review): add auto-approve setup and usage documentation Add inputs table entries, setup guide (GitHub App creation, private key, secrets), usage examples (terraform, docs-only, dependency bumps), and how-it-works explanation to the README. 
Co-Authored-By: Claude Opus 4.6 --- claude/auto-review/README.md | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/claude/auto-review/README.md b/claude/auto-review/README.md index 1b7116a..676771c 100644 --- a/claude/auto-review/README.md +++ b/claude/auto-review/README.md @@ -94,6 +94,10 @@ jobs: | `comment_pr_findings` | ❌ | `true` | Automatically post inline PR comments for findings saved to `findings.json` | | `force_breaking_changes_agent` | ❌ | `false` | Force breaking changes subagent regardless of file heuristic | | `force_license_compliance_agent` | ❌ | `false` | Force license compliance agent regardless of heuristic | +| `auto_approve` | ❌ | `false` | Enable AI-powered auto-approval after review | +| `auto_approve_app_id` | When `auto_approve` is `true` | - | GitHub App ID used to generate a token for PR approval | +| `auto_approve_private_key` | When `auto_approve` is `true` | - | GitHub App private key for PR approval | +| `auto_approve_scope_prompt` | ❌ | - | Instructions telling Claude when to approve or reject. Provide repo-specific criteria | ## Usage Examples @@ -293,6 +297,76 @@ The auto-review action includes a specialized breaking changes subagent that is **Force override:** Set `force_breaking_changes_agent: "true"` to always spawn the agent regardless of heuristic. +### Auto-Approve (AI-Powered) + +The auto-approve feature lets Claude automatically approve PRs that pass review, using a repo-specific scope prompt to decide. This is useful for satisfying org-level "required approvals" rules on low-risk PRs (e.g. Terraform config changes, documentation updates). + +#### How It Works + +1. The normal auto-review runs (unchanged) +2. Findings are extracted into `findings.json` (unchanged) +3. Claude evaluates the diff, changed files, and review findings against your `auto_approve_scope_prompt` +4. If Claude decides the PR is safe, a GitHub App token is generated and the PR is approved +5. 
If Claude decides the PR is unsafe (or has CRITICAL/HIGH findings), approval is skipped + +#### Setup + +**1. Create a GitHub App** in your org (Settings → Developer settings → GitHub Apps → New GitHub App): + - **Name**: e.g. `Claude Reviewer` (must be unique across GitHub) + - **Homepage URL**: your org's GitHub URL (required field, any URL works) + - **Permissions**: Repository permissions → **Pull Requests → Read & Write** + - **Webhook**: uncheck "Active" (no webhook needed) + - **Installation**: "Only on this account" + +**2. Generate a private key**: on the App's settings page, scroll to "Private keys" → "Generate a private key". Save the downloaded `.pem` file. + +**3. Install the App** on the target repository (or all repositories): App settings → "Install App" → select your org → choose repositories. + +**4. Add repository secrets** (Settings → Secrets and variables → Actions → New repository secret): + - `CLAUDE_REVIEWER_APP_ID` — the App ID (visible on the App's "General" settings page) + - `CLAUDE_REVIEWER_PRIVATE_KEY` — the full contents of the `.pem` private key file + +#### Usage + +```yaml +- name: Claude Review + uses: WalletConnect/actions/claude/auto-review@master + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + auto_approve: "true" + auto_approve_app_id: ${{ secrets.CLAUDE_REVIEWER_APP_ID }} + auto_approve_private_key: ${{ secrets.CLAUDE_REVIEWER_PRIVATE_KEY }} + auto_approve_scope_prompt: | + Only approve Terraform infrastructure changes. + All changed files must be under infrastructure/, monitoring/, or .github/workflows/*terraform*. + If the PR includes any non-Terraform files, reject. + + Do NOT approve if: + - Resources are being destroyed or removed + - Database or storage resources are deleted + - force_destroy is enabled or prevent_destroy is removed + - Any change that could cause data loss + + Approve if changes are safe: new resources, variable updates, policy changes, monitoring config. 
+``` + +The scope prompt is fully customizable per repository. Other examples: + +```yaml +# Documentation-only auto-approve +auto_approve_scope_prompt: | + Only approve if every changed file is documentation (.md, .txt, .rst). + Reject if any code files are modified. +``` + +```yaml +# Dependency update auto-approve +auto_approve_scope_prompt: | + Only approve dependency version bumps (package.json, lockfiles). + Reject if any source code files are modified. + Reject if a dependency is added or removed (only version changes are safe). +``` + ## Best Practices ### 1. Provide Project Context From 81a257648f34fd300b203ea0d0d5d32057f7f5ba Mon Sep 17 00:00:00 2001 From: Cali93 <32299095+Cali93@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:19:51 +0200 Subject: [PATCH 3/4] docs(auto-review): use org secrets instead of repo secrets Co-Authored-By: Claude Opus 4.6 --- claude/auto-review/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/claude/auto-review/README.md b/claude/auto-review/README.md index 676771c..8d1e141 100644 --- a/claude/auto-review/README.md +++ b/claude/auto-review/README.md @@ -322,9 +322,10 @@ The auto-approve feature lets Claude automatically approve PRs that pass review, **3. Install the App** on the target repository (or all repositories): App settings → "Install App" → select your org → choose repositories. -**4. Add repository secrets** (Settings → Secrets and variables → Actions → New repository secret): +**4. 
Add org secrets** (Org Settings → Secrets and variables → Actions → New organization secret): - `CLAUDE_REVIEWER_APP_ID` — the App ID (visible on the App's "General" settings page) - `CLAUDE_REVIEWER_PRIVATE_KEY` — the full contents of the `.pem` private key file + - Set repository access to "All repositories" or select specific repos that need auto-approve #### Usage From d0b81551f2882a51926a73752238b64faa6f4c42 Mon Sep 17 00:00:00 2001 From: Cali93 <32299095+Cali93@users.noreply.github.com> Date: Mon, 9 Mar 2026 16:02:58 +0200 Subject: [PATCH 4/4] fix(auto-review): address auto-approve security and reliability issues - Fix stdout contamination: logger now writes to stderr so stdout contains only the final JSON result, preventing jq parse failures - Fix command injection: pass AUTO_APPROVE_REASON as an env var instead of interpolating it directly into the shell script via ${{ }} - Add input validation step: fail early with a clear error when auto_approve is true but app credentials are missing Co-Authored-By: Claude Opus 4.6 --- claude/auto-review/action.yml | 22 +++++- .../__tests__/auto-approve-evaluation.test.js | 73 +++++++++---------- .../scripts/auto-approve-evaluation.js | 11 ++- 3 files changed, 62 insertions(+), 44 deletions(-) diff --git a/claude/auto-review/action.yml b/claude/auto-review/action.yml index 49924b0..c00391f 100644 --- a/claude/auto-review/action.yml +++ b/claude/auto-review/action.yml @@ -442,6 +442,21 @@ runs: node "$SCRIPT_PATH" # ── Auto-approve (opt-in) ────────────────────────────────────────────── + - name: Validate auto-approve inputs + if: inputs.auto_approve == 'true' + shell: bash + run: | + MISSING="" + if [[ -z "$APP_ID" ]]; then MISSING="auto_approve_app_id"; fi + if [[ -z "$PRIVATE_KEY" ]]; then MISSING="${MISSING:+$MISSING, }auto_approve_private_key"; fi + if [[ -n "$MISSING" ]]; then + echo "::error::Missing required inputs for auto_approve: $MISSING" + exit 1 + fi + env: + APP_ID: ${{ inputs.auto_approve_app_id }} + 
PRIVATE_KEY: ${{ inputs.auto_approve_private_key }} + - name: Evaluate auto-approve if: inputs.auto_approve == 'true' shell: bash @@ -486,8 +501,7 @@ runs: shell: bash env: GH_TOKEN: ${{ steps.approve-app-token.outputs.token }} + APPROVE_REASON: ${{ env.AUTO_APPROVE_REASON }} run: | - PR_NUMBER=${{ github.event.pull_request.number || github.event.issue.number }} - gh pr review "$PR_NUMBER" --approve --body "✅ **Auto-approved by Claude** - - ${{ env.AUTO_APPROVE_REASON }}" + PR_NUMBER="${{ github.event.pull_request.number || github.event.issue.number }}" + gh pr review "$PR_NUMBER" --approve --body "$(printf '✅ **Auto-approved by Claude**\n\n%s' "$APPROVE_REASON")" diff --git a/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js b/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js index ca62321..f1aa2b1 100644 --- a/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js +++ b/claude/auto-review/scripts/__tests__/auto-approve-evaluation.test.js @@ -10,10 +10,6 @@ vi.mock("../lib/github-utils.js", async () => { issue: { number: 42 }, payload: {}, })), - createLogger: () => ({ - log: vi.fn(), - error: vi.fn(), - }), }; }); @@ -43,6 +39,7 @@ import fs from "fs"; describe("auto-approve-evaluation", () => { let originalEnv; let fetchMock; + let stderrSpy; beforeEach(() => { originalEnv = { ...process.env }; @@ -75,6 +72,9 @@ describe("auto-approve-evaluation", () => { // Mock global fetch fetchMock = vi.fn(); global.fetch = fetchMock; + + // Silence stderr logging from the script + stderrSpy = vi.spyOn(process.stderr, "write").mockImplementation(() => true); }); afterEach(() => { @@ -101,17 +101,9 @@ describe("auto-approve-evaluation", () => { const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); await main(); - const output = logSpy.mock.calls.find((call) => { - try { - const parsed = JSON.parse(call[0]); - return "approved" in parsed; - } catch { - return false; - } - }); - - expect(output).toBeDefined(); 
- const result = JSON.parse(output[0]); + // stdout should contain only the final JSON (no log noise) + expect(logSpy).toHaveBeenCalledTimes(1); + const result = JSON.parse(logSpy.mock.calls[0][0]); expect(result.approved).toBe(true); expect(result.reason).toBe("Safe terraform variable change"); @@ -136,17 +128,8 @@ describe("auto-approve-evaluation", () => { const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); await main(); - const output = logSpy.mock.calls.find((call) => { - try { - const parsed = JSON.parse(call[0]); - return "approved" in parsed; - } catch { - return false; - } - }); - - expect(output).toBeDefined(); - const result = JSON.parse(output[0]); + expect(logSpy).toHaveBeenCalledTimes(1); + const result = JSON.parse(logSpy.mock.calls[0][0]); expect(result.approved).toBe(false); logSpy.mockRestore(); @@ -184,17 +167,8 @@ describe("auto-approve-evaluation", () => { const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); await main(); - const output = logSpy.mock.calls.find((call) => { - try { - const parsed = JSON.parse(call[0]); - return "approved" in parsed; - } catch { - return false; - } - }); - - expect(output).toBeDefined(); - const result = JSON.parse(output[0]); + expect(logSpy).toHaveBeenCalledTimes(1); + const result = JSON.parse(logSpy.mock.calls[0][0]); expect(result.approved).toBe(false); expect(result.reason).toContain("Failed to parse"); @@ -261,4 +235,29 @@ describe("auto-approve-evaluation", () => { logSpy.mockRestore(); }); + + it("should write logs to stderr, not stdout", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + content: [{ text: '{"approved": true, "reason": "Safe"}' }], + }), + }); + + const { main } = await import("../auto-approve-evaluation.js"); + + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + await main(); + + // Verify logs went to stderr + const stderrCalls = stderrSpy.mock.calls.map((c) => c[0]); + 
expect(stderrCalls.some((line) => line.includes("Evaluating auto-approve"))).toBe(true); + + // Verify stdout only has the JSON result + expect(logSpy).toHaveBeenCalledTimes(1); + expect(() => JSON.parse(logSpy.mock.calls[0][0])).not.toThrow(); + + logSpy.mockRestore(); + }); }); diff --git a/claude/auto-review/scripts/auto-approve-evaluation.js b/claude/auto-review/scripts/auto-approve-evaluation.js index faa642b..cb029b4 100644 --- a/claude/auto-review/scripts/auto-approve-evaluation.js +++ b/claude/auto-review/scripts/auto-approve-evaluation.js @@ -16,9 +16,14 @@ import fs from "fs"; import { spawnSync } from "child_process"; -import { loadGitHubContext, createLogger } from "./lib/github-utils.js"; - -const logger = createLogger("auto-approve-evaluation.js"); +import { loadGitHubContext } from "./lib/github-utils.js"; + +// Use stderr for logging so stdout contains only the final JSON result +const SCRIPT_NAME = "auto-approve-evaluation.js"; +const logger = { + log: (...args) => process.stderr.write(`[${SCRIPT_NAME}] ${args.join(" ")}\n`), + error: (...args) => process.stderr.write(`[${SCRIPT_NAME}] ${args.join(" ")}\n`), +}; /** * Fetch the PR diff via GitHub CLI.