diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e658a0..5f5406c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,13 @@ ## 0.0.11-beta.3 — 2026-05-28 +### Features +- Add `failproofai auth login | logout | whoami` for email-OTP login against the failproofai api-server: new `src/auth/{session-store,api-client,prompts,manager}.ts`; session at `~/.failproofai/session.json` (mode 0600); default base URL `https://api.befailproof.ai` overridable via `FAILPROOFAI_API_BASE_URL`; login prompts interactively or takes `--email`; logout best-effort revokes server-side then always clears local state; whoami auto-refreshes an expired access token, clears the session only on 401/403 from refresh, preserves it on transient failures, and wraps fetch/timeout errors as `CliError` so they print `Error:` instead of `Unexpected error:` (the follow-up promised by #380's removal of the old device-flow auth) (#396). + ### Fixes +- `block-secrets-write` no longer denies `Write` of source files containing the substring `credentials` or `id_rsa`: `SECRET_FILE_CREDENTIALS_RE` now anchors to well-known credential paths (`.aws/credentials`, `.docker/credentials.json`, `.netrc`, …), `SECRET_FILE_ID_RSA_RE` matches the SSH private-key basename only (so `/id_rsa` blocks but `id_rsa_backup.md` and `id_rsa.pub` don't), and both patterns accept POSIX `/` and Windows `\` separators (#396). +- `sanitize-connection-strings` no longer flags grep/perl arguments that merely contain a scheme name (e.g. `'postgres://|mysql://'`): the regex now requires a URL-safe `:@` shape so regex metachars in the pre-`@` segment break the match while real connection strings still get redacted (#396). +- `warn-repeated-tool-calls` canonicalizes the fingerprint (sorts input keys recursively) so the same logical call always hashes the same regardless of property order, records a `warned` set in the sidecar so the warning fires once per fingerprint per session instead of repeating every turn, and fires on the 3rd identical call (matching the `THRESHOLD = 3` constant — previously off-by-one, fired on the 4th) (back-compat with the old bare-counts sidecar format preserved) (#396). - Fix the `bump-platform-submodule.yml` workflow's first post-merge push, which failed with `fatal: could not read Username for 'https://github.com'`. The `persist-credentials: false` hardening from #394 left the cross-repo `git push`/`fetch` unauthenticated, and the inline `Authorization: bearer …` extraheader only authenticates GitHub's REST API — git-over-HTTPS smart-protocol expects Basic auth with `x-access-token:`. Switch to a base64-encoded Basic header (matching `actions/checkout`'s own internal extraheader format) so the push and the rebase-and-retry fetch in the loop both authenticate (#395). ### Features diff --git a/__tests__/auth/api-client.test.ts b/__tests__/auth/api-client.test.ts new file mode 100644 index 0000000..f84e91c --- /dev/null +++ b/__tests__/auth/api-client.test.ts @@ -0,0 +1,159 @@ +// @vitest-environment node +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +interface CapturedRequest { + url: string; + init: RequestInit; +} + +function mockFetch( + responder: (req: CapturedRequest) => { status?: number; body?: unknown; headers?: Record }, +) { + const captured: CapturedRequest[] = []; + const fetchSpy = vi.fn(async (url: string | URL, init: RequestInit = {}) => { + const req: CapturedRequest = { url: String(url), init }; + captured.push(req); + const { status = 200, body, headers = {} } = responder(req); + return new Response(body === undefined ? null : JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json", ...headers }, + }); + }); + vi.stubGlobal("fetch", fetchSpy); + return { captured }; +} + +describe("auth/api-client", () => { + beforeEach(() => { + vi.resetModules(); + delete process.env.FAILPROOFAI_API_BASE_URL; + }); + + afterEach(() => { + vi.unstubAllGlobals(); + delete process.env.FAILPROOFAI_API_BASE_URL; + }); + + describe("resolveBaseUrl", () => { + it("uses the bundled default when no env override is set", async () => { + const { resolveBaseUrl } = await import("../../src/auth/api-client"); + expect(resolveBaseUrl()).toBe("https://api.befailproof.ai"); + }); + + it("respects FAILPROOFAI_API_BASE_URL and strips a trailing slash", async () => { + process.env.FAILPROOFAI_API_BASE_URL = "http://localhost:8080/"; + const { resolveBaseUrl } = await import("../../src/auth/api-client"); + expect(resolveBaseUrl()).toBe("http://localhost:8080"); + }); + }); + + describe("requestLoginCode", () => { + it("POSTs the email and returns the parsed body", async () => { + const { captured } = mockFetch(() => ({ + body: { status: "code_sent", expires_in: 600, resend_available_in: 30 }, + })); + const { requestLoginCode } = await import("../../src/auth/api-client"); + const res = await requestLoginCode("http://localhost:8080", "jane@acme.com"); + + expect(res).toEqual({ status: "code_sent", expires_in: 600, resend_available_in: 30 }); + expect(captured[0]?.url).toBe("http://localhost:8080/v0/auth/login/request"); + expect(captured[0]?.init.method).toBe("POST"); + expect(JSON.parse(captured[0]?.init.body as string)).toEqual({ email: "jane@acme.com" }); + }); + }); + + describe("verifyLoginCode", () => { + it("POSTs email + code and returns the token response", async () => { + const { captured } = mockFetch(() => ({ + body: { + token_type: "Bearer", + access_token: "access-abc", + access_expires_in: 3600, + refresh_token: "refresh-xyz", + refresh_expires_in: 2_592_000, + user: { id: "u-1", email: "jane@acme.com" }, + }, + })); + const { verifyLoginCode } = await import("../../src/auth/api-client"); + const res = await verifyLoginCode("http://localhost:8080", "jane@acme.com", "123456"); + + expect(res.user.email).toBe("jane@acme.com"); + expect(res.access_token).toBe("access-abc"); + expect(JSON.parse(captured[0]?.init.body as string)).toEqual({ + email: "jane@acme.com", + code: "123456", + }); + }); + + it("throws a CliError with the server detail on 401", async () => { + mockFetch(() => ({ + status: 401, + body: { success: false, code: "invalid_code", detail: "The code is invalid or has expired." }, + })); + const { verifyLoginCode } = await import("../../src/auth/api-client"); + await expect(verifyLoginCode("http://x", "a@b.c", "999999")) + .rejects.toThrow("The code is invalid or has expired."); + }); + + it("wraps a network failure into CliError instead of letting it bubble as Unexpected error", async () => { + vi.stubGlobal("fetch", vi.fn(async () => { + throw new TypeError("fetch failed"); + })); + const { verifyLoginCode } = await import("../../src/auth/api-client"); + const err = await verifyLoginCode("http://localhost:8080", "a@b.c", "111111") + .then(() => null, (e: Error) => e); + expect(err).not.toBeNull(); + expect(err?.name).toBe("CliError"); + expect(err?.message).toContain("Network error"); + }); + + it("wraps an AbortError-style timeout into CliError", async () => { + const timeoutErr = new Error("timed out"); + timeoutErr.name = "TimeoutError"; + vi.stubGlobal("fetch", vi.fn(async () => { throw timeoutErr; })); + const { verifyLoginCode } = await import("../../src/auth/api-client"); + const err = await verifyLoginCode("http://localhost:8080", "a@b.c", "111111") + .then(() => null, (e: Error) => e); + expect(err?.name).toBe("CliError"); + expect(err?.message).toContain("timed out"); + }); + + it("surfaces the Retry-After hint on 429", async () => { + mockFetch(() => ({ + status: 429, + body: { success: false, code: "rate_limited", detail: "Too many requests, please try again later." }, + headers: { "retry-after": "42" }, + })); + const { verifyLoginCode } = await import("../../src/auth/api-client"); + await expect(verifyLoginCode("http://x", "a@b.c", "111111")) + .rejects.toThrow(/retry after 42s/); + }); + }); + + describe("logout", () => { + it("includes the Bearer header and the refresh token body, returns void on 204", async () => { + const { captured } = mockFetch(() => ({ status: 204 })); + const { logout } = await import("../../src/auth/api-client"); + await expect(logout("http://x", "access-tok", "refresh-tok")).resolves.toBeUndefined(); + expect(captured[0]?.url).toBe("http://x/v0/auth/logout"); + const headers = captured[0]?.init.headers as Record; + expect(headers.Authorization).toBe("Bearer access-tok"); + expect(JSON.parse(captured[0]?.init.body as string)).toEqual({ refresh_token: "refresh-tok" }); + }); + }); + + describe("me", () => { + it("GETs /v0/auth/me with the bearer access token", async () => { + const { captured } = mockFetch(() => ({ + body: { id: "u-1", email: "jane@acme.com", status: "active", created_at: "2026-01-01T00:00:00Z" }, + })); + const { me } = await import("../../src/auth/api-client"); + const res = await me("http://x", "access-tok"); + expect(res.email).toBe("jane@acme.com"); + expect(captured[0]?.url).toBe("http://x/v0/auth/me"); + expect(captured[0]?.init.method).toBe("GET"); + const headers = captured[0]?.init.headers as Record; + expect(headers.Authorization).toBe("Bearer access-tok"); + }); + }); +}); diff --git a/__tests__/auth/manager.test.ts b/__tests__/auth/manager.test.ts new file mode 100644 index 0000000..6f98a08 --- /dev/null +++ b/__tests__/auth/manager.test.ts @@ -0,0 +1,248 @@ +// @vitest-environment node +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const stdoutWrite = vi.spyOn(process.stdout, "write"); + +const apiClientMocks = vi.hoisted(() => ({ + resolveBaseUrl: vi.fn(), + requestLoginCode: vi.fn(), + verifyLoginCode: vi.fn(), + refreshTokens: vi.fn(), + logout: vi.fn(), + me: vi.fn(), +})); + +const promptMocks = vi.hoisted(() => ({ + promptEmail: vi.fn(), + promptCode: vi.fn(), +})); + +const sessionMocks = vi.hoisted(() => ({ + readSession: vi.fn(), + writeSession: vi.fn(), + clearSession: vi.fn(), +})); + +vi.mock("../../src/auth/api-client", () => apiClientMocks); +vi.mock("../../src/auth/prompts", () => promptMocks); +vi.mock("../../src/auth/session-store", () => sessionMocks); + +describe("auth/manager", () => { + beforeEach(() => { + apiClientMocks.resolveBaseUrl.mockReturnValue("http://localhost:8080"); + apiClientMocks.requestLoginCode.mockReset(); + apiClientMocks.verifyLoginCode.mockReset(); + apiClientMocks.refreshTokens.mockReset(); + apiClientMocks.logout.mockReset(); + apiClientMocks.me.mockReset(); + promptMocks.promptEmail.mockReset(); + promptMocks.promptCode.mockReset(); + sessionMocks.readSession.mockReset(); + sessionMocks.writeSession.mockReset(); + sessionMocks.clearSession.mockReset(); + stdoutWrite.mockClear(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe("loginCmd", () => { + it("uses --email when given, exchanges OTP for tokens, and persists the session", async () => { + apiClientMocks.requestLoginCode.mockResolvedValue({ + status: "code_sent", expires_in: 600, resend_available_in: 30, + }); + promptMocks.promptCode.mockResolvedValue("123456"); + apiClientMocks.verifyLoginCode.mockResolvedValue({ + token_type: "Bearer", + access_token: "access-abc", + access_expires_in: 3600, + refresh_token: "refresh-xyz", + refresh_expires_in: 2_592_000, + user: { id: "u-1", email: "jane@acme.com" }, + }); + + const { loginCmd } = await import("../../src/auth/manager"); + await loginCmd({ email: "jane@acme.com" }); + + expect(promptMocks.promptEmail).not.toHaveBeenCalled(); + expect(apiClientMocks.requestLoginCode).toHaveBeenCalledWith( + "http://localhost:8080", + "jane@acme.com", + ); + expect(apiClientMocks.verifyLoginCode).toHaveBeenCalledWith( + "http://localhost:8080", + "jane@acme.com", + "123456", + ); + expect(sessionMocks.writeSession).toHaveBeenCalledTimes(1); + const written = sessionMocks.writeSession.mock.calls[0]?.[0]; + expect(written?.user.email).toBe("jane@acme.com"); + expect(written?.api_base_url).toBe("http://localhost:8080"); + expect(written?.access_expires_at).toBeGreaterThan(Date.now()); + }); + + it("prompts for email when --email is omitted", async () => { + promptMocks.promptEmail.mockResolvedValue("interactive@acme.com"); + promptMocks.promptCode.mockResolvedValue("000000"); + apiClientMocks.requestLoginCode.mockResolvedValue({ + status: "code_sent", expires_in: 600, resend_available_in: 30, + }); + apiClientMocks.verifyLoginCode.mockResolvedValue({ + token_type: "Bearer", + access_token: "a", access_expires_in: 10, + refresh_token: "r", refresh_expires_in: 100, + user: { id: "u-2", email: "interactive@acme.com" }, + }); + + const { loginCmd } = await import("../../src/auth/manager"); + await loginCmd({}); + + expect(promptMocks.promptEmail).toHaveBeenCalledTimes(1); + expect(apiClientMocks.requestLoginCode).toHaveBeenCalledWith( + "http://localhost:8080", + "interactive@acme.com", + ); + }); + }); + + describe("logoutCmd", () => { + it("clears the session and revokes server-side when one exists", async () => { + sessionMocks.readSession.mockReturnValue({ + user: { id: "u", email: "a@b.c" }, + access_token: "atk", + access_expires_at: Date.now() + 1000, + refresh_token: "rtk", + refresh_expires_at: Date.now() + 10000, + api_base_url: "http://localhost:8080", + }); + apiClientMocks.logout.mockResolvedValue(undefined); + + const { logoutCmd } = await import("../../src/auth/manager"); + await logoutCmd(); + + expect(apiClientMocks.logout).toHaveBeenCalledWith("http://localhost:8080", "atk", "rtk"); + expect(sessionMocks.clearSession).toHaveBeenCalledTimes(1); + }); + + it("clears the local session even if the server call fails", async () => { + sessionMocks.readSession.mockReturnValue({ + user: { id: "u", email: "a@b.c" }, + access_token: "atk", + access_expires_at: Date.now() + 1000, + refresh_token: "rtk", + refresh_expires_at: Date.now() + 10000, + api_base_url: "http://localhost:8080", + }); + apiClientMocks.logout.mockRejectedValue(new Error("network down")); + + const { logoutCmd } = await import("../../src/auth/manager"); + await logoutCmd(); + + expect(sessionMocks.clearSession).toHaveBeenCalledTimes(1); + }); + + it("is a no-op when there is no local session", async () => { + sessionMocks.readSession.mockReturnValue(null); + + const { logoutCmd } = await import("../../src/auth/manager"); + await logoutCmd(); + + expect(apiClientMocks.logout).not.toHaveBeenCalled(); + expect(sessionMocks.clearSession).not.toHaveBeenCalled(); + }); + }); + + describe("whoamiCmd", () => { + it("throws CliError when there is no local session", async () => { + sessionMocks.readSession.mockReturnValue(null); + const { whoamiCmd } = await import("../../src/auth/manager"); + await expect(whoamiCmd()).rejects.toThrow(/Not logged in/); + }); + + it("refreshes the access token when expired and re-persists the session", async () => { + sessionMocks.readSession.mockReturnValue({ + user: { id: "u", email: "a@b.c" }, + access_token: "stale-atk", + access_expires_at: Date.now() - 1000, + refresh_token: "rtk-old", + refresh_expires_at: Date.now() + 10_000, + api_base_url: "http://localhost:8080", + }); + apiClientMocks.refreshTokens.mockResolvedValue({ + token_type: "Bearer", + access_token: "fresh-atk", + access_expires_in: 3600, + refresh_token: "rtk-new", + refresh_expires_in: 2_592_000, + }); + apiClientMocks.me.mockResolvedValue({ + id: "u", email: "a@b.c", status: "active", created_at: "2026-01-01T00:00:00Z", + }); + + const { whoamiCmd } = await import("../../src/auth/manager"); + await whoamiCmd(); + + expect(apiClientMocks.refreshTokens).toHaveBeenCalledWith("http://localhost:8080", "rtk-old"); + expect(sessionMocks.writeSession).toHaveBeenCalledTimes(1); + const written = sessionMocks.writeSession.mock.calls[0]?.[0]; + expect(written?.access_token).toBe("fresh-atk"); + expect(written?.refresh_token).toBe("rtk-new"); + expect(apiClientMocks.me).toHaveBeenCalledWith("http://localhost:8080", "fresh-atk"); + }); + + it("clears the session and tells the user to re-login when the refresh token itself is expired", async () => { + sessionMocks.readSession.mockReturnValue({ + user: { id: "u", email: "a@b.c" }, + access_token: "stale-atk", + access_expires_at: Date.now() - 1000, + refresh_token: "rtk-stale", + refresh_expires_at: Date.now() - 1, + api_base_url: "http://localhost:8080", + }); + + const { whoamiCmd } = await import("../../src/auth/manager"); + await expect(whoamiCmd()).rejects.toThrow(/Session expired/); + expect(sessionMocks.clearSession).toHaveBeenCalledTimes(1); + expect(apiClientMocks.refreshTokens).not.toHaveBeenCalled(); + }); + + it("clears the session on 401 from the refresh endpoint (truly invalid token)", async () => { + sessionMocks.readSession.mockReturnValue({ + user: { id: "u", email: "a@b.c" }, + access_token: "stale-atk", + access_expires_at: Date.now() - 1000, + refresh_token: "rtk-revoked", + refresh_expires_at: Date.now() + 10_000, + api_base_url: "http://localhost:8080", + }); + const apiErr = Object.assign(new Error("Session expired, please log in again."), { + name: "CliError", exitCode: 1 as const, status: 401, + }); + apiClientMocks.refreshTokens.mockRejectedValue(apiErr); + + const { whoamiCmd } = await import("../../src/auth/manager"); + await expect(whoamiCmd()).rejects.toThrow(/Session expired/); + expect(sessionMocks.clearSession).toHaveBeenCalledTimes(1); + }); + + it("preserves the session on a transient refresh failure (network / 5xx / timeout)", async () => { + sessionMocks.readSession.mockReturnValue({ + user: { id: "u", email: "a@b.c" }, + access_token: "stale-atk", + access_expires_at: Date.now() - 1000, + refresh_token: "rtk-good", + refresh_expires_at: Date.now() + 10_000, + api_base_url: "http://localhost:8080", + }); + const apiErr = Object.assign(new Error("Network error contacting http://localhost:8080/v0/auth/token/refresh: fetch failed"), { + name: "CliError", exitCode: 1 as const, + }); + apiClientMocks.refreshTokens.mockRejectedValue(apiErr); + + const { whoamiCmd } = await import("../../src/auth/manager"); + await expect(whoamiCmd()).rejects.toThrow(/Could not refresh session/); + expect(sessionMocks.clearSession).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/__tests__/auth/session-store.test.ts b/__tests__/auth/session-store.test.ts new file mode 100644 index 0000000..bf0b392 --- /dev/null +++ b/__tests__/auth/session-store.test.ts @@ -0,0 +1,70 @@ +// @vitest-environment node +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { mkdtempSync, rmSync, statSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +describe("auth/session-store", () => { + const fakeHome = mkdtempSync(join(tmpdir(), "failproofai-auth-test-")); + const sessionFile = join(fakeHome, ".failproofai", "session.json"); + + beforeEach(() => { + vi.resetModules(); + vi.doMock("node:os", async (orig) => { + const real = await (orig as () => Promise)(); + return { ...real, homedir: () => fakeHome }; + }); + }); + + afterEach(() => { + rmSync(fakeHome, { recursive: true, force: true }); + vi.doUnmock("node:os"); + }); + + function makeSession() { + return { + user: { id: "u-1", email: "jane@acme.com" }, + access_token: "access-abc", + access_expires_at: 1_700_000_000_000, + refresh_token: "refresh-xyz", + refresh_expires_at: 1_700_999_999_999, + api_base_url: "https://api.example.com", + }; + } + + it("writes and reads back the same session", async () => { + const mod = await import("../../src/auth/session-store"); + const session = makeSession(); + mod.writeSession(session); + expect(mod.readSession()).toEqual(session); + }); + + it("writes the session file with mode 0600", async () => { + if (process.platform === "win32") return; + const mod = await import("../../src/auth/session-store"); + mod.writeSession(makeSession()); + const mode = statSync(sessionFile).mode & 0o777; + expect(mode).toBe(0o600); + }); + + it("returns null when no session file exists", async () => { + const mod = await import("../../src/auth/session-store"); + expect(mod.readSession()).toBeNull(); + }); + + it("returns null for a session file with missing required fields", async () => { + const { writeFileSync, mkdirSync } = await import("node:fs"); + mkdirSync(join(fakeHome, ".failproofai"), { recursive: true }); + writeFileSync(sessionFile, JSON.stringify({ user: { id: "u" } }), "utf-8"); + const mod = await import("../../src/auth/session-store"); + expect(mod.readSession()).toBeNull(); + }); + + it("clearSession removes the file and is idempotent", async () => { + const mod = await import("../../src/auth/session-store"); + mod.writeSession(makeSession()); + mod.clearSession(); + expect(mod.readSession()).toBeNull(); + expect(() => mod.clearSession()).not.toThrow(); + }); +}); diff --git a/__tests__/hooks/builtin-policies.test.ts b/__tests__/hooks/builtin-policies.test.ts index b0effdc..261b608 100644 --- a/__tests__/hooks/builtin-policies.test.ts +++ b/__tests__/hooks/builtin-policies.test.ts @@ -185,6 +185,30 @@ describe("hooks/builtin-policies", () => { const result = await policy.fn(ctx); expect(result.decision).toBe("allow"); }); + + it("allows shell pipelines whose args contain a scheme-like alternation", async () => { + // grep -E 'postgres://|mysql://|@host:1234/' is a regex pattern, not a + // leaked connection string. The detector should not see this as a hit. + for (const output of [ + "grep -nE 'postgres://|mysql://|@[a-z]+:[0-9]+/' file.txt", + "perl -ne '/postgres:\\/\\/[a-z]+@[a-z]+/'", + "echo 'allowed schemes: postgres://, mysql://, mongodb://'", + ]) { + const ctx = makeCtx({ eventType: "PostToolUse", payload: { output } }); + const result = await policy.fn(ctx); + expect(result.decision, output).toBe("allow"); + } + }); + + it("still detects real connection strings even alongside other text", async () => { + const ctx = makeCtx({ + eventType: "PostToolUse", + payload: { + output: "DB up: postgresql://app_user:hunter2@db.internal:5432/app", + }, + }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); }); describe("sanitize-private-key-content", () => { @@ -877,6 +901,67 @@ describe("hooks/builtin-policies", () => { const ctx = makeCtx({ toolName: "Read", toolInput: { file_path: "/home/user/cert.pem" } }); expect((await policy.fn(ctx)).decision).toBe("allow"); }); + + it("allows source files whose path contains the word credentials", async () => { + // Source code that *deals with* credentials is not itself a credential file. + for (const file_path of [ + "/repo/src/auth/credentials.ts", + "/repo/lib/credentials-helper.js", + "/repo/docs/credentials.md", + "/repo/__tests__/credentials.test.ts", + ]) { + const ctx = makeCtx({ toolName: "Write", toolInput: { file_path } }); + expect((await policy.fn(ctx)).decision, file_path).toBe("allow"); + } + }); + + it("allows source files whose path contains the substring id_rsa", async () => { + // Files named after id_rsa (notes, backups, docs) but not in ~/.ssh/. + for (const file_path of [ + "/repo/notes/id_rsa_recovery.md", + "/repo/docs/setup-id_rsa.md", + "/repo/src/id_rsa-rotator.ts", + ]) { + const ctx = makeCtx({ toolName: "Write", toolInput: { file_path } }); + expect((await policy.fn(ctx)).decision, file_path).toBe("allow"); + } + }); + + it("still blocks ssh private keys and well-known credential paths", async () => { + for (const file_path of [ + "/home/user/.ssh/id_rsa", + "/home/user/.ssh/id_ed25519", + "/home/user/.aws/credentials", + "/home/user/.docker/credentials.json", + "/home/user/.netrc", + ]) { + const ctx = makeCtx({ toolName: "Write", toolInput: { file_path } }); + expect((await policy.fn(ctx)).decision, file_path).toBe("deny"); + } + }); + + it("allows ssh public keys (.pub) since they are not secrets", async () => { + for (const file_path of [ + "/home/user/.ssh/id_rsa.pub", + "/home/user/.ssh/id_ed25519.pub", + "/home/user/.ssh/id_ecdsa.pub", + ]) { + const ctx = makeCtx({ toolName: "Write", toolInput: { file_path } }); + expect((await policy.fn(ctx)).decision, file_path).toBe("allow"); + } + }); + + it("blocks Windows-style secret paths too", async () => { + for (const file_path of [ + "C:\\Users\\me\\.ssh\\id_rsa", + "C:\\Users\\me\\.aws\\credentials", + "C:\\Users\\me\\.docker\\credentials.json", + "C:\\Users\\me\\.netrc", + ]) { + const ctx = makeCtx({ toolName: "Write", toolInput: { file_path } }); + expect((await policy.fn(ctx)).decision, file_path).toBe("deny"); + } + }); }); describe("block-work-on-main", () => { @@ -1378,10 +1463,11 @@ describe("hooks/builtin-policies", () => { expect(result.decision).toBe("allow"); }); - it("returns instruct when sidecar shows 3+ identical calls", async () => { - // Policy now reads a per-session sidecar file instead of the full transcript. + it("returns instruct when this call brings the count to the threshold (3rd identical call)", async () => { + // Sidecar shows 2 prior identical calls; this call (the 3rd) brings the + // total to THRESHOLD and the warning fires reporting "3 times". const fingerprint = JSON.stringify({ tool: "Read", input: { file_path: "/foo/bar.ts" } }); - vi.mocked(readFile).mockResolvedValue(JSON.stringify({ [fingerprint]: 3 })); + vi.mocked(readFile).mockResolvedValue(JSON.stringify({ [fingerprint]: 2 })); const ctx = makeCtx({ toolName: "Read", @@ -1428,7 +1514,7 @@ describe("hooks/builtin-policies", () => { // Malformed sidecar JSON → counts reset to {} → allow on first call. // Valid sidecar at threshold → instruct. Test the valid-sidecar path here. const fingerprint = JSON.stringify({ tool: "Read", input: { file_path: "/foo/bar.ts" } }); - vi.mocked(readFile).mockResolvedValue(JSON.stringify({ [fingerprint]: 3 })); + vi.mocked(readFile).mockResolvedValue(JSON.stringify({ [fingerprint]: 2 })); const ctx = makeCtx({ toolName: "Read", @@ -1450,6 +1536,64 @@ describe("hooks/builtin-policies", () => { const result = await policy.fn(ctx); expect(result.decision).toBe("allow"); }); + + it("does not fire again for a fingerprint already warned", async () => { + // Second time the agent hits the same fingerprint after we already + // warned — keep quiet so the warning doesn't pile up turn after turn. + const fingerprint = JSON.stringify({ tool: "Read", input: { file_path: "/foo/bar.ts" } }); + vi.mocked(readFile).mockResolvedValue( + JSON.stringify({ counts: { [fingerprint]: 7 }, warned: { [fingerprint]: true } }), + ); + + const ctx = makeCtx({ + toolName: "Read", + toolInput: { file_path: "/foo/bar.ts" }, + session: { transcriptPath: "/tmp/transcript.jsonl" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("canonicalizes input key order so reordered identical calls match", async () => { + // Tracker recorded { file_path, offset } in one order; live call sends + // them in the opposite order. Should still fingerprint as the same call. + const canonicalFp = JSON.stringify({ + tool: "Read", + input: { file_path: "/foo/bar.ts", offset: 100 }, + }); + vi.mocked(readFile).mockResolvedValue( + JSON.stringify({ counts: { [canonicalFp]: 3 }, warned: {} }), + ); + + const ctx = makeCtx({ + toolName: "Read", + // Reverse key order — JSON.stringify alone would not match the bucket. + toolInput: { offset: 100, file_path: "/foo/bar.ts" } as Record, + session: { transcriptPath: "/tmp/transcript.jsonl" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("instruct"); + }); + + it("treats reads with different offset/limit as distinct fingerprints", async () => { + // Three prior reads of the same file at offset 0, but the current call + // is at offset 500 — that's a genuinely new call, do not warn. + const otherFp = JSON.stringify({ + tool: "Read", + input: { file_path: "/foo/bar.ts", offset: 0 }, + }); + vi.mocked(readFile).mockResolvedValue( + JSON.stringify({ counts: { [otherFp]: 3 }, warned: {} }), + ); + + const ctx = makeCtx({ + toolName: "Read", + toolInput: { file_path: "/foo/bar.ts", offset: 500 }, + session: { transcriptPath: "/tmp/transcript.jsonl" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); }); describe("warn-git-amend", () => { diff --git a/bin/failproofai.mjs b/bin/failproofai.mjs index a451a8c..480c852 100755 --- a/bin/failproofai.mjs +++ b/bin/failproofai.mjs @@ -7,6 +7,7 @@ * --version / -v Print version and exit * --help / -h Show usage and exit * policies Manage policies (list / install / uninstall) + * auth Email-OTP login against the failproofai api-server * (default) Launch production dashboard */ import { realpathSync } from "node:fs"; @@ -103,7 +104,7 @@ if (hookIdx >= 0) { */ async function runCli() { // --help / -h (only when not inside a subcommand that handles its own --help) - const SUBCOMMANDS = ["policies", "audit"]; + const SUBCOMMANDS = ["policies", "audit", "auth"]; if ((args.includes("--help") || args.includes("-h")) && !SUBCOMMANDS.includes(args[0])) { const extraArgs = args.filter((a) => a !== "--help" && a !== "-h"); if (extraArgs.length > 0) { @@ -159,6 +160,11 @@ COMMANDS --no-cache Bypass the per-transcript cache. audit --help, -h Show this help for the audit command + auth login [--email ] Log in via email + one-time code + auth logout Sign out and clear the local session + auth whoami Show the currently signed-in user + auth --help, -h Show this help for the auth command + --version, -v Print version and exit --help, -h Show this help message @@ -622,6 +628,81 @@ EXAMPLES process.exit(0); } + // auth — email-OTP login against the failproofai api-server. + if (args[0] === "auth") { + const subArgs = args.slice(1); + + if (subArgs.length === 0 || subArgs.includes("--help") || subArgs.includes("-h")) { + console.log(` +failproofai auth — log in to the failproofai api-server + +USAGE + failproofai auth login [--email ] Send a one-time code and exchange + it for a session + failproofai auth logout Revoke the session and clear local + state + failproofai auth whoami Show the currently signed-in user + failproofai auth --help, -h Show this help + +ENVIRONMENT + FAILPROOFAI_API_BASE_URL Override the api-server URL (default: + https://api.befailproof.ai). + +EXAMPLES + failproofai auth login + failproofai auth login --email me@example.com + failproofai auth whoami + failproofai auth logout +`.trimStart()); + process.exit(0); + } + + const verb = subArgs[0]; + lastSubcommand = `auth_${verb}`; + + if (verb === "login") { + const rest = subArgs.slice(1); + const emailIdx = rest.indexOf("--email"); + let email; + if (emailIdx >= 0) { + email = rest[emailIdx + 1]; + if (!email || email.startsWith("-")) { + throw new CliError("Missing value for --email. Usage: --email "); + } + rest.splice(emailIdx, 2); + } + for (const arg of rest) { + throw new CliError(`Unexpected argument: ${arg}\nRun \`failproofai auth --help\` for usage.`); + } + const { loginCmd } = await import("../src/auth/manager"); + await loginCmd({ email }); + await track("cli_auth_login_success", {}); + process.exit(0); + } + + if (verb === "logout") { + if (subArgs.length > 1) { + throw new CliError(`Unexpected argument: ${subArgs[1]}\nRun \`failproofai auth --help\` for usage.`); + } + const { logoutCmd } = await import("../src/auth/manager"); + await logoutCmd(); + await track("cli_auth_logout_success", {}); + process.exit(0); + } + + if (verb === "whoami") { + if (subArgs.length > 1) { + throw new CliError(`Unexpected argument: ${subArgs[1]}\nRun \`failproofai auth --help\` for usage.`); + } + const { whoamiCmd } = await import("../src/auth/manager"); + await whoamiCmd(); + await track("cli_auth_whoami_success", {}); + process.exit(0); + } + + throw new CliError(`Unknown auth subcommand: ${verb}\nRun \`failproofai auth --help\` for usage.`); + } + // Unknown flag guard — must appear after all known-flag branches const knownFlags = ["--version", "-v", "--help", "-h", "--hook"]; const unknownFlag = args.find(a => a.startsWith("-") && !knownFlags.includes(a)); @@ -640,7 +721,7 @@ EXAMPLES return dp[m][n]; } - const primary = ["--version", "--help", "--hook", "policies", "audit"]; + const primary = ["--version", "--help", "--hook", "policies", "audit", "auth"]; const closest = primary.reduce((best, flag) => { const dist = levenshtein(unknownFlag, flag); return dist < best.dist ? { flag, dist } : best; @@ -682,11 +763,23 @@ EXAMPLES // ── Import CliError for use in the guard above ──────────────────────────────── const { CliError } = await import("../src/cli-error"); +// bun's bundler can emit multiple copies of the CliError class when it shows +// up via both a static import (inside a dynamically-imported subcommand +// handler) and the dynamic import on the line above. `instanceof` then +// returns false across the two copies and a clean user-facing error gets +// reported as "Unexpected error". Duck-type instead. +function isCliError(err) { + return ( + err instanceof CliError + || (err && err.name === "CliError" && (err.exitCode === 1 || err.exitCode === 2)) + ); +} + // ── Run ─────────────────────────────────────────────────────────────────────── try { await runCli(); } catch (err) { - if (err instanceof CliError) { + if (isCliError(err)) { if (lastSubcommand === "install") { await track("cli_install_failure", { error_type: "cli_error", exit_code: err.exitCode }); } else if (lastSubcommand === "uninstall") { diff --git a/src/auth/api-client.ts b/src/auth/api-client.ts new file mode 100644 index 0000000..0660900 --- /dev/null +++ b/src/auth/api-client.ts @@ -0,0 +1,167 @@ +/** + * Typed wrappers around the failproofai api-server's `/v0/auth/*` endpoints. + * + * Uses native `fetch` (Node 20+); same shape as `src/hooks/llm-client.ts`. All + * non-2xx responses throw `CliError` with the server's `detail` string when + * available, so handlers can surface the message directly to the user. + */ +import { CliError } from "../cli-error"; + +const DEFAULT_API_BASE_URL = "https://api.befailproof.ai"; + +export interface UserView { + id: string; + email: string; +} + +export interface TokenResponse { + token_type: "Bearer"; + access_token: string; + access_expires_in: number; + refresh_token: string; + refresh_expires_in: number; + user: UserView; +} + +export interface AccessRefreshResponse { + token_type: "Bearer"; + access_token: string; + access_expires_in: number; + refresh_token: string; + refresh_expires_in: number; +} + +export interface LoginRequestResponse { + status: string; + expires_in: number; + resend_available_in: number; +} + +export interface MeResponse { + id: string; + email: string; + status: string; + created_at: string; +} + +interface ServerErrorBody { + success: false; + code: string; + detail: string; +} + +export function resolveBaseUrl(): string { + const raw = process.env.FAILPROOFAI_API_BASE_URL?.trim(); + const url = raw && raw.length > 0 ? raw : DEFAULT_API_BASE_URL; + return url.replace(/\/+$/, ""); +} + +async function readErrorDetail(response: Response): Promise { + try { + const body = (await response.json()) as Partial; + if (body && typeof body.detail === "string" && body.detail.length > 0) { + return body.detail; + } + if (body && typeof body.code === "string") return body.code; + } catch { + // fall through + } + return `${response.status} ${response.statusText}`; +} + +/** Network/timeout failures: convert into a `CliError` so they land on the + * "Error: ..." exit-1 path instead of getting tagged "Unexpected error". The + * status field on the thrown error is left undefined for these so callers + * (whoamiCmd) can distinguish "transient" from "401, session is dead". */ +function wrapFetchFailure(err: unknown, url: string): CliError { + if (err instanceof Error && err.name === "TimeoutError") { + return new CliError(`Request to ${url} timed out`); + } + const msg = err instanceof Error ? err.message : String(err); + return new CliError(`Network error contacting ${url}: ${msg}`); +} + +interface ApiError extends CliError { + status?: number; +} + +function apiErrorFromResponse(detail: string, status: number): ApiError { + const err = new CliError(detail) as ApiError; + err.status = status; + return err; +} + +async function postJson( + url: string, + body: unknown, + init?: { authToken?: string; timeoutMs?: number }, +): Promise { + const headers: Record = { "Content-Type": "application/json" }; + if (init?.authToken) headers["Authorization"] = `Bearer ${init.authToken}`; + + let response: Response; + try { + response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: AbortSignal.timeout(init?.timeoutMs ?? 15_000), + }); + } catch (err) { + throw wrapFetchFailure(err, url); + } + + if (!response.ok) { + const detail = await readErrorDetail(response); + if (response.status === 429) { + const retryAfter = response.headers.get("retry-after"); + const hint = retryAfter ? ` (retry after ${retryAfter}s)` : ""; + throw apiErrorFromResponse(`${detail}${hint}`, 429); + } + throw apiErrorFromResponse(detail, response.status); + } + + if (response.status === 204) return undefined as T; + return (await response.json()) as T; +} + +async function getJson(url: string, authToken: string, timeoutMs = 15_000): Promise { + let response: Response; + try { + response = await fetch(url, { + method: "GET", + headers: { Authorization: `Bearer ${authToken}` }, + signal: AbortSignal.timeout(timeoutMs), + }); + } catch (err) { + throw wrapFetchFailure(err, url); + } + if (!response.ok) { + throw apiErrorFromResponse(await readErrorDetail(response), response.status); + } + return (await response.json()) as T; +} + +export function requestLoginCode(baseUrl: string, email: string): Promise { + return postJson(`${baseUrl}/v0/auth/login/request`, { email }); +} + +export function verifyLoginCode(baseUrl: string, email: string, code: string): Promise { + return postJson(`${baseUrl}/v0/auth/login/verify`, { email, code }); +} + +export function refreshTokens(baseUrl: string, refreshToken: string): Promise { + return postJson(`${baseUrl}/v0/auth/token/refresh`, { + refresh_token: refreshToken, + }); +} + +export function logout(baseUrl: string, accessToken: string, refreshToken: string): Promise { + return postJson(`${baseUrl}/v0/auth/logout`, { refresh_token: refreshToken }, { + authToken: accessToken, + }); +} + +export function me(baseUrl: string, accessToken: string): Promise { + return getJson(`${baseUrl}/v0/auth/me`, accessToken); +} diff --git a/src/auth/manager.ts b/src/auth/manager.ts new file mode 100644 index 0000000..edb1452 --- /dev/null +++ b/src/auth/manager.ts @@ -0,0 +1,110 @@ +/** + * Handlers behind `failproofai auth login | logout | whoami`. + * + * Talks to the failproofai api-server via `./api-client` and persists the + * session via `./session-store`. + */ +import { CliError } from "../cli-error"; +import { logWarn } from "../../lib/logger"; +import { + logout as apiLogout, + me as apiMe, + refreshTokens, + requestLoginCode, + resolveBaseUrl, + verifyLoginCode, +} from "./api-client"; +import { promptCode, promptEmail } from "./prompts"; +import { + type AuthSession, + clearSession, + readSession, + writeSession, +} from "./session-store"; + +function expiresAt(seconds: number): number { + return Date.now() + seconds * 1000; +} + +export async function loginCmd(opts: { email?: string }): Promise { + const baseUrl = resolveBaseUrl(); + const email = opts.email ?? (await promptEmail()); + + const requested = await requestLoginCode(baseUrl, email); + const expiresMin = Math.max(1, Math.round(requested.expires_in / 60)); + process.stdout.write(`Sent a code to ${email} (expires in ${expiresMin}m).\n`); + + const code = await promptCode(); + const tokens = await verifyLoginCode(baseUrl, email, code); + + const session: AuthSession = { + user: tokens.user, + access_token: tokens.access_token, + access_expires_at: expiresAt(tokens.access_expires_in), + refresh_token: tokens.refresh_token, + refresh_expires_at: expiresAt(tokens.refresh_expires_in), + api_base_url: baseUrl, + }; + writeSession(session); + + process.stdout.write(`Logged in as ${tokens.user.email}.\n`); +} + +export async function logoutCmd(): Promise { + const session = readSession(); + if (!session) { + process.stdout.write("Not logged in.\n"); + return; + } + + try { + await apiLogout(session.api_base_url, session.access_token, session.refresh_token); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + logWarn(`auth: server-side logout failed, clearing local session anyway: ${msg}`); + } + + clearSession(); + process.stdout.write("Logged out.\n"); +} + +export async function whoamiCmd(): Promise { + const session = readSession(); + if (!session) { + throw new CliError("Not logged in. Run `failproofai auth login`."); + } + + let accessToken = session.access_token; + if (Date.now() >= session.access_expires_at) { + if (Date.now() >= session.refresh_expires_at) { + clearSession(); + throw new CliError("Session expired. Run `failproofai auth login`."); + } + try { + const refreshed = await refreshTokens(session.api_base_url, session.refresh_token); + const rotated: AuthSession = { + ...session, + access_token: refreshed.access_token, + access_expires_at: expiresAt(refreshed.access_expires_in), + refresh_token: refreshed.refresh_token, + refresh_expires_at: expiresAt(refreshed.refresh_expires_in), + }; + writeSession(rotated); + accessToken = refreshed.access_token; + } catch (err) { + // Only nuke the local session when the server actively rejects the + // refresh token (401/403) — transient failures (network, 5xx, timeout) + // should leave the session intact so the user can just retry. + const status = (err as { status?: number } | null)?.status; + const msg = err instanceof Error ? err.message : String(err); + if (status === 401 || status === 403) { + clearSession(); + throw new CliError(`Session expired. Run \`failproofai auth login\`.`); + } + throw new CliError(`Could not refresh session: ${msg}. Try again or run \`failproofai auth login\`.`); + } + } + + const profile = await apiMe(session.api_base_url, accessToken); + process.stdout.write(`${profile.email} (id: ${profile.id}, status: ${profile.status})\n`); +} diff --git a/src/auth/prompts.ts b/src/auth/prompts.ts new file mode 100644 index 0000000..409dae6 --- /dev/null +++ b/src/auth/prompts.ts @@ -0,0 +1,32 @@ +/** + * Tiny readline prompts for the `auth login` flow. Matches the codebase + * convention of using `node:readline` directly rather than pulling in a + * prompts dependency. + */ +import { createInterface } from "node:readline/promises"; +import { CliError } from "../cli-error"; + +async function ask(question: string): Promise { + const rl = createInterface({ input: process.stdin, output: process.stdout }); + try { + const answer = await rl.question(question); + return answer.trim(); + } finally { + rl.close(); + } +} + +export async function promptEmail(): Promise { + const email = await ask("Email: "); + if (!email) throw new CliError("Email is required."); + if (!email.includes("@") || !email.includes(".")) { + throw new CliError(`"${email}" does not look like a valid email.`); + } + return email; +} + +export async function promptCode(): Promise { + const code = await ask("Code: "); + if (!code) throw new CliError("Code is required."); + return code; +} diff --git a/src/auth/session-store.ts b/src/auth/session-store.ts new file mode 100644 index 0000000..966ceaf --- /dev/null +++ b/src/auth/session-store.ts @@ -0,0 +1,62 @@ +/** + * On-disk store for the failproofai auth session. + * + * Lives at `~/.failproofai/session.json`. Written with mode 0600 so other + * users on the machine cannot read the access/refresh tokens — same pattern + * the audit cache uses (`src/audit/cache.ts`). + */ +import { chmodSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; + +const SESSION_DIR = join(homedir(), ".failproofai"); +const SESSION_FILE = join(SESSION_DIR, "session.json"); + +export interface AuthSession { + user: { id: string; email: string }; + access_token: string; + access_expires_at: number; + refresh_token: string; + refresh_expires_at: number; + api_base_url: string; +} + +export function sessionPath(): string { + return SESSION_FILE; +} + +export function readSession(): AuthSession | null { + try { + const raw = readFileSync(SESSION_FILE, "utf-8"); + const parsed = JSON.parse(raw) as Partial; + if ( + !parsed + || typeof parsed.access_token !== "string" + || typeof parsed.refresh_token !== "string" + || typeof parsed.access_expires_at !== "number" + || typeof parsed.refresh_expires_at !== "number" + || typeof parsed.api_base_url !== "string" + || !parsed.user + || typeof parsed.user.id !== "string" + || typeof parsed.user.email !== "string" + ) { + return null; + } + return parsed as AuthSession; + } catch { + return null; + } +} + +export function writeSession(session: AuthSession): void { + mkdirSync(SESSION_DIR, { recursive: true }); + writeFileSync(SESSION_FILE, JSON.stringify(session, null, 2), { + encoding: "utf-8", + mode: 0o600, + }); + try { chmodSync(SESSION_FILE, 0o600); } catch { /* best-effort on POSIX */ } +} + +export function clearSession(): void { + rmSync(SESSION_FILE, { force: true }); +} diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index 54db64c..1e04efd 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -131,7 +131,10 @@ const API_KEY_PATTERNS: Array<[RegExp, string]> = [ ]; // sanitizeConnectionStrings -const CONNECTION_STRING_RE = /(?:postgresql|postgres|mysql|mongodb(?:\+srv)?|redis|amqps?|smtps?):\/\/[^@\s]+@/; +// Require a URL-safe user:password@host shape so regex patterns or example +// snippets that merely contain a scheme name (e.g. a grep pattern like +// `postgres://|mongodb://|@foo:1/`) do not get flagged as leaked secrets. +const CONNECTION_STRING_RE = /(?:postgresql|postgres|mysql|mongodb(?:\+srv)?|redis|amqps?|smtps?):\/\/[\w.\-+%]+:[^@\s:|&?#]+@[\w.\-]+/; // sanitizePrivateKeyContent const PRIVATE_KEY_RE = /-----BEGIN (?:[A-Z]+ )?PRIVATE KEY-----/; @@ -174,10 +177,15 @@ const PS_WEB_PIPE_RE = /(?:Invoke-WebRequest|iwr|Invoke-RestMethod|irm)\s+.*\|\s // blockForcePush const FORCE_PUSH_RE = /(?:--force|-f\b)/; -// blockSecretsWrite -const SECRET_FILE_RE = /\.(?:pem|key)$/; -const SECRET_FILE_ID_RSA_RE = /id_rsa/; -const SECRET_FILE_CREDENTIALS_RE = /credentials/; +// blockSecretsWrite — match well-known secret files only, never source code +// (e.g. `src/auth/session-store.ts` is not a secret even though it deals with +// session tokens; `id_rsa_backup.md` is documentation, not a key). Patterns +// accept both POSIX `/` and Windows `\` path separators so e.g. +// `C:\Users\me\.ssh\id_rsa` is still caught. +const SECRET_FILE_RE = /\.(?:pem|key|p12|pfx|jks)$/; +const SECRET_FILE_ID_RSA_RE = /(?:^|[\\/])id_(?:rsa|dsa|ecdsa|ed25519)$/; +const SECRET_FILE_CREDENTIALS_RE = + /(?:^|[\\/])\.(?:aws|docker|kube|azure|gcloud)[\\/]credentials(?:\.db|\.json)?$|[\\/]\.config[\\/]gcloud[\\/]credentials(?:\.db|\.json)?$|(?:^|[\\/])\.netrc$/; // blockWorkOnMain const GIT_COMMIT_MERGE_RE = /git\s+(commit|merge|rebase|cherry-pick)\b/; @@ -904,36 +912,82 @@ function blockGhPipeline(ctx: PolicyContext): PolicyResult { // than growing the file unboundedly. const TOOL_CALL_TRACKER_MAX_BYTES = 65_536; // 64 KB +/** Stable JSON: sort object keys recursively so {a:1,b:2} and {b:2,a:1} + * produce the same fingerprint. Without this, a tool input with the same + * fields in different orders fingerprints differently and the policy misses + * real repetitions; worse, it can over-fire when one ordering accumulates + * count and a later identical-but-reordered call lands on a different bucket. + */ +function canonicalize(value: unknown): unknown { + if (value === null || typeof value !== "object") return value; + if (Array.isArray(value)) return value.map(canonicalize); + const obj = value as Record; + const out: Record = {}; + for (const k of Object.keys(obj).sort()) { + out[k] = canonicalize(obj[k]); + } + return out; +} + +interface RepeatedCallsTracker { + counts: Record; + warned: Record; +} + +function parseTracker(raw: string): RepeatedCallsTracker { + const parsed: unknown = JSON.parse(raw); + if (parsed && typeof parsed === "object" && "counts" in (parsed as object)) { + const obj = parsed as Partial; + return { + counts: obj.counts ?? {}, + warned: obj.warned ?? {}, + }; + } + // Back-compat: older sessions wrote bare `{ fingerprint: count }`. + return { counts: (parsed ?? {}) as Record, warned: {} }; +} + async function warnRepeatedToolCalls(ctx: PolicyContext): Promise { const THRESHOLD = 3; const transcriptPath = ctx.session?.transcriptPath; if (!transcriptPath || !ctx.toolName || !ctx.toolInput) return allow(); - // Sidecar file tracks { fingerprint: count } — O(1) per call vs O(transcript) per call. + // Sidecar file tracks { counts, warned } per session — O(1) per call vs + // O(transcript) per call. const trackerPath = `${transcriptPath}.tool-calls.json`; - const fingerprint = JSON.stringify({ tool: ctx.toolName, input: ctx.toolInput }); + const fingerprint = JSON.stringify({ + tool: ctx.toolName, + input: canonicalize(ctx.toolInput), + }); - let counts: Record = {}; + let tracker: RepeatedCallsTracker = { counts: {}, warned: {} }; try { - const raw = await readFile(trackerPath, "utf8"); - counts = JSON.parse(raw) as Record; + tracker = parseTracker(await readFile(trackerPath, "utf8")); } catch { /* first call or unreadable — start fresh */ } - const prevCount = counts[fingerprint] ?? 0; - if (prevCount >= THRESHOLD) { - return instruct( - `STOP: You have already called ${ctx.toolName} ${prevCount} times with identical parameters. This is wasteful and unproductive. Do NOT repeat this call — use a different approach or ask the user for clarification.`, - ); - } + const prevCount = tracker.counts[fingerprint] ?? 0; + const nextCount = prevCount + 1; + // Fire when this current call brings the total to THRESHOLD or above. With + // THRESHOLD = 3 that means we warn on the 3rd identical call. We also only + // fire once per fingerprint — repeated warnings add noise without adding + // information; the agent already got the message the first time. + const shouldWarn = nextCount >= THRESHOLD && !tracker.warned[fingerprint]; + + tracker.counts[fingerprint] = nextCount; + if (shouldWarn) tracker.warned[fingerprint] = true; - counts[fingerprint] = prevCount + 1; try { - const serialized = JSON.stringify(counts); + const serialized = JSON.stringify(tracker); if (serialized.length <= TOOL_CALL_TRACKER_MAX_BYTES) { await writeFile(trackerPath, serialized, "utf8"); } } catch { /* non-fatal */ } + if (shouldWarn) { + return instruct( + `STOP: You have called ${ctx.toolName} ${nextCount} times with identical parameters. This is wasteful and unproductive. Do NOT repeat this call — use a different approach or ask the user for clarification.`, + ); + } return allow(); }