diff --git a/actions/setup/js/create_issue.cjs b/actions/setup/js/create_issue.cjs index 28ff1c7b499..160ef8d1ec6 100644 --- a/actions/setup/js/create_issue.cjs +++ b/actions/setup/js/create_issue.cjs @@ -43,6 +43,7 @@ const { createAuthenticatedGitHubClient } = require("./handler_auth.cjs"); const { removeDuplicateTitleFromDescription } = require("./remove_duplicate_title.cjs"); const { getErrorMessage } = require("./error_helpers.cjs"); const { ERR_VALIDATION } = require("./error_codes.cjs"); +const { withRetry } = require("./error_recovery.cjs"); const { renderTemplateFromFile } = require("./messages_core.cjs"); const { createExpirationLine, addExpirationToFooter } = require("./ephemerals.cjs"); const { MAX_SUB_ISSUES, getSubIssueCount } = require("./sub_issue_helpers.cjs"); @@ -167,13 +168,18 @@ async function findOrCreateParentIssue({ githubClient, groupId, owner, repo, tit core.info(`Creating new parent issue for group: ${groupId}`); try { const template = createParentIssueTemplate(groupId, titlePrefix, workflowName, workflowSourceURL, expiresHours); - const { data: parentIssue } = await githubClient.rest.issues.create({ - owner, - repo, - title: template.title, - body: template.body, - labels: labels, - }); + const { data: parentIssue } = await withRetry( + () => + githubClient.rest.issues.create({ + owner, + repo, + title: template.title, + body: template.body, + labels: labels, + }), + { initialDelayMs: 15000, maxDelayMs: 45000, jitterMs: 10000 }, + `create_parent_issue for group ${groupId}` + ); core.info(`Created new parent issue #${parentIssue.number}: ${parentIssue.html_url}`); return parentIssue.number; @@ -588,14 +594,19 @@ async function main(config = {}) { } try { - const { data: issue } = await githubClient.rest.issues.create({ - owner: repoParts.owner, - repo: repoParts.repo, - title, - body, - labels, - assignees, - }); + const { data: issue } = await withRetry( + () => + githubClient.rest.issues.create({ + owner: repoParts.owner, + repo: repoParts.repo, + title, + body, + labels, + assignees, + }), + { initialDelayMs: 15000, maxDelayMs: 45000, jitterMs: 10000 }, + `create_issue in ${qualifiedItemRepo}` + ); core.info(`Created issue ${qualifiedItemRepo}#${issue.number}: ${issue.html_url}`); createdIssues.push({ ...issue, _repo: qualifiedItemRepo }); diff --git a/actions/setup/js/create_issue.test.cjs b/actions/setup/js/create_issue.test.cjs index 9a111cac42e..d6aa22df303 100644 --- a/actions/setup/js/create_issue.test.cjs +++ b/actions/setup/js/create_issue.test.cjs @@ -452,7 +452,7 @@ describe("create_issue", () => { const result = await handler({ title: "Test" }); expect(result.success).toBe(false); - expect(result.error).toBe("API Error"); + expect(result.error).toContain("API Error"); }); }); @@ -667,4 +667,95 @@ describe("create_issue", () => { expect(mockGithub.rest.issues.create).not.toHaveBeenCalled(); }); }); + + describe("retry on rate limit errors", () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it("should retry issue creation on transient rate limit error and succeed", async () => { + mockGithub.rest.issues.create = vi + .fn() + .mockRejectedValueOnce(new Error("Secondary rate limit hit")) + .mockResolvedValue({ + data: { + number: 456, + html_url: "https://github.com/owner/repo/issues/456", + title: "Retried Issue", + }, + }); + + const handler = await main({}); + const resultPromise = handler({ + title: "Retried Issue", + body: "Test body", + }); + + await vi.runAllTimersAsync(); + const result = await resultPromise; + + expect(result.success).toBe(true); + expect(result.number).toBe(456); + expect(mockGithub.rest.issues.create).toHaveBeenCalledTimes(2); + }); + + it("should fail after exhausting retries on persistent rate limit error", async () => { + mockGithub.rest.issues.create = vi.fn().mockRejectedValue(new Error("Secondary rate limit hit")); + + const handler = await main({}); + const resultPromise = handler({ + title: "Failing Issue", + body: "Test body", + }); + + await vi.runAllTimersAsync(); + const result = await resultPromise; + + expect(result.success).toBe(false); + expect(result.error).toBeDefined(); + // 1 initial + 3 retries = 4 calls + expect(mockGithub.rest.issues.create).toHaveBeenCalledTimes(4); + }); + + it("should have retry delays that never exceed maxDelayMs + jitterMs", async () => { + const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout"); + + mockGithub.rest.issues.create = vi + .fn() + .mockRejectedValueOnce(new Error("Secondary rate limit hit")) + .mockRejectedValueOnce(new Error("Secondary rate limit hit")) + .mockResolvedValue({ + data: { + number: 789, + html_url: "https://github.com/owner/repo/issues/789", + title: "Bounded Delay Issue", + }, + }); + + const handler = await main({}); + const resultPromise = handler({ + title: "Bounded Delay Issue", + body: "Test body", + }); + + await vi.runAllTimersAsync(); + await resultPromise; + + // create_issue uses { initialDelayMs: 15000, maxDelayMs: 45000, jitterMs: 10000 } + // Maximum possible delay per retry = maxDelayMs + jitterMs = 55000ms + const maxBound = 55000; + // Filter out short setTimeout calls (e.g. from test infrastructure) to isolate retry delays + const sleepDelays = setTimeoutSpy.mock.calls.filter(([, ms]) => ms > 1000).map(([, ms]) => ms); + + for (const delay of sleepDelays) { + expect(delay).toBeLessThanOrEqual(maxBound); + } + + setTimeoutSpy.mockRestore(); + }); + }); }); diff --git a/actions/setup/js/error_recovery.cjs b/actions/setup/js/error_recovery.cjs index 8707b2d6a75..688fa2f9a99 100644 --- a/actions/setup/js/error_recovery.cjs +++ b/actions/setup/js/error_recovery.cjs @@ -16,6 +16,7 @@ const { ERR_API } = require("./error_codes.cjs"); * @property {number} initialDelayMs - Initial delay in milliseconds (default: 1000) * @property {number} maxDelayMs - Maximum delay in milliseconds (default: 10000) * @property {number} backoffMultiplier - Backoff multiplier for exponential backoff (default: 2) + * @property {number} jitterMs - Maximum random jitter in milliseconds added to each retry delay (default: 100) * @property {(error: any) => boolean} shouldRetry - Function to determine if error is retryable */ @@ -28,6 +29,7 @@ const DEFAULT_RETRY_CONFIG = { initialDelayMs: 1000, maxDelayMs: 10000, backoffMultiplier: 2, + jitterMs: 100, shouldRetry: isTransientError, }; @@ -95,8 +97,10 @@ async function withRetry(operation, config = {}, operationName = "operation") { for (let attempt = 0; attempt <= fullConfig.maxRetries; attempt++) { try { if (attempt > 0) { - core.info(`Retry attempt ${attempt}/${fullConfig.maxRetries} for ${operationName} after ${delay}ms delay`); - await sleep(delay); + const jitter = fullConfig.jitterMs > 0 ? Math.floor(Math.random() * fullConfig.jitterMs) : 0; + const delayWithJitter = delay + jitter; + core.info(`Retry attempt ${attempt}/${fullConfig.maxRetries} for ${operationName} after ${delayWithJitter}ms delay`); + await sleep(delayWithJitter); } const result = await operation(); diff --git a/actions/setup/js/error_recovery.test.cjs b/actions/setup/js/error_recovery.test.cjs index 3d62889b7ea..32a0823f555 100644 --- a/actions/setup/js/error_recovery.test.cjs +++ b/actions/setup/js/error_recovery.test.cjs @@ -110,6 +110,7 @@ describe("error_recovery", () => { initialDelayMs: 100, backoffMultiplier: 2, maxDelayMs: 1000, + jitterMs: 0, }; await withRetry(operation, config, "test-operation"); @@ -130,6 +131,7 @@ describe("error_recovery", () => { initialDelayMs: 1000, backoffMultiplier: 10, maxDelayMs: 2000, // Cap at 2000ms + jitterMs: 0, }; await withRetry(operation, config, "test-operation"); @@ -147,6 +149,45 @@ describe("error_recovery", () => { expect(operation).toHaveBeenCalledTimes(1); expect(shouldRetry).toHaveBeenCalled(); }); + + it("should add random jitter to delay when jitterMs is configured", async () => { + const randomSpy = vi.spyOn(Math, "random").mockReturnValue(0.5); + const operation = vi.fn().mockRejectedValueOnce(new Error("Network timeout")).mockResolvedValue("success"); + + const config = { + maxRetries: 2, + initialDelayMs: 100, + backoffMultiplier: 2, + maxDelayMs: 10000, + jitterMs: 1000, + }; + + await withRetry(operation, config, "test-operation"); + + // Base delay after first failure: 100 * 2 = 200ms + // Jitter: Math.floor(0.5 * 1000) = 500ms + // Total: 200 + 500 = 700ms + expect(core.info).toHaveBeenCalledWith(expect.stringContaining("after 700ms delay")); + + randomSpy.mockRestore(); + }); + + it("should not add jitter when jitterMs is 0", async () => { + const operation = vi.fn().mockRejectedValueOnce(new Error("Network timeout")).mockResolvedValue("success"); + + const config = { + maxRetries: 2, + initialDelayMs: 100, + backoffMultiplier: 2, + maxDelayMs: 10000, + jitterMs: 0, + }; + + await withRetry(operation, config, "test-operation"); + + // Base delay after first failure: 100 * 2 = 200ms, no jitter + expect(core.info).toHaveBeenCalledWith(expect.stringContaining("after 200ms delay")); + }); }); describe("enhanceError", () => { @@ -288,6 +329,7 @@ describe("error_recovery", () => { expect(DEFAULT_RETRY_CONFIG.initialDelayMs).toBe(1000); expect(DEFAULT_RETRY_CONFIG.maxDelayMs).toBe(10000); expect(DEFAULT_RETRY_CONFIG.backoffMultiplier).toBe(2); + expect(DEFAULT_RETRY_CONFIG.jitterMs).toBe(100); expect(DEFAULT_RETRY_CONFIG.shouldRetry).toBe(isTransientError); }); }); diff --git a/cmd/gh-aw/argument_syntax_test.go b/cmd/gh-aw/argument_syntax_test.go index 46a53f957f5..e59a4c940bc 100644 --- a/cmd/gh-aw/argument_syntax_test.go +++ b/cmd/gh-aw/argument_syntax_test.go @@ -63,7 +63,7 @@ func TestArgumentSyntaxConsistency(t *testing.T) { { name: "remove command has optional pattern", command: removeCmd, - expectedUse: "remove [pattern]", + expectedUse: "remove [filter]", argsValidator: "no validator (all optional)", shouldValidate: func(cmd *cobra.Command) error { return nil }, },