Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions actions/setup/js/add_labels.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const { createAuthenticatedGitHubClient } = require("./handler_auth.cjs");
const { resolveRepoIssueTarget, loadTemporaryIdMapFromResolved } = require("./temporary_id.cjs");
const { MAX_LABELS } = require("./constants.cjs");
const { createCountGatedHandler } = require("./handler_scaffold.cjs");
const { withRetry, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs");

/**
* Main handler factory for add_labels
Expand Down Expand Up @@ -167,12 +168,17 @@ const main = createCountGatedHandler({
}

try {
await githubClient.rest.issues.addLabels({
owner: repoParts.owner,
repo: repoParts.repo,
issue_number: itemNumber,
labels: uniqueLabels,
});
await withRetry(
() =>
githubClient.rest.issues.addLabels({
owner: repoParts.owner,
repo: repoParts.repo,
issue_number: itemNumber,
labels: uniqueLabels,
}),
RATE_LIMIT_RETRY_CONFIG,
`add_labels to ${contextType} #${itemNumber} in ${itemRepo}`
);

core.info(`Successfully added ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}`);
return {
Expand Down
3 changes: 2 additions & 1 deletion actions/setup/js/add_labels.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ describe("add_labels", () => {
info: () => {},
warning: () => {},
error: () => {},
debug: () => {},
messages: [],
infos: [],
warnings: [],
Expand Down Expand Up @@ -314,7 +315,7 @@ describe("add_labels", () => {
);

expect(result.success).toBe(false);
expect(result.error.includes("API Error")).toBe(true);
expect(result.error).toContain("API Error: Not found");
});

it("should deduplicate labels", async () => {
Expand Down
4 changes: 2 additions & 2 deletions actions/setup/js/create_issue.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const { createAuthenticatedGitHubClient } = require("./handler_auth.cjs");
const { removeDuplicateTitleFromDescription } = require("./remove_duplicate_title.cjs");
const { getErrorMessage } = require("./error_helpers.cjs");
const { ERR_VALIDATION } = require("./error_codes.cjs");
const { withRetry } = require("./error_recovery.cjs");
const { withRetry, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs");
const { renderTemplateFromFile } = require("./messages_core.cjs");
const { createExpirationLine, addExpirationToFooter } = require("./ephemerals.cjs");
const { MAX_SUB_ISSUES, getSubIssueCount } = require("./sub_issue_helpers.cjs");
Expand Down Expand Up @@ -585,7 +585,7 @@ async function main(config = {}) {
labels,
assignees,
}),
{ initialDelayMs: 15000, maxDelayMs: 45000, jitterMs: 10000 },
RATE_LIMIT_RETRY_CONFIG,
`create_issue in ${qualifiedItemRepo}`
);

Expand Down
95 changes: 93 additions & 2 deletions actions/setup/js/error_recovery.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

const { getErrorMessage } = require("./error_helpers.cjs");
const { ERR_API } = require("./error_codes.cjs");
const { logRetryEvent } = require("./github_rate_limit_logger.cjs");

/**
* Configuration for retry behavior
Expand All @@ -33,6 +34,25 @@ const DEFAULT_RETRY_CONFIG = {
shouldRetry: isTransientError,
};

/**
* Retry configuration for GitHub API rate-limit scenarios.
* Uses longer delays to handle installation token exhaustion during burst windows.
*
* Backoff sequence (approximate — jitter of up to 5 s is added per retry):
* ~30 s → ~60 s → ~120 s → ~240 s → ~240 s (capped)
*
* Note: The first actual retry sleep = initialDelayMs * backoffMultiplier = 15 000 * 2 = 30 000 ms.
* @type {RetryConfig}
*/
const RATE_LIMIT_RETRY_CONFIG = {
maxRetries: 5,
initialDelayMs: 15000, // 15 s × backoffMultiplier(2) = 30 s first retry
maxDelayMs: 240000, // 4-minute cap
backoffMultiplier: 2,
jitterMs: 5000, // Up to 5 s of jitter to spread concurrent retries
shouldRetry: isTransientError,
};

/**
* Determine if an error is transient and worth retrying
* @param {any} error - The error to check
Expand Down Expand Up @@ -80,6 +100,61 @@ function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}

/**
* Extract the Retry-After delay in milliseconds from a GitHub API rate-limit error.
*
* Only applies when the response status indicates a rate-limit condition:
* - HTTP 429 (Too Many Requests)
* - HTTP 403 with `x-ratelimit-remaining: 0` (GitHub secondary rate limit)
*
* In those cases GitHub returns one of two headers:
* - `retry-after` – integer seconds to wait (per RFC 6585)
* - `x-ratelimit-reset` – Unix timestamp (seconds) when the quota resets
*
* For any other status (5xx transient errors, etc.) returns null so normal
* exponential backoff applies.
*
* @param {any} error - The error object from a failed GitHub API call
* @returns {number|null} Milliseconds to wait, or null if not a rate-limit response
*/
function getRetryAfterMs(error) {
// Octokit surfaces response headers via error.response.headers or error.headers
const status = error?.response?.status ?? error?.status ?? null;
const headers = error?.response?.headers ?? error?.headers ?? null;
if (!headers) return null;

// Only honour rate-limit headers for genuine rate-limit responses.
// GitHub uses 429 for primary rate limits and 403 for secondary rate limits
// (the latter always sets x-ratelimit-remaining to "0").
const remainingHeader = headers["x-ratelimit-remaining"];
const isRateLimitStatus = status === 429 || (status === 403 && remainingHeader != null && parseInt(remainingHeader, 10) === 0);

if (!isRateLimitStatus) return null;

// retry-after: number of seconds (highest priority)
const retryAfter = headers["retry-after"];
if (retryAfter != null) {
const seconds = parseInt(retryAfter, 10);
if (!isNaN(seconds) && seconds > 0) {
return seconds * 1000;
}
}

// x-ratelimit-reset: Unix timestamp — derive wait time from clock delta
const resetAt = headers["x-ratelimit-reset"];
if (resetAt != null) {
const resetTimestampMs = parseInt(resetAt, 10) * 1000;
if (!isNaN(resetTimestampMs)) {
const waitMs = resetTimestampMs - Date.now();
if (waitMs > 0) {
return waitMs;
}
}
}
Comment on lines +103 to +153

Copilot AI May 1, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getRetryAfterMs() falls back to x-ratelimit-reset whenever that header is present. GitHub includes x-ratelimit-reset on most API responses (including non-rate-limit failures like 5xx/502), so withRetry() will start honoring that reset time for unrelated transient errors and effectively override the intended exponential backoff (often immediately capping to maxDelayMs). Consider only using retry-after/x-ratelimit-reset when the response status indicates a rate-limit condition (e.g. 429, or 403 with x-ratelimit-remaining: 0 / message contains "rate limit"), otherwise return null so normal backoff applies.

Copilot uses AI. Check for mistakes.

return null;
}

/**
* Execute an operation with retry logic and exponential backoff
* @template T
Expand All @@ -100,6 +175,7 @@ async function withRetry(operation, config = {}, operationName = "operation") {
const jitter = fullConfig.jitterMs > 0 ? Math.floor(Math.random() * fullConfig.jitterMs) : 0;
const delayWithJitter = delay + jitter;
core.info(`Retry attempt ${attempt}/${fullConfig.maxRetries} for ${operationName} after ${delayWithJitter}ms delay`);
logRetryEvent(lastError, operationName, attempt, delayWithJitter);
await sleep(delayWithJitter);
}

Expand Down Expand Up @@ -140,8 +216,21 @@ async function withRetry(operation, config = {}, operationName = "operation") {
// Log the retry attempt
core.warning(`${operationName} failed (attempt ${attempt + 1}/${fullConfig.maxRetries + 1}): ${errorMsg}`);

// Calculate next delay with exponential backoff
delay = Math.min(delay * fullConfig.backoffMultiplier, fullConfig.maxDelayMs);
// Calculate next delay: honour Retry-After header when present, otherwise
// use exponential backoff. Either way the result is capped at maxDelayMs.
const retryAfterMs = getRetryAfterMs(error);
if (retryAfterMs !== null) {
const cappedDelay = Math.min(retryAfterMs, fullConfig.maxDelayMs);
if (cappedDelay < retryAfterMs) {
core.info(`Retry-After header detected for ${operationName}: server requested ${retryAfterMs}ms wait, capped to ${cappedDelay}ms (maxDelayMs)`);
} else {
core.info(`Retry-After header detected for ${operationName}: next retry will wait ${cappedDelay}ms`);
}
delay = cappedDelay;
} else {
// Calculate next delay with exponential backoff
delay = Math.min(delay * fullConfig.backoffMultiplier, fullConfig.maxDelayMs);
}
}
}

Expand Down Expand Up @@ -266,8 +355,10 @@ module.exports = {
withRetry,
sleep,
isTransientError,
getRetryAfterMs,
enhanceError,
createValidationError,
createOperationError,
DEFAULT_RETRY_CONFIG,
RATE_LIMIT_RETRY_CONFIG,
};
165 changes: 163 additions & 2 deletions actions/setup/js/error_recovery.test.cjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// @ts-check

import { describe, it, expect, beforeEach, vi } from "vitest";
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import fs from "fs";

// Mock @actions/core
global.core = {
Expand All @@ -10,7 +11,7 @@ global.core = {
debug: vi.fn(),
};

import { withRetry, isTransientError, enhanceError, createValidationError, createOperationError, DEFAULT_RETRY_CONFIG } from "./error_recovery.cjs";
import { withRetry, isTransientError, getRetryAfterMs, enhanceError, createValidationError, createOperationError, DEFAULT_RETRY_CONFIG, RATE_LIMIT_RETRY_CONFIG } from "./error_recovery.cjs";

describe("error_recovery", () => {
beforeEach(() => {
Expand Down Expand Up @@ -333,4 +334,164 @@ describe("error_recovery", () => {
expect(DEFAULT_RETRY_CONFIG.shouldRetry).toBe(isTransientError);
});
});

describe("RATE_LIMIT_RETRY_CONFIG", () => {
it("should have 5 retries", () => {
expect(RATE_LIMIT_RETRY_CONFIG.maxRetries).toBe(5);
});

it("should have initialDelayMs producing 30s first retry sleep", () => {
// First retry sleep = initialDelayMs * backoffMultiplier
const firstRetrySleep = RATE_LIMIT_RETRY_CONFIG.initialDelayMs * RATE_LIMIT_RETRY_CONFIG.backoffMultiplier;
expect(firstRetrySleep).toBe(30000);
});

it("should cap delay at 240s", () => {
expect(RATE_LIMIT_RETRY_CONFIG.maxDelayMs).toBe(240000);
});

it("should use isTransientError as shouldRetry", () => {
expect(RATE_LIMIT_RETRY_CONFIG.shouldRetry).toBe(isTransientError);
});
});

describe("getRetryAfterMs", () => {
it("should return null when error has no response headers", () => {
expect(getRetryAfterMs(new Error("Some error"))).toBeNull();
expect(getRetryAfterMs(null)).toBeNull();
expect(getRetryAfterMs(undefined)).toBeNull();
});

it("should return null when status is not a rate-limit status", () => {
// 5xx errors should not use Retry-After from x-ratelimit-reset
const error502 = { response: { status: 502, headers: { "x-ratelimit-reset": String(Math.floor((Date.now() + 120000) / 1000)) } } };
expect(getRetryAfterMs(error502)).toBeNull();
// 403 without x-ratelimit-remaining: 0 is not a rate-limit response
const error403 = { response: { status: 403, headers: { "retry-after": "60", "x-ratelimit-remaining": "100" } } };
expect(getRetryAfterMs(error403)).toBeNull();
});

it("should extract retry-after seconds from response headers on 429", () => {
const error = { response: { status: 429, headers: { "retry-after": "60" } } };
expect(getRetryAfterMs(error)).toBe(60000);
});

it("should extract retry-after seconds from top-level headers on 429", () => {
const error = { status: 429, headers: { "retry-after": "30" } };
expect(getRetryAfterMs(error)).toBe(30000);
});

it("should prefer response.headers over top-level headers on 429", () => {
const error = {
response: { status: 429, headers: { "retry-after": "60" } },
headers: { "retry-after": "30" },
};
expect(getRetryAfterMs(error)).toBe(60000);
});

it("should return null for zero or negative retry-after on 429", () => {
expect(getRetryAfterMs({ response: { status: 429, headers: { "retry-after": "0" } } })).toBeNull();
expect(getRetryAfterMs({ response: { status: 429, headers: { "retry-after": "-1" } } })).toBeNull();
});

it("should fall back to x-ratelimit-reset when retry-after is absent on 429", () => {
const futureTimestamp = Math.floor((Date.now() + 120000) / 1000); // 2 min from now
const error = { response: { status: 429, headers: { "x-ratelimit-reset": String(futureTimestamp) } } };
const result = getRetryAfterMs(error);
// Should be roughly 120s (allow ±5s for test timing)
expect(result).toBeGreaterThan(115000);
expect(result).toBeLessThan(125000);
});

it("should use x-ratelimit-reset for 403 with x-ratelimit-remaining: 0 (secondary rate limit)", () => {
const futureTimestamp = Math.floor((Date.now() + 60000) / 1000); // 1 min from now
const error = {
response: {
status: 403,
headers: { "x-ratelimit-remaining": "0", "x-ratelimit-reset": String(futureTimestamp) },
},
};
const result = getRetryAfterMs(error);
expect(result).toBeGreaterThan(55000);
expect(result).toBeLessThan(65000);
});

it("should return null when x-ratelimit-reset is in the past on 429", () => {
const pastTimestamp = Math.floor((Date.now() - 60000) / 1000);
const error = { response: { status: 429, headers: { "x-ratelimit-reset": String(pastTimestamp) } } };
expect(getRetryAfterMs(error)).toBeNull();
});

it("should return null for non-numeric retry-after on 429", () => {
const error = { response: { status: 429, headers: { "retry-after": "not-a-number" } } };
expect(getRetryAfterMs(error)).toBeNull();
});
});

describe("withRetry with Retry-After header", () => {
let appendSpy, existsSpy, mkdirSpy;

beforeEach(() => {
existsSpy = vi.spyOn(fs, "existsSync").mockReturnValue(true);
mkdirSpy = vi.spyOn(fs, "mkdirSync").mockImplementation(() => undefined);
appendSpy = vi.spyOn(fs, "appendFileSync").mockImplementation(() => undefined);
});

afterEach(() => {
existsSpy.mockRestore();
mkdirSpy.mockRestore();
appendSpy.mockRestore();
});

it("should use Retry-After delay instead of backoff when header is present on 429", async () => {
const retryAfterError = {
message: "rate limit exceeded",
response: { status: 429, headers: { "retry-after": "1" } }, // 1s for test speed
};
const operation = vi.fn().mockRejectedValueOnce(retryAfterError).mockResolvedValue("success");

const result = await withRetry(operation, { maxRetries: 2, initialDelayMs: 10, backoffMultiplier: 2, jitterMs: 0 }, "test-operation");

expect(result).toBe("success");
// The delay used should be 1000ms (from Retry-After: 1) rather than 20ms (10 * 2)
expect(core.info).toHaveBeenCalledWith(expect.stringContaining("Retry-After header detected for test-operation: next retry will wait 1000ms"));
});

it("should fall back to exponential backoff for non-rate-limit errors (502)", async () => {
const transientError = {
message: "502 bad gateway",
response: { status: 502, headers: { "x-ratelimit-reset": String(Math.floor((Date.now() + 120000) / 1000)) } },
};
const operation = vi.fn().mockRejectedValueOnce(transientError).mockResolvedValue("success");

const result = await withRetry(operation, { maxRetries: 2, initialDelayMs: 10, backoffMultiplier: 2, jitterMs: 0 }, "test-operation");

expect(result).toBe("success");
// Normal backoff: 10 * 2 = 20ms — NOT the 120s reset header
expect(core.info).toHaveBeenCalledWith(expect.stringContaining("after 20ms delay"));
expect(core.info).not.toHaveBeenCalledWith(expect.stringContaining("Retry-After header detected"));
});

it("should write a JSONL retry entry to the rate-limit log file on each retry", async () => {
const error = {
message: "rate limit exceeded",
response: {
status: 429,
headers: { "x-ratelimit-remaining": "0", "x-ratelimit-limit": "5000", "retry-after": "1" },
},
};
const operation = vi.fn().mockRejectedValueOnce(error).mockResolvedValue("ok");

await withRetry(operation, { maxRetries: 2, initialDelayMs: 10, backoffMultiplier: 2, jitterMs: 0 }, "test-log-op");

// appendFileSync should have been called once (one retry)
expect(appendSpy).toHaveBeenCalledOnce();
const entry = JSON.parse(appendSpy.mock.calls[0][1].trimEnd());
expect(entry.source).toBe("retry");
expect(entry.operation).toBe("test-log-op");
expect(entry.attempt).toBe(1);
expect(entry.status).toBe(429);
expect(entry.remaining).toBe(0);
});
});
});
Loading