From 4fc0f3e51b739c970ee3c39537a8a1518eb926dc Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 10:38:00 +0100 Subject: [PATCH 1/7] Add a PR check that comments on significant repo size changes --- .github/workflows/check-repo-size.yml | 53 ++++ package-lock.json | 2 + pr-checks/check-repo-size.test.ts | 334 ++++++++++++++++++++++++++ pr-checks/check-repo-size.ts | 327 +++++++++++++++++++++++++ pr-checks/package.json | 2 + 5 files changed, 718 insertions(+) create mode 100644 .github/workflows/check-repo-size.yml create mode 100644 pr-checks/check-repo-size.test.ts create mode 100644 pr-checks/check-repo-size.ts diff --git a/.github/workflows/check-repo-size.yml b/.github/workflows/check-repo-size.yml new file mode 100644 index 0000000000..9668ad232a --- /dev/null +++ b/.github/workflows/check-repo-size.yml @@ -0,0 +1,53 @@ +name: Check repo size + +on: + pull_request: + types: [opened, synchronize, reopened] + +defaults: + run: + shell: bash + +permissions: + contents: read + pull-requests: write + +jobs: + check-repo-size: + name: Check repo size + runs-on: ubuntu-slim + # PRs from forks (and Dependabot, which behaves like a fork) get a + # read-only GITHUB_TOKEN that can't post comments, so the job would only + # ever fail. Skip them. + if: >- + github.event.pull_request.head.repo.full_name == github.repository && + github.triggering_actor != 'dependabot[bot]' + timeout-minutes: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + # Need full history so we have both the PR merge commit (HEAD) and + # the base ref locally for `git archive` to work against either. + fetch-depth: 0 + + - name: Set up Node.js + uses: actions/setup-node@v6 + with: + node-version: 24 + cache: 'npm' + + - name: Install pr-checks dependencies + working-directory: pr-checks + run: npm ci + + - name: Check repo size + working-directory: pr-checks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BASE_REF: ${{ github.event.pull_request.base.ref }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GITHUB_REPOSITORY: ${{ github.repository }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: npx tsx check-repo-size.ts diff --git a/package-lock.json b/package-lock.json index 130ed3da1e..ff0319736f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10408,6 +10408,8 @@ }, "devDependencies": { "@types/node": "^20.19.39", + "@types/sinon": "^21.0.1", + "sinon": "^21.1.2", "tsx": "^4.21.0" } } diff --git a/pr-checks/check-repo-size.test.ts b/pr-checks/check-repo-size.test.ts new file mode 100644 index 0000000000..dcdb821b70 --- /dev/null +++ b/pr-checks/check-repo-size.test.ts @@ -0,0 +1,334 @@ +import * as assert from "node:assert/strict"; +import { execFileSync } from "node:child_process"; +import { randomBytes } from "node:crypto"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { afterEach, beforeEach, describe, it } from "node:test"; + +import { getOctokit } from "@actions/github"; +import * as sinon from "sinon"; + +import { + COMMENT_MARKER, + buildCommentBody, + formatBytes, + formatPercent, + isDeltaSignificant, + measureArchiveSize, + upsertSizeComment, +} from "./check-repo-size"; + +describe("formatBytes", async () => { + const cases: Array<[number, boolean, string]> = [ + // Unsigned: bytes / KiB / MiB boundaries. + [0, false, "0 B"], + [1, false, "1 B"], + [1023, false, "1023 B"], + [1024, false, "1.00 KiB"], + [2048, false, "2.00 KiB"], + [1024 * 1024 - 1, false, "1024.00 KiB"], + [1024 * 1024, false, "1.00 MiB"], + [2.5 * 1024 * 1024, false, "2.50 MiB"], + // Negative values always use a leading minus. + [-512, false, "-512 B"], + [-2048, false, "-2.00 KiB"], + [-2 * 1024 * 1024, false, "-2.00 MiB"], + // signed=true prepends a + to non-negative values. + [0, true, "+0 B"], + [512, true, "+512 B"], + [2048, true, "+2.00 KiB"], + [-512, true, "-512 B"], + ]; + for (const [bytes, signed, expected] of cases) { + await it(`formats ${bytes} (signed=${signed}) as ${expected}`, () => { + assert.equal(formatBytes(bytes, signed), expected); + }); + } +}); + +describe("formatPercent", async () => { + await it("formats positive fractions with a leading +", () => { + assert.equal(formatPercent(0.1), "+10.00%"); + assert.equal(formatPercent(0.0123), "+1.23%"); + }); + + await it("formats negative fractions with a leading -", () => { + assert.equal(formatPercent(-0.1), "-10.00%"); + }); + + await it("formats zero without a sign", () => { + assert.equal(formatPercent(0), "0.00%"); + }); +}); + +describe("isDeltaSignificant", async () => { + const cases: Array<[number, number, number, boolean]> = [ + // At and above threshold (both signs). + [100, 1000, 0.1, true], + [101, 1000, 0.1, true], + [-100, 1000, 0.1, true], + // Below threshold (both signs, plus exact zero). + [99, 1000, 0.1, false], + [-99, 1000, 0.1, false], + [0, 1000, 0.1, false], + ]; + for (const [delta, base, fraction, expected] of cases) { + await it(`returns ${expected} for delta=${delta}, base=${base}, fraction=${fraction}`, () => { + assert.equal(isDeltaSignificant(delta, base, fraction), expected); + }); + } +}); + +describe("buildCommentBody", async () => { + await it("includes the marker, the base/PR/delta rows, and the run URL", () => { + const body = buildCommentBody({ + baseRef: "main", + baseSize: 2_000_000, + prSize: 2_300_000, + runUrl: "https://example.test/run", + }); + + assert.match(body, new RegExp(`^${escapeRegExp(COMMENT_MARKER)}`)); + assert.match(body, /Base \(`main`\) \| 1\.91 MiB \(2000000 bytes\)/); + assert.match(body, /This PR \| 2\.19 MiB \(2300000 bytes\)/); + assert.match( + body, + /\*\*Delta\*\* \| \*\*\+292\.97 KiB \(\+300000 bytes, \+15\.00%\)\*\*/, + ); + assert.match(body, /\[workflow run\]\(https:\/\/example\.test\/run\)/); + }); + + await it("formats negative deltas with a leading minus and omits the run URL when missing", () => { + const body = buildCommentBody({ + baseRef: "main", + baseSize: 2_000_000, + prSize: 1_800_000, + }); + assert.match( + body, + /\*\*Delta\*\* \| \*\*-195\.31 KiB \(-200000 bytes, -10\.00%\)\*\*/, + ); + assert.doesNotMatch(body, /workflow run/); + }); +}); + +let repoDir: string; + +beforeEach(() => { + repoDir = fs.mkdtempSync(path.join(os.tmpdir(), "check-repo-size-test-")); + execFileSync("git", ["init", "--initial-branch=main", "-q"], { + cwd: repoDir, + }); + execFileSync("git", ["config", "user.email", "test@example.test"], { + cwd: repoDir, + }); + execFileSync("git", ["config", "user.name", "Test"], { cwd: repoDir }); + execFileSync("git", ["config", "commit.gpgsign", "false"], { cwd: repoDir }); +}); + +afterEach(() => { + fs.rmSync(repoDir, { recursive: true, force: true }); +}); + +function commit(name: string, content: string, message: string) { + fs.writeFileSync(path.join(repoDir, name), content); + execFileSync("git", ["add", name], { cwd: repoDir }); + execFileSync("git", ["commit", "-q", "-m", message], { cwd: repoDir }); +} + +describe("measureArchiveSize", async () => { + await it("returns a positive byte count for a non-empty repo", async () => { + commit("a.txt", "hello world\n", "first"); + const size = await measureArchiveSize("HEAD", repoDir); + assert.ok(size > 0, `expected size > 0, got ${size}`); + }); + + await it("returns the same size on repeated runs (deterministic)", async () => { + commit("a.txt", "hello world\n", "first"); + const a = await measureArchiveSize("HEAD", repoDir); + const b = await measureArchiveSize("HEAD", repoDir); + assert.equal(a, b); + }); + + await it("returns a larger size when more content is added", async () => { + commit("a.txt", "hello world\n", "first"); + const small = await measureArchiveSize("HEAD", repoDir); + + // Use random bytes so the new content is incompressible and the archive + // is guaranteed to grow even after gzip. + commit("b.bin", randomBytes(8192).toString("base64"), "second"); + const big = await measureArchiveSize("HEAD", repoDir); + assert.ok( + big > small, + `expected ${big} > ${small} after adding more content`, + ); + }); + + await it("ignores untracked files (e.g. node_modules)", async () => { + commit("a.txt", "hello\n", "first"); + commit(".gitignore", "node_modules/\n", "ignore node_modules"); + const sizeBefore = await measureArchiveSize("HEAD", repoDir); + + fs.mkdirSync(path.join(repoDir, "node_modules")); + fs.writeFileSync( + path.join(repoDir, "node_modules", "huge.bin"), + "x".repeat(1_000_000), + ); + + const sizeAfter = await measureArchiveSize("HEAD", repoDir); + assert.equal( + sizeAfter, + sizeBefore, + "untracked node_modules should not affect the archive size", + ); + }); + + await it("rejects when the ref does not exist", async () => { + commit("a.txt", "hello\n", "first"); + await assert.rejects( + () => measureArchiveSize("does-not-exist", repoDir), + /git archive does-not-exist exited with code/, + ); + }); +}); + +describe("upsertSizeComment", async () => { + const owner = "test-owner"; + const repo = "test-repo"; + const prNumber = 42; + + let octokit: ReturnType; + + beforeEach(() => { + octokit = getOctokit("test-token"); + }); + + afterEach(() => { + sinon.restore(); + }); + + function stubExistingComments(comments: Array<{ id: number; body: string }>) { + // upsertSizeComment calls `octokit.paginate(octokit.rest.issues.listComments, ...)`, + // so stubbing `paginate` directly mocks the listing without depending on how + // paginate walks Octokit's response (link headers etc.). + return sinon.stub(octokit, "paginate").resolves(comments); + } + + await it("creates a new comment when none exists and the delta is significant", async () => { + stubExistingComments([]); + const createStub = sinon + .stub(octokit.rest.issues, "createComment") + .resolves({ data: { id: 999 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nhello`, + delta: 200, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "created", commentId: 999 }); + sinon.assert.calledOnce(createStub); + const createArgs = createStub.firstCall.args[0]!; + assert.equal(createArgs.owner, owner); + assert.equal(createArgs.repo, repo); + assert.equal(createArgs.issue_number, prNumber); + assert.ok(createArgs.body.includes(COMMENT_MARKER)); + }); + + await it("creates a new comment for a significant size decrease", async () => { + // Shrinkage matters too: it might indicate accidentally deleted tracked + // files. The full pipeline (not just isDeltaSignificant) needs to post on + // negative deltas. + stubExistingComments([]); + const createStub = sinon + .stub(octokit.rest.issues, "createComment") + .resolves({ data: { id: 999 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nhello`, + delta: -200, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "created", commentId: 999 }); + sinon.assert.calledOnce(createStub); + }); + + await it("skips when no existing comment and delta is below threshold", async () => { + stubExistingComments([]); + const createStub = sinon.stub(octokit.rest.issues, "createComment"); + const updateStub = sinon.stub(octokit.rest.issues, "updateComment"); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nhello`, + delta: 50, + baseSize: 1000, + }); + + assert.equal(result.action, "skipped"); + sinon.assert.notCalled(createStub); + sinon.assert.notCalled(updateStub); + }); + + await it("updates the existing comment when the delta is significant", async () => { + stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); + const updateStub = sinon + .stub(octokit.rest.issues, "updateComment") + .resolves({ data: { id: 7 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nnew body`, + delta: 200, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "updated", commentId: 7 }); + sinon.assert.calledOnce(updateStub); + const updateArgs = updateStub.firstCall.args[0]!; + assert.equal(updateArgs.comment_id, 7); + assert.ok(updateArgs.body.includes("new body")); + }); + + await it("updates an existing comment even when the delta is below threshold", async () => { + // This keeps the comment in sync after a PR that initially had a big diff + // gets reduced below the threshold by a follow-up commit. + stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); + const updateStub = sinon + .stub(octokit.rest.issues, "updateComment") + .resolves({ data: { id: 7 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nnew body`, + delta: 1, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "updated", commentId: 7 }); + sinon.assert.calledOnce(updateStub); + }); +}); + +function escapeRegExp(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} diff --git a/pr-checks/check-repo-size.ts b/pr-checks/check-repo-size.ts new file mode 100644 index 0000000000..485dfdf94c --- /dev/null +++ b/pr-checks/check-repo-size.ts @@ -0,0 +1,327 @@ +#!/usr/bin/env npx tsx + +/* +Computes the difference in the `.tar.gz`'d checkout size of the repo between the PR head and the PR +base, and posts/updates a sticky comment on the PR when the change is significant in either +direction. This size is relevant because it corresponds to the duration of the "Download action +repository" step that happens at the start of every job that uses this Action. + +Designed to be invoked from the `Check repo size` workflow on PR events, but also runnable locally +(with --dry-run) for testing. +*/ + +import { spawn } from "node:child_process"; +import * as path from "node:path"; +import { parseArgs } from "node:util"; + +import { getOctokit } from "@actions/github"; + +/** Hidden marker used to find the existing sticky comment on a PR. */ +export const COMMENT_MARKER = ""; + +export const DEFAULT_BASE_REF = "main"; +export const DEFAULT_REPOSITORY = "github/codeql-action"; + +/** + * Fraction of the base archive size at which a delta is considered + * significant enough to warrant a new sticky comment. We always update an + * existing comment regardless, so the comment stays in sync as the diff + * evolves. + */ +export const SIGNIFICANT_DELTA_FRACTION = 0.1; + +export type Octokit = ReturnType; + +/** + * Stream `git archive --format=tar.gz ` and count the compressed bytes. + * + * `git archive` only includes tracked files, so we will ignore directories like `node_modules` and + * `build` that aren't downloaded when starting up a CodeQL job. + */ +export async function measureArchiveSize( + ref: string, + cwd: string, +): Promise { + const git = spawn("git", ["archive", "--format=tar.gz", ref], { cwd }); + + let stderr = ""; + git.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + let size = 0; + git.stdout.on("data", (chunk: Buffer) => { + size += chunk.length; + }); + + const exitCode = await new Promise((resolve, reject) => { + git.on("error", reject); + git.on("close", resolve); + }); + + if (exitCode !== 0) { + throw new Error( + `git archive ${ref} exited with code ${exitCode}: ${stderr.trim()}`, + ); + } + return size; +} + +/** + * Format a byte count into a human-readable string with binary units. If + * `signed` is true, a leading `+` is prepended for non-negative values so + * gains and losses are visually distinct. + */ +export function formatBytes(bytes: number, signed = false): string { + const sign = bytes < 0 ? "-" : signed ? "+" : ""; + const abs = Math.abs(bytes); + if (abs < 1024) return `${sign}${abs} B`; + if (abs < 1024 * 1024) return `${sign}${(abs / 1024).toFixed(2)} KiB`; + return `${sign}${(abs / 1024 / 1024).toFixed(2)} MiB`; +} + +/** Format a fraction as a signed percentage with 2 decimal places. */ +export function formatPercent(fraction: number): string { + const pct = fraction * 100; + const sign = pct > 0 ? "+" : ""; + return `${sign}${pct.toFixed(2)}%`; +} + +export interface CommentBodyOptions { + baseRef: string; + baseSize: number; + prSize: number; + /** Optional URL of the workflow run, included in the comment footer. */ + runUrl?: string; +} + +export function buildCommentBody(opts: CommentBodyOptions): string { + const { baseRef, baseSize, prSize, runUrl } = opts; + const delta = prSize - baseSize; + const signedDelta = delta >= 0 ? `+${delta}` : `${delta}`; + const runUrlLine = runUrl + ? ` See the [workflow run](${runUrl}) for details.` + : ""; + + return [ + COMMENT_MARKER, + "### Repository checkout size", + "", + "| | Compressed archive size |", + "|---|---|", + `| Base (\`${baseRef}\`) | ${formatBytes(baseSize)} (${baseSize} bytes) |`, + `| This PR | ${formatBytes(prSize)} (${prSize} bytes) |`, + `| **Delta** | **${formatBytes(delta, true)} (${signedDelta} bytes, ${formatPercent(delta / baseSize)})** |`, + "", + "Sizes are measured by streaming `git archive --format=tar.gz `, " + + "which includes all tracked files and excludes `node_modules` and " + + "other untracked or git-ignored files. The compressed checkout is " + + "downloaded by every consumer of this Action, so changes here directly " + + `affect Action download time.${runUrlLine}`, + ].join("\n"); +} + +/** + * Returns true when the absolute delta is at least `fraction` of the base size. Both increases and + * decreases are considered significant, so we report wins as well as losses. + */ +export function isDeltaSignificant( + delta: number, + baseSize: number, + fraction: number, +): boolean { + return Math.abs(delta) >= baseSize * fraction; +} + +export interface UpsertOptions { + octokit: Octokit; + owner: string; + repo: string; + prNumber: number; + body: string; + delta: number; + baseSize: number; +} + +export type UpsertResult = + | { action: "updated"; commentId: number } + | { action: "created"; commentId: number } + | { action: "skipped"; reason: string }; + +/** + * Find an existing sticky comment on the PR by HTML marker. If one exists, + * always update it (so it stays in sync). Otherwise, only create a new + * comment when the delta is currently significant. + */ +export async function upsertSizeComment( + opts: UpsertOptions, +): Promise { + const { octokit, owner, repo, prNumber, body, delta, baseSize } = opts; + + const comments = await octokit.paginate(octokit.rest.issues.listComments, { + owner, + repo, + issue_number: prNumber, + per_page: 100, + }); + const existing = comments.find((c) => + (c.body ?? "").includes(COMMENT_MARKER), + ); + + if (existing) { + await octokit.rest.issues.updateComment({ + owner, + repo, + comment_id: existing.id, + body, + }); + return { action: "updated", commentId: existing.id }; + } + + if (isDeltaSignificant(delta, baseSize, SIGNIFICANT_DELTA_FRACTION)) { + const { data } = await octokit.rest.issues.createComment({ + owner, + repo, + issue_number: prNumber, + body, + }); + return { action: "created", commentId: data.id }; + } + + return { + action: "skipped", + reason: + `delta ${delta} bytes is below ` + + `${(SIGNIFICANT_DELTA_FRACTION * 100).toFixed(2)}% of base size ` + + `${baseSize} bytes`, + }; +} + +interface MainArgs { + /** Base ref of the PR. Defaults to `main`, and is prefixed with `origin/` when passed to git. */ + baseRef: string; + /** Numeric PR number used to find / create / update the sticky comment. */ + prNumber: number; + /** `owner/repo` slug, defaulting to `github/codeql-action`, split before being passed to Octokit. */ + ownerRepo: string; + /** Optional URL of the workflow run, surfaced in the comment footer. */ + runUrl?: string; + /** When true, log the would-be comment instead of calling GitHub. */ + dryRun: boolean; + /** GitHub token used to authenticate Octokit. Required unless `dryRun` is true. */ + token?: string; +} + +export function readArgs(): MainArgs { + const { values } = parseArgs({ + options: { + "dry-run": { type: "boolean", default: false }, + }, + strict: true, + }); + + const baseRef = process.env.BASE_REF ?? DEFAULT_BASE_REF; + const prNumberStr = process.env.PR_NUMBER; + const repo = process.env.GITHUB_REPOSITORY ?? DEFAULT_REPOSITORY; + + if (!prNumberStr) throw new Error("Missing PR_NUMBER env var"); + + const prNumber = Number.parseInt(prNumberStr, 10); + if (!Number.isFinite(prNumber)) { + throw new Error(`Invalid PR_NUMBER value: ${prNumberStr}`); + } + + return { + baseRef, + prNumber, + ownerRepo: repo, + runUrl: process.env.RUN_URL, + dryRun: values["dry-run"] ?? false, + token: process.env.GITHUB_TOKEN, + }; +} + +async function main(): Promise { + const args = readArgs(); + + // The script lives at `/pr-checks/check-repo-size.ts`, so the repo + // root is always the parent directory. + const repoRoot = path.resolve(__dirname, ".."); + + console.log(`Measuring base archive size for origin/${args.baseRef}...`); + const baseSize = await measureArchiveSize(`origin/${args.baseRef}`, repoRoot); + console.log(` ${baseSize} bytes`); + + console.log("Measuring PR archive size for HEAD..."); + const prSize = await measureArchiveSize("HEAD", repoRoot); + console.log(` ${prSize} bytes`); + + const delta = prSize - baseSize; + console.log(`Delta: ${delta} bytes`); + + const body = buildCommentBody({ + baseRef: args.baseRef, + baseSize, + prSize, + runUrl: args.runUrl, + }); + + if (args.dryRun) { + const significant = isDeltaSignificant( + delta, + baseSize, + SIGNIFICANT_DELTA_FRACTION, + ); + console.log( + `--dry-run: significant=${significant} (threshold ${( + SIGNIFICANT_DELTA_FRACTION * 100 + ).toFixed(2)}%); would post:\n${body}`, + ); + return 0; + } + + if (!args.token) { + throw new Error( + "GITHUB_TOKEN env var is required when not running with --dry-run", + ); + } + + const [owner, repo] = args.ownerRepo.split("/"); + if (!owner || !repo) { + throw new Error(`Invalid GITHUB_REPOSITORY value: ${args.ownerRepo}`); + } + + const result = await upsertSizeComment({ + octokit: getOctokit(args.token), + owner, + repo, + prNumber: args.prNumber, + body, + delta, + baseSize, + }); + + switch (result.action) { + case "updated": + console.log(`Updated existing comment ${result.commentId}.`); + break; + case "created": + console.log(`Created new comment ${result.commentId}.`); + break; + case "skipped": + console.log(`Skipped commenting: ${result.reason}.`); + break; + } + return 0; +} + +if (require.main === module) { + void (async () => { + try { + process.exit(await main()); + } catch (err) { + console.error(err instanceof Error ? err.message : String(err)); + process.exit(1); + } + })(); +} diff --git a/pr-checks/package.json b/pr-checks/package.json index 2741560f68..ff4b45238e 100644 --- a/pr-checks/package.json +++ b/pr-checks/package.json @@ -11,6 +11,8 @@ }, "devDependencies": { "@types/node": "^20.19.39", + "@types/sinon": "^21.0.1", + "sinon": "^21.1.2", "tsx": "^4.21.0" } } From 6f8805e224af921e4c2ccaf903943f0a0b111e53 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 17:15:30 +0100 Subject: [PATCH 2/7] Default setup env vars: Restrict results to `src` --- queries/default-setup-environment-variables.ql | 1 + 1 file changed, 1 insertion(+) diff --git a/queries/default-setup-environment-variables.ql b/queries/default-setup-environment-variables.ql index e456409419..9f677dfb9b 100644 --- a/queries/default-setup-environment-variables.ql +++ b/queries/default-setup-environment-variables.ql @@ -43,6 +43,7 @@ predicate envVarRead(DataFlow::Node node, string envVar) { from DataFlow::Node read, string envVar where envVarRead(read, envVar) and + read.getFile().getRelativePath().matches("src/%") and not read.getFile().getBaseName().matches("%.test.ts") and not isSafeForDefaultSetup(envVar) select read, From bcffb2b658dd0a38f8229b2887dbe454044f93a9 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 17:33:45 +0100 Subject: [PATCH 3/7] Unify checks into a single job --- .github/workflows/check-repo-size.yml | 53 ------------------------ .github/workflows/pr-checks.yml | 58 +++++++++++++-------------- 2 files changed, 29 insertions(+), 82 deletions(-) delete mode 100644 .github/workflows/check-repo-size.yml diff --git a/.github/workflows/check-repo-size.yml b/.github/workflows/check-repo-size.yml deleted file mode 100644 index 9668ad232a..0000000000 --- a/.github/workflows/check-repo-size.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Check repo size - -on: - pull_request: - types: [opened, synchronize, reopened] - -defaults: - run: - shell: bash - -permissions: - contents: read - pull-requests: write - -jobs: - check-repo-size: - name: Check repo size - runs-on: ubuntu-slim - # PRs from forks (and Dependabot, which behaves like a fork) get a - # read-only GITHUB_TOKEN that can't post comments, so the job would only - # ever fail. Skip them. - if: >- - github.event.pull_request.head.repo.full_name == github.repository && - github.triggering_actor != 'dependabot[bot]' - timeout-minutes: 10 - - steps: - - name: Checkout repository - uses: actions/checkout@v6 - with: - # Need full history so we have both the PR merge commit (HEAD) and - # the base ref locally for `git archive` to work against either. - fetch-depth: 0 - - - name: Set up Node.js - uses: actions/setup-node@v6 - with: - node-version: 24 - cache: 'npm' - - - name: Install pr-checks dependencies - working-directory: pr-checks - run: npm ci - - - name: Check repo size - working-directory: pr-checks - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BASE_REF: ${{ github.event.pull_request.base.ref }} - PR_NUMBER: ${{ github.event.pull_request.number }} - GITHUB_REPOSITORY: ${{ github.repository }} - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - run: npx tsx check-repo-size.ts diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index f240997030..99940cddfd 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -67,24 +67,24 @@ jobs: sarif_file: eslint.sarif category: eslint - # Verifying the PR checks are up-to-date requires Node 24. The PR checks are not dependent - # on the main codebase and therefore do not need to be run as part of the same matrix that - # we use for the `unit-tests` job. - verify-pr-checks: - name: Verify PR checks + # These checks do not need to be run as part of the same matrix that we use for the `unit-tests` + # job. + pr-checks: + name: PR checks if: github.triggering_actor != 'dependabot[bot]' permissions: contents: read + pull-requests: write runs-on: ubuntu-slim timeout-minutes: 10 steps: - - name: Prepare git (Windows) - if: runner.os == 'Windows' - run: git config --global core.autocrlf false - - name: Checkout repository uses: actions/checkout@v6 + with: + # Need full history so we have both the PR merge commit (HEAD) and the base SHA locally + # for `git archive` to work against either. + fetch-depth: 0 - name: Set up Node.js uses: actions/setup-node@v6 @@ -96,29 +96,29 @@ jobs: run: npm ci - name: Verify PR checks up to date - if: always() run: .github/workflows/script/verify-pr-checks.sh - name: Run pr-checks tests - if: always() working-directory: pr-checks run: npx tsx --test - check-node-version: - if: github.triggering_actor != 'dependabot[bot]' - name: Check Action Node versions - runs-on: ubuntu-latest - timeout-minutes: 45 - env: - BASE_REF: ${{ github.base_ref }} - - permissions: - contents: read - - steps: - - uses: actions/checkout@v6 - - id: head-version - name: Verify all Actions use the same Node version + - name: Check repo size + if: >- + github.event_name == 'pull_request' && + github.event.pull_request.head.repo.full_name == github.repository && + github.event.pull_request.user.login != 'dependabot[bot]' + working-directory: pr-checks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BASE_REF: ${{ github.event.pull_request.base.ref }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GITHUB_REPOSITORY: ${{ github.repository }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: npx tsx check-repo-size.ts + + - name: Verify all Actions use the same Node version + id: head-version run: | NODE_VERSION=$(find . -name "action.yml" -exec yq -e '.runs.using' {} \; | grep node | sort | uniq) echo "NODE_VERSION: ${NODE_VERSION}" @@ -128,12 +128,12 @@ jobs: fi echo "node_version=${NODE_VERSION}" >> $GITHUB_OUTPUT - - id: checkout-base - name: 'Backport: Check out base ref' + - name: 'Backport: Check out base ref' + id: checkout-base if: ${{ startsWith(github.head_ref, 'backport-') }} uses: actions/checkout@v6 with: - ref: ${{ env.BASE_REF }} + ref: ${{ github.base_ref }} - name: 'Backport: Verify Node versions unchanged' if: steps.checkout-base.outcome == 'success' From 5a80681bb6c514957ebf142061bb9243575ad592 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 17:39:12 +0100 Subject: [PATCH 4/7] Address review comments --- pr-checks/check-repo-size.test.ts | 36 ++++++++-------- pr-checks/check-repo-size.ts | 70 ++++++++++++++++++------------- 2 files changed, 59 insertions(+), 47 deletions(-) diff --git a/pr-checks/check-repo-size.test.ts b/pr-checks/check-repo-size.test.ts index dcdb821b70..d91316547b 100644 --- a/pr-checks/check-repo-size.test.ts +++ b/pr-checks/check-repo-size.test.ts @@ -6,9 +6,9 @@ import * as os from "node:os"; import * as path from "node:path"; import { afterEach, beforeEach, describe, it } from "node:test"; -import { getOctokit } from "@actions/github"; import * as sinon from "sinon"; +import { type ApiClient, getApiClient } from "./api-client"; import { COMMENT_MARKER, buildCommentBody, @@ -198,10 +198,10 @@ describe("upsertSizeComment", async () => { const repo = "test-repo"; const prNumber = 42; - let octokit: ReturnType; + let client: ApiClient; beforeEach(() => { - octokit = getOctokit("test-token"); + client = getApiClient("test-token"); }); afterEach(() => { @@ -209,20 +209,20 @@ describe("upsertSizeComment", async () => { }); function stubExistingComments(comments: Array<{ id: number; body: string }>) { - // upsertSizeComment calls `octokit.paginate(octokit.rest.issues.listComments, ...)`, - // so stubbing `paginate` directly mocks the listing without depending on how - // paginate walks Octokit's response (link headers etc.). - return sinon.stub(octokit, "paginate").resolves(comments); + // upsertSizeComment calls `client.paginate(...)`, so stubbing `paginate` + // directly mocks the listing without depending on how paginate walks + // Octokit's response (link headers etc.). + return sinon.stub(client, "paginate").resolves(comments); } await it("creates a new comment when none exists and the delta is significant", async () => { stubExistingComments([]); const createStub = sinon - .stub(octokit.rest.issues, "createComment") + .stub(client.rest.issues, "createComment") .resolves({ data: { id: 999 } } as never); const result = await upsertSizeComment({ - octokit, + client, owner, repo, prNumber, @@ -246,11 +246,11 @@ describe("upsertSizeComment", async () => { // negative deltas. stubExistingComments([]); const createStub = sinon - .stub(octokit.rest.issues, "createComment") + .stub(client.rest.issues, "createComment") .resolves({ data: { id: 999 } } as never); const result = await upsertSizeComment({ - octokit, + client, owner, repo, prNumber, @@ -265,11 +265,11 @@ describe("upsertSizeComment", async () => { await it("skips when no existing comment and delta is below threshold", async () => { stubExistingComments([]); - const createStub = sinon.stub(octokit.rest.issues, "createComment"); - const updateStub = sinon.stub(octokit.rest.issues, "updateComment"); + const createStub = sinon.stub(client.rest.issues, "createComment"); + const updateStub = sinon.stub(client.rest.issues, "updateComment"); const result = await upsertSizeComment({ - octokit, + client, owner, repo, prNumber, @@ -286,11 +286,11 @@ describe("upsertSizeComment", async () => { await it("updates the existing comment when the delta is significant", async () => { stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); const updateStub = sinon - .stub(octokit.rest.issues, "updateComment") + .stub(client.rest.issues, "updateComment") .resolves({ data: { id: 7 } } as never); const result = await upsertSizeComment({ - octokit, + client, owner, repo, prNumber, @@ -311,11 +311,11 @@ describe("upsertSizeComment", async () => { // gets reduced below the threshold by a follow-up commit. stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); const updateStub = sinon - .stub(octokit.rest.issues, "updateComment") + .stub(client.rest.issues, "updateComment") .resolves({ data: { id: 7 } } as never); const result = await upsertSizeComment({ - octokit, + client, owner, repo, prNumber, diff --git a/pr-checks/check-repo-size.ts b/pr-checks/check-repo-size.ts index 485dfdf94c..0b7d8cdcf2 100644 --- a/pr-checks/check-repo-size.ts +++ b/pr-checks/check-repo-size.ts @@ -14,7 +14,7 @@ import { spawn } from "node:child_process"; import * as path from "node:path"; import { parseArgs } from "node:util"; -import { getOctokit } from "@actions/github"; +import { type ApiClient, getApiClient } from "./api-client"; /** Hidden marker used to find the existing sticky comment on a PR. */ export const COMMENT_MARKER = ""; @@ -30,13 +30,11 @@ export const DEFAULT_REPOSITORY = "github/codeql-action"; */ export const SIGNIFICANT_DELTA_FRACTION = 0.1; -export type Octokit = ReturnType; - /** * Stream `git archive --format=tar.gz ` and count the compressed bytes. * - * `git archive` only includes tracked files, so we will ignore directories like `node_modules` and - * `build` that aren't downloaded when starting up a CodeQL job. + * `git archive` only includes tracked files, so untracked directories like `node_modules` and + * `build` aren't counted in the size downloaded when starting up a CodeQL job. */ export async function measureArchiveSize( ref: string, @@ -114,8 +112,8 @@ export function buildCommentBody(opts: CommentBodyOptions): string { `| **Delta** | **${formatBytes(delta, true)} (${signedDelta} bytes, ${formatPercent(delta / baseSize)})** |`, "", "Sizes are measured by streaming `git archive --format=tar.gz `, " + - "which includes all tracked files and excludes `node_modules` and " + - "other untracked or git-ignored files. The compressed checkout is " + + "which includes tracked files and excludes untracked files such as " + + "`node_modules`. The compressed checkout is " + "downloaded by every consumer of this Action, so changes here directly " + `affect Action download time.${runUrlLine}`, ].join("\n"); @@ -134,7 +132,7 @@ export function isDeltaSignificant( } export interface UpsertOptions { - octokit: Octokit; + client: ApiClient; owner: string; repo: string; prNumber: number; @@ -156,20 +154,23 @@ export type UpsertResult = export async function upsertSizeComment( opts: UpsertOptions, ): Promise { - const { octokit, owner, repo, prNumber, body, delta, baseSize } = opts; + const { client, owner, repo, prNumber, body, delta, baseSize } = opts; - const comments = await octokit.paginate(octokit.rest.issues.listComments, { - owner, - repo, - issue_number: prNumber, - per_page: 100, - }); + const comments = await client.paginate( + "GET /repos/{owner}/{repo}/issues/{issue_number}/comments", + { + owner, + repo, + issue_number: prNumber, + per_page: 100, + }, + ); const existing = comments.find((c) => (c.body ?? "").includes(COMMENT_MARKER), ); if (existing) { - await octokit.rest.issues.updateComment({ + await client.rest.issues.updateComment({ owner, repo, comment_id: existing.id, @@ -179,7 +180,7 @@ export async function upsertSizeComment( } if (isDeltaSignificant(delta, baseSize, SIGNIFICANT_DELTA_FRACTION)) { - const { data } = await octokit.rest.issues.createComment({ + const { data } = await client.rest.issues.createComment({ owner, repo, issue_number: prNumber, @@ -198,10 +199,12 @@ export async function upsertSizeComment( } interface MainArgs { - /** Base ref of the PR. Defaults to `main`, and is prefixed with `origin/` when passed to git. */ + /** Base ref of the PR. Defaults to `main`, and is used as the label in the PR comment. */ baseRef: string; - /** Numeric PR number used to find / create / update the sticky comment. */ - prNumber: number; + /** Base commit to archive. Defaults to `origin/main` for local dry runs. */ + baseCommitish: string; + /** Numeric PR number used to find / create / update the sticky comment. Required outside dry-run. */ + prNumber?: number; /** `owner/repo` slug, defaulting to `github/codeql-action`, split before being passed to Octokit. */ ownerRepo: string; /** Optional URL of the workflow run, surfaced in the comment footer. */ @@ -220,23 +223,29 @@ export function readArgs(): MainArgs { strict: true, }); + const dryRun = values["dry-run"] ?? false; const baseRef = process.env.BASE_REF ?? DEFAULT_BASE_REF; + const baseCommitish = process.env.BASE_SHA ?? `origin/${baseRef}`; const prNumberStr = process.env.PR_NUMBER; const repo = process.env.GITHUB_REPOSITORY ?? DEFAULT_REPOSITORY; - if (!prNumberStr) throw new Error("Missing PR_NUMBER env var"); - - const prNumber = Number.parseInt(prNumberStr, 10); - if (!Number.isFinite(prNumber)) { - throw new Error(`Invalid PR_NUMBER value: ${prNumberStr}`); + let prNumber: number | undefined; + if (prNumberStr) { + prNumber = Number.parseInt(prNumberStr, 10); + if (!Number.isFinite(prNumber)) { + throw new Error(`Invalid PR_NUMBER value: ${prNumberStr}`); + } + } else if (!dryRun) { + throw new Error("Missing PR_NUMBER env var"); } return { baseRef, + baseCommitish, prNumber, ownerRepo: repo, runUrl: process.env.RUN_URL, - dryRun: values["dry-run"] ?? false, + dryRun, token: process.env.GITHUB_TOKEN, }; } @@ -248,8 +257,8 @@ async function main(): Promise { // root is always the parent directory. const repoRoot = path.resolve(__dirname, ".."); - console.log(`Measuring base archive size for origin/${args.baseRef}...`); - const baseSize = await measureArchiveSize(`origin/${args.baseRef}`, repoRoot); + console.log(`Measuring base archive size for ${args.baseCommitish}...`); + const baseSize = await measureArchiveSize(args.baseCommitish, repoRoot); console.log(` ${baseSize} bytes`); console.log("Measuring PR archive size for HEAD..."); @@ -285,6 +294,9 @@ async function main(): Promise { "GITHUB_TOKEN env var is required when not running with --dry-run", ); } + if (args.prNumber === undefined) { + throw new Error("Missing PR_NUMBER env var"); + } const [owner, repo] = args.ownerRepo.split("/"); if (!owner || !repo) { @@ -292,7 +304,7 @@ async function main(): Promise { } const result = await upsertSizeComment({ - octokit: getOctokit(args.token), + client: getApiClient(args.token), owner, repo, prNumber: args.prNumber, From 9b6438e93682cb5c2fab835f4e49084118ab1106 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 18:19:22 +0100 Subject: [PATCH 5/7] Tweak workflow --- .github/workflows/pr-checks.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 99940cddfd..55ad7d56af 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -29,6 +29,10 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 45 + concurrency: + cancel-in-progress: ${{ github.event_name == 'pull_request' || false }} + group: pr-checks-unit-tests-${{ github.ref }}-${{ github.event_name }}-${{ matrix.os }}-node${{ matrix['node-version'] }} + steps: - name: Prepare git (Windows) if: runner.os == 'Windows' @@ -70,7 +74,7 @@ jobs: # These checks do not need to be run as part of the same matrix that we use for the `unit-tests` # job. pr-checks: - name: PR checks + name: PR Checks if: github.triggering_actor != 'dependabot[bot]' permissions: contents: read @@ -78,6 +82,10 @@ jobs: runs-on: ubuntu-slim timeout-minutes: 10 + concurrency: + cancel-in-progress: ${{ github.event_name == 'pull_request' || false }} + group: pr-checks-pr-checks-${{ github.ref }}-${{ github.event_name }} + steps: - name: Checkout repository uses: actions/checkout@v6 @@ -103,6 +111,8 @@ jobs: run: npx tsx --test - name: Check repo size + # Forks and Dependabot PRs don't have permission to write comments, so skip the check in + # those cases. if: >- github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository && From 15a712bbc2336f64eee2e053e438db3c90ee6e29 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 20:08:43 +0100 Subject: [PATCH 6/7] Address review comments --- .github/workflows/pr-checks.yml | 115 +++++++++++++---- package-lock.json | 2 - pr-checks/check-repo-size.test.ts | 208 ++++++++++-------------------- pr-checks/check-repo-size.ts | 206 +++++++---------------------- pr-checks/excluded.yml | 1 + pr-checks/package.json | 2 - 6 files changed, 205 insertions(+), 329 deletions(-) diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 55ad7d56af..f2f781e439 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -78,8 +78,7 @@ jobs: if: github.triggering_actor != 'dependabot[bot]' permissions: contents: read - pull-requests: write - runs-on: ubuntu-slim + runs-on: ubuntu-latest timeout-minutes: 10 concurrency: @@ -89,10 +88,6 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v6 - with: - # Need full history so we have both the PR merge commit (HEAD) and the base SHA locally - # for `git archive` to work against either. - fetch-depth: 0 - name: Set up Node.js uses: actions/setup-node@v6 @@ -110,33 +105,54 @@ jobs: working-directory: pr-checks run: npx tsx --test + - name: Verify all Actions use the same Node version + id: head-version + run: | + NODE_VERSION=$(find . -path "*/node_modules" -prune -o -name "action.yml" -exec yq -o=json '.runs.using' {} \; | jq -rs '[.[] | select(. != null and startswith("node"))] | unique | .[]') + echo "NODE_VERSION: ${NODE_VERSION}" + if [[ $(echo "$NODE_VERSION" | wc -l) -gt 1 ]]; then + echo "::error::More than one node version used in 'action.yml' files." + exit 1 + fi + echo "node_version=${NODE_VERSION}" >> $GITHUB_OUTPUT + + - name: Fetch base commit + # Forks and Dependabot PRs don't have permission to write comments, so skip the repo size + # check in those cases. + if: >- + github.event_name == 'pull_request' && + github.event.pull_request.head.repo.full_name == github.repository && + github.event.pull_request.user.login != 'dependabot[bot]' + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + run: git fetch --no-tags --depth=1 origin "$BASE_SHA" + - name: Check repo size - # Forks and Dependabot PRs don't have permission to write comments, so skip the check in - # those cases. + # Forks and Dependabot PRs don't have permission to write comments, so skip the repo size + # check in those cases. if: >- github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository && github.event.pull_request.user.login != 'dependabot[bot]' working-directory: pr-checks env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} BASE_REF: ${{ github.event.pull_request.base.ref }} BASE_SHA: ${{ github.event.pull_request.base.sha }} - PR_NUMBER: ${{ github.event.pull_request.number }} - GITHUB_REPOSITORY: ${{ github.repository }} RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - run: npx tsx check-repo-size.ts + run: npx tsx check-repo-size.ts --output-dir "$RUNNER_TEMP/repo-size" - - name: Verify all Actions use the same Node version - id: head-version - run: | - NODE_VERSION=$(find . -name "action.yml" -exec yq -e '.runs.using' {} \; | grep node | sort | uniq) - echo "NODE_VERSION: ${NODE_VERSION}" - if [[ $(echo "$NODE_VERSION" | wc -l) -gt 1 ]]; then - echo "::error::More than one node version used in 'action.yml' files." - exit 1 - fi - echo "node_version=${NODE_VERSION}" >> $GITHUB_OUTPUT + - name: Upload repo size comment + # Forks and Dependabot PRs don't have permission to write comments, so skip the repo size + # check in those cases. + if: >- + github.event_name == 'pull_request' && + github.event.pull_request.head.repo.full_name == github.repository && + github.event.pull_request.user.login != 'dependabot[bot]' + uses: actions/upload-artifact@v7 + with: + name: repo-size-comment + path: ${{ runner.temp }}/repo-size/ + if-no-files-found: error - name: 'Backport: Check out base ref' id: checkout-base @@ -150,10 +166,63 @@ jobs: env: HEAD_VERSION: ${{ steps.head-version.outputs.node_version }} run: | - BASE_VERSION=$(find . -name "action.yml" -exec yq -e '.runs.using' {} \; | grep node | sort | uniq) + BASE_VERSION=$(find . -path "*/node_modules" -prune -o -name "action.yml" -exec yq -o=json '.runs.using' {} \; | jq -rs '[.[] | select(. != null and startswith("node"))] | unique | .[]') echo "HEAD_VERSION: ${HEAD_VERSION}" echo "BASE_VERSION: ${BASE_VERSION}" if [[ "$BASE_VERSION" != "$HEAD_VERSION" ]]; then echo "::error::Cannot change the Node version of an Action in a backport PR." exit 1 fi + + post-repo-size-comment: + name: Post repo size comment + needs: pr-checks + # Keep write permissions isolated from the job that checks out and tests PR code. This job only + # posts the candidate comment body produced by the read-only `pr-checks` job. + if: >- + github.event_name == 'pull_request' && + github.event.pull_request.head.repo.full_name == github.repository && + github.event.pull_request.user.login != 'dependabot[bot]' && + needs.pr-checks.result == 'success' + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-slim + timeout-minutes: 10 + + concurrency: + cancel-in-progress: true + group: check-repo-size-${{ github.event.pull_request.number }} + + steps: + - name: Download repo size comment + uses: actions/download-artifact@v8 + with: + name: repo-size-comment + path: repo-size-comment + + - name: Post repo size comment + env: + COMMENT_MARKER: "" + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: | + significant=$(jq -r '.significant' repo-size-comment/metadata.json) + body=$(cat repo-size-comment/body.md) + comment_id=$( + gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" \ + --paginate \ + --jq ".[] | select(.body | contains(\"$COMMENT_MARKER\")) | .id" \ + | head -n 1 + ) + + if [[ -n "$comment_id" ]]; then + echo "Updating existing comment $comment_id." + gh api --method PATCH "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" --field body="$body" + elif [[ "$significant" == "true" ]]; then + echo "Creating new repo size comment." + gh api --method POST "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --field body="$body" + else + echo "Skipping repo size comment because the delta is below the threshold and no sticky comment exists." + fi diff --git a/package-lock.json b/package-lock.json index c4c542a011..6608c05602 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10408,8 +10408,6 @@ }, "devDependencies": { "@types/node": "^20.19.39", - "@types/sinon": "^21.0.1", - "sinon": "^22.0.0", "tsx": "^4.21.0" } } diff --git a/pr-checks/check-repo-size.test.ts b/pr-checks/check-repo-size.test.ts index d91316547b..1cd8d3c8d4 100644 --- a/pr-checks/check-repo-size.test.ts +++ b/pr-checks/check-repo-size.test.ts @@ -1,3 +1,9 @@ +#!/usr/bin/env npx tsx + +/* +Tests for check-repo-size.ts. +*/ + import * as assert from "node:assert/strict"; import { execFileSync } from "node:child_process"; import { randomBytes } from "node:crypto"; @@ -6,17 +12,15 @@ import * as os from "node:os"; import * as path from "node:path"; import { afterEach, beforeEach, describe, it } from "node:test"; -import * as sinon from "sinon"; - -import { type ApiClient, getApiClient } from "./api-client"; import { COMMENT_MARKER, + DEFAULT_BASE_REF, buildCommentBody, formatBytes, formatPercent, isDeltaSignificant, measureArchiveSize, - upsertSizeComment, + readArgs, } from "./check-repo-size"; describe("formatBytes", async () => { @@ -113,6 +117,66 @@ describe("buildCommentBody", async () => { }); }); +describe("readArgs", async () => { + await it("defaults the base ref for local runs", () => { + const originalEnv = process.env; + const originalArgv = process.argv; + + try { + process.env = {}; + process.argv = ["node", "check-repo-size.ts", "--output-dir", "/tmp/out"]; + + const args = readArgs(); + + assert.equal(args.baseRef, DEFAULT_BASE_REF); + assert.equal(args.baseCommitish, `origin/${DEFAULT_BASE_REF}`); + assert.equal(args.outputDir, "/tmp/out"); + assert.equal(args.runUrl, undefined); + } finally { + process.env = originalEnv; + process.argv = originalArgv; + } + }); + + await it("uses the base SHA when provided by the workflow", () => { + const originalEnv = process.env; + const originalArgv = process.argv; + + try { + process.env = { + BASE_REF: "main", + BASE_SHA: "abc123", + RUN_URL: "https://example.test/run", + }; + process.argv = ["node", "check-repo-size.ts", "--output-dir", "/tmp/out"]; + + const args = readArgs(); + + assert.equal(args.baseRef, "main"); + assert.equal(args.baseCommitish, "abc123"); + assert.equal(args.outputDir, "/tmp/out"); + assert.equal(args.runUrl, "https://example.test/run"); + } finally { + process.env = originalEnv; + process.argv = originalArgv; + } + }); + + await it("throws when --output-dir is missing", () => { + const originalEnv = process.env; + const originalArgv = process.argv; + + try { + process.env = {}; + process.argv = ["node", "check-repo-size.ts"]; + assert.throws(() => readArgs(), /--output-dir is required/); + } finally { + process.env = originalEnv; + process.argv = originalArgv; + } + }); +}); + let repoDir: string; beforeEach(() => { @@ -193,142 +257,6 @@ describe("measureArchiveSize", async () => { }); }); -describe("upsertSizeComment", async () => { - const owner = "test-owner"; - const repo = "test-repo"; - const prNumber = 42; - - let client: ApiClient; - - beforeEach(() => { - client = getApiClient("test-token"); - }); - - afterEach(() => { - sinon.restore(); - }); - - function stubExistingComments(comments: Array<{ id: number; body: string }>) { - // upsertSizeComment calls `client.paginate(...)`, so stubbing `paginate` - // directly mocks the listing without depending on how paginate walks - // Octokit's response (link headers etc.). - return sinon.stub(client, "paginate").resolves(comments); - } - - await it("creates a new comment when none exists and the delta is significant", async () => { - stubExistingComments([]); - const createStub = sinon - .stub(client.rest.issues, "createComment") - .resolves({ data: { id: 999 } } as never); - - const result = await upsertSizeComment({ - client, - owner, - repo, - prNumber, - body: `${COMMENT_MARKER}\nhello`, - delta: 200, - baseSize: 1000, - }); - - assert.deepEqual(result, { action: "created", commentId: 999 }); - sinon.assert.calledOnce(createStub); - const createArgs = createStub.firstCall.args[0]!; - assert.equal(createArgs.owner, owner); - assert.equal(createArgs.repo, repo); - assert.equal(createArgs.issue_number, prNumber); - assert.ok(createArgs.body.includes(COMMENT_MARKER)); - }); - - await it("creates a new comment for a significant size decrease", async () => { - // Shrinkage matters too: it might indicate accidentally deleted tracked - // files. The full pipeline (not just isDeltaSignificant) needs to post on - // negative deltas. - stubExistingComments([]); - const createStub = sinon - .stub(client.rest.issues, "createComment") - .resolves({ data: { id: 999 } } as never); - - const result = await upsertSizeComment({ - client, - owner, - repo, - prNumber, - body: `${COMMENT_MARKER}\nhello`, - delta: -200, - baseSize: 1000, - }); - - assert.deepEqual(result, { action: "created", commentId: 999 }); - sinon.assert.calledOnce(createStub); - }); - - await it("skips when no existing comment and delta is below threshold", async () => { - stubExistingComments([]); - const createStub = sinon.stub(client.rest.issues, "createComment"); - const updateStub = sinon.stub(client.rest.issues, "updateComment"); - - const result = await upsertSizeComment({ - client, - owner, - repo, - prNumber, - body: `${COMMENT_MARKER}\nhello`, - delta: 50, - baseSize: 1000, - }); - - assert.equal(result.action, "skipped"); - sinon.assert.notCalled(createStub); - sinon.assert.notCalled(updateStub); - }); - - await it("updates the existing comment when the delta is significant", async () => { - stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); - const updateStub = sinon - .stub(client.rest.issues, "updateComment") - .resolves({ data: { id: 7 } } as never); - - const result = await upsertSizeComment({ - client, - owner, - repo, - prNumber, - body: `${COMMENT_MARKER}\nnew body`, - delta: 200, - baseSize: 1000, - }); - - assert.deepEqual(result, { action: "updated", commentId: 7 }); - sinon.assert.calledOnce(updateStub); - const updateArgs = updateStub.firstCall.args[0]!; - assert.equal(updateArgs.comment_id, 7); - assert.ok(updateArgs.body.includes("new body")); - }); - - await it("updates an existing comment even when the delta is below threshold", async () => { - // This keeps the comment in sync after a PR that initially had a big diff - // gets reduced below the threshold by a follow-up commit. - stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); - const updateStub = sinon - .stub(client.rest.issues, "updateComment") - .resolves({ data: { id: 7 } } as never); - - const result = await upsertSizeComment({ - client, - owner, - repo, - prNumber, - body: `${COMMENT_MARKER}\nnew body`, - delta: 1, - baseSize: 1000, - }); - - assert.deepEqual(result, { action: "updated", commentId: 7 }); - sinon.assert.calledOnce(updateStub); - }); -}); - function escapeRegExp(s: string): string { return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } diff --git a/pr-checks/check-repo-size.ts b/pr-checks/check-repo-size.ts index 0b7d8cdcf2..11e6abf1f6 100644 --- a/pr-checks/check-repo-size.ts +++ b/pr-checks/check-repo-size.ts @@ -1,32 +1,28 @@ #!/usr/bin/env npx tsx /* -Computes the difference in the `.tar.gz`'d checkout size of the repo between the PR head and the PR -base, and posts/updates a sticky comment on the PR when the change is significant in either -direction. This size is relevant because it corresponds to the duration of the "Download action +Measures the difference in the `.tar.gz`'d checkout size of the repo between the PR head and the PR +base. This size is relevant because it corresponds to the duration of the "Download action repository" step that happens at the start of every job that uses this Action. -Designed to be invoked from the `Check repo size` workflow on PR events, but also runnable locally -(with --dry-run) for testing. +Writes the candidate sticky-comment body and a small metadata file to `--output-dir`. A separate +workflow job consumes those artifacts and decides whether to create or update a PR comment. */ import { spawn } from "node:child_process"; +import * as fs from "node:fs"; import * as path from "node:path"; import { parseArgs } from "node:util"; -import { type ApiClient, getApiClient } from "./api-client"; - /** Hidden marker used to find the existing sticky comment on a PR. */ export const COMMENT_MARKER = ""; export const DEFAULT_BASE_REF = "main"; -export const DEFAULT_REPOSITORY = "github/codeql-action"; /** - * Fraction of the base archive size at which a delta is considered - * significant enough to warrant a new sticky comment. We always update an - * existing comment regardless, so the comment stays in sync as the diff - * evolves. + * Fraction of the base archive size at which a delta is considered significant enough to warrant + * a new sticky comment. We always update an existing comment regardless, so the comment stays in + * sync as the diff evolves. */ export const SIGNIFICANT_DELTA_FRACTION = 0.1; @@ -66,9 +62,8 @@ export async function measureArchiveSize( } /** - * Format a byte count into a human-readable string with binary units. If - * `signed` is true, a leading `+` is prepended for non-negative values so - * gains and losses are visually distinct. + * Format a byte count into a human-readable string with binary units. If `signed` is true, a + * leading `+` is prepended for non-negative values so gains and losses are visually distinct. */ export function formatBytes(bytes: number, signed = false): string { const sign = bytes < 0 ? "-" : signed ? "+" : ""; @@ -131,130 +126,46 @@ export function isDeltaSignificant( return Math.abs(delta) >= baseSize * fraction; } -export interface UpsertOptions { - client: ApiClient; - owner: string; - repo: string; - prNumber: number; - body: string; - delta: number; - baseSize: number; -} - -export type UpsertResult = - | { action: "updated"; commentId: number } - | { action: "created"; commentId: number } - | { action: "skipped"; reason: string }; - -/** - * Find an existing sticky comment on the PR by HTML marker. If one exists, - * always update it (so it stays in sync). Otherwise, only create a new - * comment when the delta is currently significant. - */ -export async function upsertSizeComment( - opts: UpsertOptions, -): Promise { - const { client, owner, repo, prNumber, body, delta, baseSize } = opts; - - const comments = await client.paginate( - "GET /repos/{owner}/{repo}/issues/{issue_number}/comments", - { - owner, - repo, - issue_number: prNumber, - per_page: 100, - }, - ); - const existing = comments.find((c) => - (c.body ?? "").includes(COMMENT_MARKER), - ); - - if (existing) { - await client.rest.issues.updateComment({ - owner, - repo, - comment_id: existing.id, - body, - }); - return { action: "updated", commentId: existing.id }; - } - - if (isDeltaSignificant(delta, baseSize, SIGNIFICANT_DELTA_FRACTION)) { - const { data } = await client.rest.issues.createComment({ - owner, - repo, - issue_number: prNumber, - body, - }); - return { action: "created", commentId: data.id }; - } - - return { - action: "skipped", - reason: - `delta ${delta} bytes is below ` + - `${(SIGNIFICANT_DELTA_FRACTION * 100).toFixed(2)}% of base size ` + - `${baseSize} bytes`, - }; -} - interface MainArgs { - /** Base ref of the PR. Defaults to `main`, and is used as the label in the PR comment. */ + /** Base ref of the PR. Defaults to `main`. Used as the label in the PR comment. */ baseRef: string; - /** Base commit to archive. Defaults to `origin/main` for local dry runs. */ + /** Base commit-ish to archive. Defaults to `origin/` for local runs. */ baseCommitish: string; - /** Numeric PR number used to find / create / update the sticky comment. Required outside dry-run. */ - prNumber?: number; - /** `owner/repo` slug, defaulting to `github/codeql-action`, split before being passed to Octokit. */ - ownerRepo: string; /** Optional URL of the workflow run, surfaced in the comment footer. */ runUrl?: string; - /** When true, log the would-be comment instead of calling GitHub. */ - dryRun: boolean; - /** GitHub token used to authenticate Octokit. Required unless `dryRun` is true. */ - token?: string; + /** Directory where `body.md` and `metadata.json` are written. */ + outputDir: string; } export function readArgs(): MainArgs { const { values } = parseArgs({ options: { - "dry-run": { type: "boolean", default: false }, + "output-dir": { type: "string" }, }, strict: true, }); - const dryRun = values["dry-run"] ?? false; + const outputDir = values["output-dir"]; + if (!outputDir) { + throw new Error("--output-dir is required"); + } + const baseRef = process.env.BASE_REF ?? DEFAULT_BASE_REF; const baseCommitish = process.env.BASE_SHA ?? `origin/${baseRef}`; - const prNumberStr = process.env.PR_NUMBER; - const repo = process.env.GITHUB_REPOSITORY ?? DEFAULT_REPOSITORY; - - let prNumber: number | undefined; - if (prNumberStr) { - prNumber = Number.parseInt(prNumberStr, 10); - if (!Number.isFinite(prNumber)) { - throw new Error(`Invalid PR_NUMBER value: ${prNumberStr}`); - } - } else if (!dryRun) { - throw new Error("Missing PR_NUMBER env var"); - } return { baseRef, baseCommitish, - prNumber, - ownerRepo: repo, runUrl: process.env.RUN_URL, - dryRun, - token: process.env.GITHUB_TOKEN, + outputDir, }; } async function main(): Promise { const args = readArgs(); - // The script lives at `/pr-checks/check-repo-size.ts`, so the repo - // root is always the parent directory. + // The script lives at `/pr-checks/check-repo-size.ts`, so the repo root is the parent + // directory. const repoRoot = path.resolve(__dirname, ".."); console.log(`Measuring base archive size for ${args.baseCommitish}...`); @@ -266,7 +177,16 @@ async function main(): Promise { console.log(` ${prSize} bytes`); const delta = prSize - baseSize; - console.log(`Delta: ${delta} bytes`); + const significant = isDeltaSignificant( + delta, + baseSize, + SIGNIFICANT_DELTA_FRACTION, + ); + console.log( + `Delta: ${delta} bytes (significant=${significant}, threshold=${( + SIGNIFICANT_DELTA_FRACTION * 100 + ).toFixed(2)}%)`, + ); const body = buildCommentBody({ baseRef: args.baseRef, @@ -275,55 +195,17 @@ async function main(): Promise { runUrl: args.runUrl, }); - if (args.dryRun) { - const significant = isDeltaSignificant( - delta, - baseSize, - SIGNIFICANT_DELTA_FRACTION, - ); - console.log( - `--dry-run: significant=${significant} (threshold ${( - SIGNIFICANT_DELTA_FRACTION * 100 - ).toFixed(2)}%); would post:\n${body}`, - ); - return 0; - } - - if (!args.token) { - throw new Error( - "GITHUB_TOKEN env var is required when not running with --dry-run", - ); - } - if (args.prNumber === undefined) { - throw new Error("Missing PR_NUMBER env var"); - } - - const [owner, repo] = args.ownerRepo.split("/"); - if (!owner || !repo) { - throw new Error(`Invalid GITHUB_REPOSITORY value: ${args.ownerRepo}`); - } - - const result = await upsertSizeComment({ - client: getApiClient(args.token), - owner, - repo, - prNumber: args.prNumber, - body, - delta, - baseSize, - }); - - switch (result.action) { - case "updated": - console.log(`Updated existing comment ${result.commentId}.`); - break; - case "created": - console.log(`Created new comment ${result.commentId}.`); - break; - case "skipped": - console.log(`Skipped commenting: ${result.reason}.`); - break; - } + fs.mkdirSync(args.outputDir, { recursive: true }); + fs.writeFileSync(path.join(args.outputDir, "body.md"), body); + fs.writeFileSync( + path.join(args.outputDir, "metadata.json"), + `${JSON.stringify( + { significant, baseRef: args.baseRef, baseSize, prSize, delta }, + null, + 2, + )}\n`, + ); + console.log(`Wrote body.md and metadata.json to ${args.outputDir}.`); return 0; } diff --git a/pr-checks/excluded.yml b/pr-checks/excluded.yml index 104c1009eb..e2814240ac 100644 --- a/pr-checks/excluded.yml +++ b/pr-checks/excluded.yml @@ -12,5 +12,6 @@ is: - "Agent" - "Cleanup artifacts" - "Prepare" + - "Post repo size comment" - "Upload results" - "Label PR with size" diff --git a/pr-checks/package.json b/pr-checks/package.json index 0032b5fc53..2741560f68 100644 --- a/pr-checks/package.json +++ b/pr-checks/package.json @@ -11,8 +11,6 @@ }, "devDependencies": { "@types/node": "^20.19.39", - "@types/sinon": "^21.0.1", - "sinon": "^22.0.0", "tsx": "^4.21.0" } } From 2c8faa5e9fa0af7d4ba37ccd3edbdf5158bc05d7 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Mon, 18 May 2026 20:28:53 +0100 Subject: [PATCH 7/7] Pass comment body file directly --- .github/workflows/pr-checks.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index f2f781e439..6dee8fc306 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -209,7 +209,6 @@ jobs: PR_NUMBER: ${{ github.event.pull_request.number }} run: | significant=$(jq -r '.significant' repo-size-comment/metadata.json) - body=$(cat repo-size-comment/body.md) comment_id=$( gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" \ --paginate \ @@ -219,10 +218,10 @@ jobs: if [[ -n "$comment_id" ]]; then echo "Updating existing comment $comment_id." - gh api --method PATCH "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" --field body="$body" + gh api --method PATCH "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" --field body=@repo-size-comment/body.md elif [[ "$significant" == "true" ]]; then echo "Creating new repo size comment." - gh api --method POST "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --field body="$body" + gh api --method POST "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --field body=@repo-size-comment/body.md else echo "Skipping repo size comment because the delta is below the threshold and no sticky comment exists." fi