Skip to content

Commit 355cb87

Browse files
dsymeCopilot
andauthored
fix(bundle): fetch prerequisite commits by SHA instead of broad deepen (#39466)
* fix(bundle): fetch prerequisite commits by SHA instead of broad deepen The safe_outputs bundle-application path could hang while "iteratively deepening origin/<base>" when updating a PR branch with a long, complex history. The deepen gate asked whether each bundle prerequisite was an ancestor of origin/<base>; for push-to-PR-branch updates the prerequisite lives on the PR branch and is never an ancestor of base, so deepening base could never satisfy it and escalated toward a full --unshallow that timed out. Rework ensureFullHistoryForBundle: - Primary path now fetches the exact prerequisite commit SHAs (declared by git bundle verify) directly from origin via `git fetch origin <sha>...`, which GitHub honors and which brings precisely the needed objects regardless of which branch they live on. - Gate changed from base-branch ancestry (merge-base --is-ancestor) to object presence (git cat-file -e <sha>^{commit}), which is what bundle apply actually requires. - Fallback deepen now steps by a small fixed increment (5 commits) up to a bounded iteration cap, re-checking presence each step, so a single fetch cannot pull a huge slice of monorepo history. --unshallow remains a last resort only. Update tests to cover the direct-SHA primary path and the deepen-by-5 fallback. * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent 7261603 commit 355cb87

4 files changed

Lines changed: 201 additions & 66 deletions

File tree

actions/setup/js/create_pull_request.test.cjs

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,14 @@ describe("create_pull_request - bundle transport shallow checkout", () => {
219219
return Promise.resolve({ exitCode: 0, stdout: "true\n", stderr: "" });
220220
}
221221
if (cmd === "git" && args[0] === "bundle" && args[1] === "verify") {
222-
// Declare a fake prerequisite so ensureFullHistoryForBundle proceeds to deepen.
222+
// Declare a fake prerequisite so ensureFullHistoryForBundle proceeds.
223223
return Promise.resolve({ exitCode: 1, stdout: "", stderr: `The bundle requires this ref:\n${"a".repeat(40)}\n` });
224224
}
225-
if (cmd === "git" && args[0] === "merge-base" && args[1] === "--is-ancestor") {
226-
// Report prereq missing initially → iterative deepen kicks in; after the
227-
// first deepen fetch we still report missing so the fallback --unshallow
228-
// path is exercised. The default mock for exec() resolves successfully,
229-
// so all 7 deepen steps complete instantly before the fallback fires.
230-
return Promise.resolve({ exitCode: 1, stdout: "", stderr: "" });
225+
if (cmd === "git" && args[0] === "cat-file" && args[1] === "-e") {
226+
// Report the prerequisite object as already present by default so
227+
// ensureFullHistoryForBundle returns early (no fetch). Tests that need
228+
// to exercise the fetch/deepen path override this within the test.
229+
return Promise.resolve({ exitCode: 0, stdout: "", stderr: "" });
231230
}
232231
if (cmd === "git" && args[0] === "rev-list") {
233232
return Promise.resolve({ exitCode: 0, stdout: "1\n", stderr: "" });
@@ -267,7 +266,7 @@ describe("create_pull_request - bundle transport shallow checkout", () => {
267266
vi.clearAllMocks();
268267
});
269268

270-
it("should deepen origin/<base> before fetching bundle in shallow repositories", async () => {
269+
it("should fetch bundle prerequisite commits directly from origin in shallow repositories", async () => {
271270
const patchPath = canonicalPatchPath("feature/test");
272271
fs.writeFileSync(
273272
patchPath,
@@ -290,6 +289,35 @@ index 0000000..abc1234
290289
const bundlePath = canonicalBundlePath("feature/test");
291290
fs.writeFileSync(bundlePath, "bundle content");
292291

292+
// Force the prerequisite-missing path so the direct SHA fetch runs.
293+
const prereq = "a".repeat(40);
294+
let prereqFetched = false;
295+
global.exec.getExecOutput = vi.fn().mockImplementation((cmd, args) => {
296+
if (cmd === "git" && args[0] === "rev-parse" && args[1] === "--is-shallow-repository") {
297+
return Promise.resolve({ exitCode: 0, stdout: "true\n", stderr: "" });
298+
}
299+
if (cmd === "git" && args[0] === "bundle" && args[1] === "verify") {
300+
return Promise.resolve({ exitCode: 1, stdout: "", stderr: `The bundle requires this ref:\n${prereq}\n` });
301+
}
302+
if (cmd === "git" && args[0] === "config") {
303+
return Promise.resolve({ exitCode: 1, stdout: "", stderr: "" });
304+
}
305+
if (cmd === "git" && args[0] === "cat-file" && args[1] === "-e") {
306+
// Missing until the direct SHA fetch brings it in.
307+
return Promise.resolve({ exitCode: prereqFetched ? 0 : 1, stdout: "", stderr: "" });
308+
}
309+
if (cmd === "git" && args[0] === "rev-list") {
310+
return Promise.resolve({ exitCode: 0, stdout: "1\n", stderr: "" });
311+
}
312+
return Promise.resolve({ exitCode: 0, stdout: "", stderr: "" });
313+
});
314+
global.exec.exec = vi.fn().mockImplementation((cmd, args) => {
315+
if (cmd === "git" && Array.isArray(args) && args[0] === "fetch" && args.includes("origin") && args.includes(prereq)) {
316+
prereqFetched = true;
317+
}
318+
return Promise.resolve(0);
319+
});
320+
293321
const { main } = require("./create_pull_request.cjs");
294322
const handler = await main({ base_branch: "main", preserve_branch_name: true });
295323
const result = await handler({ title: "Test PR", body: "Test body", branch: "feature/test" }, {});
@@ -305,11 +333,14 @@ index 0000000..abc1234
305333
const bundleTempRef = bundleFetchCall[1][2].split(":")[1];
306334
expect(global.exec.exec).toHaveBeenCalledWith("git", ["update-ref", "refs/heads/feature/test", bundleTempRef]);
307335
expect(global.exec.exec).toHaveBeenCalledWith("git", ["reset", "--hard"]);
308-
const bundleFetchCallIndex = global.exec.getExecOutput.mock.calls.findIndex(([, args]) => Array.isArray(args) && args[0] === "fetch" && args[1] === bundlePath);
309-
// Iterative deepen replaces a single --unshallow: assert the first --deepen step ran.
310-
const deepenCallIndex = global.exec.exec.mock.calls.findIndex(([, args]) => Array.isArray(args) && args[0] === "fetch" && typeof args[1] === "string" && args[1].startsWith("--deepen="));
311-
expect(deepenCallIndex).toBeGreaterThanOrEqual(0);
312-
expect(bundleFetchCallIndex).toBeGreaterThanOrEqual(0);
336+
// Primary path: the exact prerequisite SHA is fetched directly from origin,
337+
// with no broad iterative deepen and no --unshallow.
338+
const directFetch = global.exec.exec.mock.calls.find(([, args]) => Array.isArray(args) && args[0] === "fetch" && args.includes("origin") && args.includes(prereq));
339+
expect(directFetch).toBeTruthy();
340+
const deepenCall = global.exec.exec.mock.calls.find(([, args]) => Array.isArray(args) && args[0] === "fetch" && typeof args[1] === "string" && args[1].startsWith("--deepen="));
341+
expect(deepenCall).toBeUndefined();
342+
const unshallowCall = global.exec.exec.mock.calls.find(([, args]) => Array.isArray(args) && args[0] === "fetch" && args.includes("--unshallow"));
343+
expect(unshallowCall).toBeUndefined();
313344
});
314345

315346
it("should pass signed_commits false to bundle pushes", async () => {

actions/setup/js/git_helpers.cjs

Lines changed: 84 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,22 @@ function hasMergeCommitsInRange(baseRef, headRef, options = {}) {
233233
}
234234

235235
/**
236-
* Deepen sequence (per call to `git fetch --deepen=N`). Each value adds N
237-
* commits to the existing shallow history. Total reachable depth after the
238-
* final step is the sum of these values (~7850 commits).
236+
* Fallback deepen step size (commits added per `git fetch --deepen=N` call).
237+
*
238+
* The primary path fetches the exact prerequisite commit SHAs directly from
239+
* origin (see `ensureFullHistoryForBundle`), so this iterative deepen only runs
240+
* when fetch-by-SHA is unavailable or insufficient. We deepen in small
241+
* increments so a single fetch never tries to pull a huge slice of history,
242+
* which can time out on large monorepos with long, complex branch histories.
243+
*/
244+
const BUNDLE_DEEPEN_STEP = 5;
245+
246+
/**
247+
* Maximum number of fallback deepen iterations before giving up and attempting
248+
* `--unshallow`. With a step of 5 this caps the fallback at ~1000 commits of
249+
* deepening (200 * 5) before the last-resort unshallow.
239250
*/
240-
const BUNDLE_DEEPEN_STEPS = [50, 100, 200, 500, 1000, 2000, 4000];
251+
const BUNDLE_DEEPEN_MAX_ITERATIONS = 200;
241252

242253
/**
243254
* Extract prerequisite commit SHAs declared in a git bundle file.
@@ -291,18 +302,25 @@ async function getBundlePrerequisites(execApi, bundleFilePath, options = {}) {
291302
}
292303

293304
/**
294-
* Check which of the given SHAs are NOT yet ancestors of `targetRef`.
305+
* Check which of the given commit SHAs are NOT present in the local object
306+
* store. Uses `git cat-file -e <sha>^{commit}`, which exits non-zero when the
307+
* object is missing.
308+
*
309+
* This is the correct gate for bundle application: `git fetch <bundle>` only
310+
* needs the prerequisite *objects* to exist locally — it does not require them
311+
* to be reachable from any particular branch. (A prerequisite commit can live
312+
* on the pull request branch and never be an ancestor of the base branch, so an
313+
* ancestry-based check would loop forever trying to deepen the base.)
295314
*
296315
* @param {{ getExecOutput: Function }} execApi
297316
* @param {string[]} shas
298-
* @param {string} targetRef
299317
* @param {Object} [options]
300-
* @returns {Promise<string[]>} SHAs still missing (not ancestors / not present).
318+
* @returns {Promise<string[]>} SHAs whose commit object is not present locally.
301319
*/
302-
async function findMissingAncestors(execApi, shas, targetRef, options = {}) {
320+
async function findMissingObjects(execApi, shas, options = {}) {
303321
const missing = [];
304322
for (const sha of shas) {
305-
const { exitCode } = await execApi.getExecOutput("git", ["merge-base", "--is-ancestor", sha, targetRef], { ...options, ignoreReturnCode: true, silent: true });
323+
const { exitCode } = await execApi.getExecOutput("git", ["cat-file", "-e", `${sha}^{commit}`], { ...options, ignoreReturnCode: true, silent: true });
306324
if (exitCode !== 0) {
307325
missing.push(sha);
308326
}
@@ -311,21 +329,31 @@ async function findMissingAncestors(execApi, shas, targetRef, options = {}) {
311329
}
312330

313331
/**
314-
* Probe shallow-repository status before fetching a git bundle, and deepen
315-
* the local clone as needed so the bundle's prerequisite commits become
316-
* reachable from `origin/<baseRef>`.
332+
* Ensure a shallow checkout contains the prerequisite commits a git bundle
333+
* needs before `git fetch <bundle>` is attempted.
334+
*
335+
* Bundles generated from a commit range declare prerequisite commits. A shallow
336+
* checkout (e.g. `fetch-depth: 20`) may not contain them, and `git fetch
337+
* <bundle>` rejects the bundle before the caller can update refs.
317338
*
318-
* Bundles generated from a commit range can declare prerequisite commits. A
319-
* shallow checkout (e.g. `fetch-depth: 20`) may not contain those prerequisites,
320-
* and `git fetch <bundle>` will reject the bundle before the caller can update
321-
* refs. On a high-churn monorepo, `git fetch --unshallow` is catastrophic — it
322-
* downloads the entire history. Instead we iterate `git fetch origin <baseRef>
323-
* --deepen=<N>` with progressively larger N until every declared prerequisite
324-
* satisfies `git merge-base --is-ancestor <prereq> origin/<baseRef>`.
339+
* Strategy (best → worst):
340+
* 1. **Direct SHA fetch (primary).** The bundle declares *exactly* which
341+
* commits it requires (`git bundle verify`). We fetch those SHAs directly
342+
* from origin (`git fetch origin <sha>...`). GitHub honors fetch-by-SHA, so
343+
* this brings precisely the needed objects and is deterministic — it works
344+
* even when a prerequisite lives on the PR branch and is not an ancestor of
345+
* the base branch. This avoids walking back the base history entirely.
346+
* 2. **Iterative deepen (fallback).** Only when fetch-by-SHA is unavailable or
347+
* insufficient, deepen `origin/<baseRef>` in small `BUNDLE_DEEPEN_STEP`
348+
* increments (re-checking object presence each step) up to
349+
* `BUNDLE_DEEPEN_MAX_ITERATIONS`. Small steps keep any single fetch cheap so
350+
* it cannot time out by pulling a huge slice of a large monorepo's history.
351+
* 3. **`--unshallow` (last resort).** On a high-churn monorepo this downloads
352+
* the entire history, so it is only attempted after the bounded deepen.
325353
*
326354
* When `deepenOptions.baseRef` or `deepenOptions.bundleFilePath` is missing
327-
* (legacy callers), the function falls back to the previous behavior of a
328-
* single `git fetch --unshallow origin`.
355+
* (legacy callers), the function falls back to a single
356+
* `git fetch --unshallow origin`.
329357
*
330358
* @param {{ getExecOutput: Function, exec: Function }} execApi - Exec API to run git commands.
331359
* @param {Object} [options] - Options passed through to exec calls.
@@ -351,7 +379,7 @@ async function ensureFullHistoryForBundle(execApi, options = {}, deepenOptions =
351379

352380
// Legacy path: no base ref / bundle info known — fall back to a single
353381
// unshallow. Callers in monorepos should always supply baseRef + bundleFilePath
354-
// to get incremental deepening instead.
382+
// to get targeted prerequisite fetching instead.
355383
if (!baseRef || !bundleFilePath) {
356384
core.info("Repository is shallow; fetching full history before bundle processing (no baseRef/bundle info; using --unshallow)");
357385
await execApi.exec("git", ["fetch", "--unshallow", "origin"], options);
@@ -364,31 +392,52 @@ async function ensureFullHistoryForBundle(execApi, options = {}, deepenOptions =
364392
return;
365393
}
366394

367-
const targetRef = `origin/${baseRef}`;
368-
const alreadyMissing = await findMissingAncestors(execApi, prereqs, targetRef, options);
369-
if (alreadyMissing.length === 0) {
370-
core.info(`Bundle prerequisites already reachable from ${targetRef}; no deepen required`);
395+
let missing = await findMissingObjects(execApi, prereqs, options);
396+
if (missing.length === 0) {
397+
core.info("Bundle prerequisite commits already present locally; no fetch required");
371398
return;
372399
}
373400

374-
core.info(`Repository is shallow; iteratively deepening ${targetRef} to satisfy ${alreadyMissing.length} bundle prerequisite commit(s)`);
375-
let missing = alreadyMissing;
376-
for (const depth of BUNDLE_DEEPEN_STEPS) {
377-
core.info(`Fetching origin ${baseRef} with --deepen=${depth} (${missing.length} prerequisite(s) still missing)`);
401+
// PRIMARY: fetch the exact prerequisite commit SHAs directly from origin.
402+
// The bundle tells us precisely which commits it needs, so a targeted fetch by
403+
// SHA brings exactly those objects without deepening the base branch history.
404+
core.info(`Repository is shallow; fetching ${missing.length} bundle prerequisite commit(s) directly from origin by SHA`);
405+
const useBlobFilter = await isShallowOrSparseCheckout(execApi, options);
406+
const directFetchArgs = useBlobFilter ? ["fetch", "--filter=blob:none", "origin", ...missing] : ["fetch", "origin", ...missing];
407+
if (useBlobFilter) {
408+
core.info("Using --filter=blob:none for prerequisite SHA fetch (shallow or sparse checkout detected)");
409+
}
410+
try {
411+
await execApi.exec("git", directFetchArgs, options);
412+
missing = await findMissingObjects(execApi, prereqs, options);
413+
if (missing.length === 0) {
414+
core.info("Bundle prerequisite commits fetched directly from origin; no deepen required");
415+
return;
416+
}
417+
core.warning(`${missing.length} prerequisite commit(s) still missing after direct SHA fetch; falling back to iterative deepen`);
418+
} catch (directFetchError) {
419+
core.warning(`Direct prerequisite SHA fetch failed: ${getErrorMessage(directFetchError)}; falling back to iterative deepen`);
420+
}
421+
422+
// FALLBACK: deepen origin/<base> in small increments, re-checking object
423+
// presence after each step, until the prerequisites are present or the
424+
// iteration cap is reached.
425+
core.info(`Iteratively deepening origin/${baseRef} by ${BUNDLE_DEEPEN_STEP} commit(s) at a time to satisfy ${missing.length} prerequisite commit(s)`);
426+
for (let iteration = 1; iteration <= BUNDLE_DEEPEN_MAX_ITERATIONS; iteration++) {
378427
try {
379-
await execApi.exec("git", ["fetch", `--deepen=${depth}`, "origin", baseRef], options);
428+
await execApi.exec("git", ["fetch", `--deepen=${BUNDLE_DEEPEN_STEP}`, "origin", baseRef], options);
380429
} catch (fetchError) {
381-
core.warning(`git fetch --deepen=${depth} origin ${baseRef} failed: ${getErrorMessage(fetchError)}; aborting iterative deepen`);
430+
core.warning(`git fetch --deepen=${BUNDLE_DEEPEN_STEP} origin ${baseRef} failed: ${getErrorMessage(fetchError)}; aborting iterative deepen`);
382431
break;
383432
}
384-
missing = await findMissingAncestors(execApi, prereqs, targetRef, options);
433+
missing = await findMissingObjects(execApi, prereqs, options);
385434
if (missing.length === 0) {
386-
core.info(`Bundle prerequisites reachable after --deepen=${depth}`);
435+
core.info(`Bundle prerequisite commits present after deepening ${iteration * BUNDLE_DEEPEN_STEP} commit(s)`);
387436
return;
388437
}
389438
}
390439

391-
core.warning(`Bundle prerequisites still not reachable after iterative deepen (${missing.length} remaining); attempting --unshallow as a last resort`);
440+
core.warning(`Bundle prerequisites still not present after iterative deepen (${missing.length} remaining); attempting --unshallow as a last resort`);
392441
try {
393442
await execApi.exec("git", ["fetch", "--unshallow", "origin", baseRef], options);
394443
} catch (unshallowError) {

0 commit comments

Comments
 (0)