fix: avoid embedding large adversarial review diffs (#179)

* fix: avoid embedding large adversarial review diffs

* fix: preserve untracked content in lightweight review

* address comments

* fix: handle ENOBUFS type check in git diff sizing
This commit is contained in:
Dominik Kundel 2026-04-07 22:04:40 -07:00 committed by GitHub
parent d216a5fdea
commit bc8fa661a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 264 additions and 30 deletions

View File

@ -32,6 +32,7 @@ Actively try to disprove the change.
Look for violated invariants, missing guards, unhandled failure paths, and assumptions that stop being true under stress. Look for violated invariants, missing guards, unhandled failure paths, and assumptions that stop being true under stress.
Trace how bad inputs, retries, concurrent actions, or partially completed operations move through the code. Trace how bad inputs, retries, concurrent actions, or partially completed operations move through the code.
If the user supplied a focus area, weight it heavily, but still report any other material issue you can defend. If the user supplied a focus area, weight it heavily, but still report any other material issue you can defend.
{{REVIEW_COLLECTION_GUIDANCE}}
</review_method> </review_method>
<finding_bar> <finding_bar>

View File

@ -241,6 +241,7 @@ function buildAdversarialReviewPrompt(context, focusText) {
REVIEW_KIND: "Adversarial Review", REVIEW_KIND: "Adversarial Review",
TARGET_LABEL: context.target.label, TARGET_LABEL: context.target.label,
USER_FOCUS: focusText || "No extra focus provided.", USER_FOCUS: focusText || "No extra focus provided.",
REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
REVIEW_INPUT: context.content REVIEW_INPUT: context.content
}); });
} }

View File

@ -2,9 +2,11 @@ import fs from "node:fs";
import path from "node:path"; import path from "node:path";
import { isProbablyText } from "./fs.mjs"; import { isProbablyText } from "./fs.mjs";
import { runCommand, runCommandChecked } from "./process.mjs"; import { formatCommandFailure, runCommand, runCommandChecked } from "./process.mjs";
const MAX_UNTRACKED_BYTES = 24 * 1024; const MAX_UNTRACKED_BYTES = 24 * 1024;
const DEFAULT_INLINE_DIFF_MAX_FILES = 2;
const DEFAULT_INLINE_DIFF_MAX_BYTES = 256 * 1024;
function git(cwd, args, options = {}) { function git(cwd, args, options = {}) {
return runCommand("git", args, { cwd, ...options }); return runCommand("git", args, { cwd, ...options });
@ -14,6 +16,64 @@ function gitChecked(cwd, args, options = {}) {
return runCommandChecked("git", args, { cwd, ...options }); return runCommandChecked("git", args, { cwd, ...options });
} }
function listUniqueFiles(...groups) {
return [...new Set(groups.flat().filter(Boolean))].sort();
}
function normalizeMaxInlineFiles(value) {
const parsed = Number(value);
if (!Number.isFinite(parsed) || parsed < 0) {
return DEFAULT_INLINE_DIFF_MAX_FILES;
}
return Math.floor(parsed);
}
function normalizeMaxInlineDiffBytes(value) {
const parsed = Number(value);
if (!Number.isFinite(parsed) || parsed < 0) {
return DEFAULT_INLINE_DIFF_MAX_BYTES;
}
return Math.floor(parsed);
}
function measureGitOutputBytes(cwd, args, maxBytes) {
const result = git(cwd, args, { maxBuffer: maxBytes + 1 });
if (result.error && /** @type {NodeJS.ErrnoException} */ (result.error).code === "ENOBUFS") {
return maxBytes + 1;
}
if (result.error) {
throw result.error;
}
if (result.status !== 0) {
throw new Error(formatCommandFailure(result));
}
return Buffer.byteLength(result.stdout, "utf8");
}
function measureCombinedGitOutputBytes(cwd, argSets, maxBytes) {
let totalBytes = 0;
for (const args of argSets) {
const remainingBytes = maxBytes - totalBytes;
if (remainingBytes < 0) {
return maxBytes + 1;
}
totalBytes += measureGitOutputBytes(cwd, args, remainingBytes);
if (totalBytes > maxBytes) {
return totalBytes;
}
}
return totalBytes;
}
function buildBranchComparison(cwd, baseRef) {
const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim();
return {
mergeBase,
commitRange: `${mergeBase}..HEAD`,
reviewRange: `${baseRef}...HEAD`
};
}
export function ensureGitRepository(cwd) { export function ensureGitRepository(cwd) {
const result = git(cwd, ["rev-parse", "--show-toplevel"]); const result = git(cwd, ["rev-parse", "--show-toplevel"]);
const errorCode = result.error && "code" in result.error ? result.error.code : null; const errorCode = result.error && "code" in result.error ? result.error.code : null;
@ -161,55 +221,115 @@ function formatUntrackedFile(cwd, relativePath) {
return [`### ${relativePath}`, "```", buffer.toString("utf8").trimEnd(), "```"].join("\n"); return [`### ${relativePath}`, "```", buffer.toString("utf8").trimEnd(), "```"].join("\n");
} }
function collectWorkingTreeContext(cwd, state) { function collectWorkingTreeContext(cwd, state, options = {}) {
const status = gitChecked(cwd, ["status", "--short"]).stdout.trim(); const includeDiff = options.includeDiff !== false;
const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; const status = gitChecked(cwd, ["status", "--short", "--untracked-files=all"]).stdout.trim();
const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; const changedFiles = listUniqueFiles(state.staged, state.unstaged, state.untracked);
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
const parts = [ let parts;
formatSection("Git Status", status), if (includeDiff) {
formatSection("Staged Diff", stagedDiff), const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
formatSection("Unstaged Diff", unstagedDiff), const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
formatSection("Untracked Files", untrackedBody) const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
]; parts = [
formatSection("Git Status", status),
formatSection("Staged Diff", stagedDiff),
formatSection("Unstaged Diff", unstagedDiff),
formatSection("Untracked Files", untrackedBody)
];
} else {
const stagedStat = gitChecked(cwd, ["diff", "--shortstat", "--cached"]).stdout.trim();
const unstagedStat = gitChecked(cwd, ["diff", "--shortstat"]).stdout.trim();
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
parts = [
formatSection("Git Status", status),
formatSection("Staged Diff Stat", stagedStat),
formatSection("Unstaged Diff Stat", unstagedStat),
formatSection("Changed Files", changedFiles.join("\n")),
formatSection("Untracked Files", untrackedBody)
];
}
return { return {
mode: "working-tree", mode: "working-tree",
summary: `Reviewing ${state.staged.length} staged, ${state.unstaged.length} unstaged, and ${state.untracked.length} untracked file(s).`, summary: `Reviewing ${state.staged.length} staged, ${state.unstaged.length} unstaged, and ${state.untracked.length} untracked file(s).`,
content: parts.join("\n") content: parts.join("\n"),
changedFiles
}; };
} }
function collectBranchContext(cwd, baseRef) { function collectBranchContext(cwd, baseRef, options = {}) {
const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim(); const includeDiff = options.includeDiff !== false;
const commitRange = `${mergeBase}..HEAD`; const comparison = options.comparison ?? buildBranchComparison(cwd, baseRef);
const currentBranch = getCurrentBranch(cwd); const currentBranch = getCurrentBranch(cwd);
const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", commitRange]).stdout.trim(); const changedFiles = gitChecked(cwd, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean);
const diffStat = gitChecked(cwd, ["diff", "--stat", commitRange]).stdout.trim(); const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", comparison.commitRange]).stdout.trim();
const diff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", commitRange]).stdout; const diffStat = gitChecked(cwd, ["diff", "--stat", comparison.commitRange]).stdout.trim();
return { return {
mode: "branch", mode: "branch",
summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${mergeBase}.`, summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${comparison.mergeBase}.`,
content: [ content: includeDiff
formatSection("Commit Log", logOutput), ? [
formatSection("Diff Stat", diffStat), formatSection("Commit Log", logOutput),
formatSection("Branch Diff", diff) formatSection("Diff Stat", diffStat),
].join("\n") formatSection(
"Branch Diff",
gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange]).stdout
)
].join("\n")
: [
formatSection("Commit Log", logOutput),
formatSection("Diff Stat", diffStat),
formatSection("Changed Files", changedFiles.join("\n"))
].join("\n"),
changedFiles,
comparison
}; };
} }
export function collectReviewContext(cwd, target) { function buildAdversarialCollectionGuidance(options = {}) {
if (options.includeDiff !== false) {
return "Use the repository context below as primary evidence.";
}
return "The repository context below is a lightweight summary. Inspect the target diff yourself with read-only git commands before finalizing findings.";
}
export function collectReviewContext(cwd, target, options = {}) {
const repoRoot = getRepoRoot(cwd); const repoRoot = getRepoRoot(cwd);
const state = getWorkingTreeState(cwd); const currentBranch = getCurrentBranch(repoRoot);
const currentBranch = getCurrentBranch(cwd); const maxInlineFiles = normalizeMaxInlineFiles(options.maxInlineFiles);
const maxInlineDiffBytes = normalizeMaxInlineDiffBytes(options.maxInlineDiffBytes);
let details; let details;
let includeDiff;
let diffBytes;
if (target.mode === "working-tree") { if (target.mode === "working-tree") {
details = collectWorkingTreeContext(repoRoot, state); const state = getWorkingTreeState(repoRoot);
diffBytes = measureCombinedGitOutputBytes(
repoRoot,
[
["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"],
["diff", "--binary", "--no-ext-diff", "--submodule=diff"]
],
maxInlineDiffBytes
);
includeDiff =
options.includeDiff ??
(listUniqueFiles(state.staged, state.unstaged, state.untracked).length <= maxInlineFiles &&
diffBytes <= maxInlineDiffBytes);
details = collectWorkingTreeContext(repoRoot, state, { includeDiff });
} else { } else {
details = collectBranchContext(repoRoot, target.baseRef); const comparison = buildBranchComparison(repoRoot, target.baseRef);
const fileCount = gitChecked(repoRoot, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean).length;
diffBytes = measureGitOutputBytes(
repoRoot,
["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange],
maxInlineDiffBytes
);
includeDiff = options.includeDiff ?? (fileCount <= maxInlineFiles && diffBytes <= maxInlineDiffBytes);
details = collectBranchContext(repoRoot, target.baseRef, { includeDiff, comparison });
} }
return { return {
@ -217,6 +337,10 @@ export function collectReviewContext(cwd, target) {
repoRoot, repoRoot,
branch: currentBranch, branch: currentBranch,
target, target,
fileCount: details.changedFiles.length,
diffBytes,
inputMode: includeDiff ? "inline-diff" : "self-collect",
collectionGuidance: buildAdversarialCollectionGuidance({ includeDiff }),
...details ...details
}; };
} }

View File

@ -7,6 +7,7 @@ export function runCommand(command, args = [], options = {}) {
env: options.env, env: options.env,
encoding: "utf8", encoding: "utf8",
input: options.input, input: options.input,
maxBuffer: options.maxBuffer,
stdio: options.stdio ?? "pipe", stdio: options.stdio ?? "pipe",
shell: process.platform === "win32" ? (process.env.SHELL || true) : false, shell: process.platform === "win32" ? (process.env.SHELL || true) : false,
windowsHide: true windowsHide: true

View File

@ -69,6 +69,23 @@ test("resolveReviewTarget requires an explicit base when no default branch can b
); );
}); });
test("collectReviewContext keeps inline diffs for tiny adversarial reviews", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
fs.writeFileSync(path.join(cwd, "app.js"), "console.log('v1');\n");
run("git", ["add", "app.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "app.js"), "console.log('INLINE_MARKER');\n");
const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target);
assert.equal(context.inputMode, "inline-diff");
assert.equal(context.fileCount, 1);
assert.match(context.collectionGuidance, /primary evidence/i);
assert.match(context.content, /INLINE_MARKER/);
});
test("collectReviewContext skips untracked directories in working tree review", () => { test("collectReviewContext skips untracked directories in working tree review", () => {
const cwd = makeTempDir(); const cwd = makeTempDir();
initGitRepo(cwd); initGitRepo(cwd);
@ -101,3 +118,66 @@ test("collectReviewContext skips broken untracked symlinks instead of crashing",
assert.match(context.content, /### broken-link/); assert.match(context.content, /### broken-link/);
assert.match(context.content, /skipped: broken symlink or unreadable file/i); assert.match(context.content, /skipped: broken symlink or unreadable file/i);
}); });
test("collectReviewContext falls back to lightweight context for larger adversarial reviews", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
for (const name of ["a.js", "b.js", "c.js"]) {
fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`);
}
run("git", ["add", "a.js", "b.js", "c.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "SELF_COLLECT_MARKER_A";\n');
fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "SELF_COLLECT_MARKER_B";\n');
fs.writeFileSync(path.join(cwd, "c.js"), 'export const value = "SELF_COLLECT_MARKER_C";\n');
const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target);
assert.equal(context.inputMode, "self-collect");
assert.equal(context.fileCount, 3);
assert.match(context.collectionGuidance, /lightweight summary/i);
assert.match(context.collectionGuidance, /read-only git commands/i);
assert.doesNotMatch(context.content, /SELF_COLLECT_MARKER_[ABC]/);
assert.match(context.content, /## Changed Files/);
});
test("collectReviewContext falls back to lightweight context for oversized single-file diffs", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
fs.writeFileSync(path.join(cwd, "app.js"), "export const value = 'v1';\n");
run("git", ["add", "app.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "app.js"), `export const value = '${"x".repeat(512)}';\n`);
const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target, { maxInlineDiffBytes: 128 });
assert.equal(context.fileCount, 1);
assert.equal(context.inputMode, "self-collect");
assert.ok(context.diffBytes > 128);
assert.doesNotMatch(context.content, /xxx/);
assert.match(context.content, /## Changed Files/);
});
test("collectReviewContext keeps untracked file content in lightweight working tree context", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
for (const name of ["a.js", "b.js"]) {
fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`);
}
run("git", ["add", "a.js", "b.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "TRACKED_MARKER_A";\n');
fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "TRACKED_MARKER_B";\n');
fs.writeFileSync(path.join(cwd, "new-risk.js"), 'export const value = "UNTRACKED_RISK_MARKER";\n');
const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target);
assert.equal(context.inputMode, "self-collect");
assert.equal(context.fileCount, 3);
assert.doesNotMatch(context.content, /TRACKED_MARKER_[AB]/);
assert.match(context.content, /## Untracked Files/);
assert.match(context.content, /UNTRACKED_RISK_MARKER/);
});

View File

@ -273,6 +273,33 @@ test("adversarial review accepts the same base-branch targeting as review", () =
assert.match(result.stdout, /Missing empty-state guard/); assert.match(result.stdout, /Missing empty-state guard/);
}); });
test("adversarial review asks Codex to inspect larger diffs itself", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
installFakeCodex(binDir);
initGitRepo(repo);
fs.mkdirSync(path.join(repo, "src"));
for (const name of ["a.js", "b.js", "c.js"]) {
fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`);
}
run("git", ["add", "src/a.js", "src/b.js", "src/c.js"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });
fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n');
fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n');
fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n');
const result = run("node", [SCRIPT, "adversarial-review"], {
cwd: repo,
env: buildEnv(binDir)
});
assert.equal(result.status, 0, result.stderr);
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
assert.match(state.lastTurnStart.prompt, /lightweight summary/i);
assert.match(state.lastTurnStart.prompt, /read-only git commands/i);
assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/);
});
test("review includes reasoning output when the app server returns it", () => { test("review includes reasoning output when the app server returns it", () => {
const repo = makeTempDir(); const repo = makeTempDir();
const binDir = makeTempDir(); const binDir = makeTempDir();