fix: avoid embedding large adversarial review diffs (#179)
* fix: avoid embedding large adversarial review diffs * fix: preserve untracked content in lightweight review * address comments * fix: handle ENOBUFS type check in git diff sizing
This commit is contained in:
parent
d216a5fdea
commit
bc8fa661a5
@ -32,6 +32,7 @@ Actively try to disprove the change.
|
||||
Look for violated invariants, missing guards, unhandled failure paths, and assumptions that stop being true under stress.
|
||||
Trace how bad inputs, retries, concurrent actions, or partially completed operations move through the code.
|
||||
If the user supplied a focus area, weight it heavily, but still report any other material issue you can defend.
|
||||
{{REVIEW_COLLECTION_GUIDANCE}}
|
||||
</review_method>
|
||||
|
||||
<finding_bar>
|
||||
|
||||
@ -241,6 +241,7 @@ function buildAdversarialReviewPrompt(context, focusText) {
|
||||
REVIEW_KIND: "Adversarial Review",
|
||||
TARGET_LABEL: context.target.label,
|
||||
USER_FOCUS: focusText || "No extra focus provided.",
|
||||
REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
|
||||
REVIEW_INPUT: context.content
|
||||
});
|
||||
}
|
||||
|
||||
@ -2,9 +2,11 @@ import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
import { isProbablyText } from "./fs.mjs";
|
||||
import { runCommand, runCommandChecked } from "./process.mjs";
|
||||
import { formatCommandFailure, runCommand, runCommandChecked } from "./process.mjs";
|
||||
|
||||
const MAX_UNTRACKED_BYTES = 24 * 1024;
|
||||
const DEFAULT_INLINE_DIFF_MAX_FILES = 2;
|
||||
const DEFAULT_INLINE_DIFF_MAX_BYTES = 256 * 1024;
|
||||
|
||||
function git(cwd, args, options = {}) {
|
||||
return runCommand("git", args, { cwd, ...options });
|
||||
@ -14,6 +16,64 @@ function gitChecked(cwd, args, options = {}) {
|
||||
return runCommandChecked("git", args, { cwd, ...options });
|
||||
}
|
||||
|
||||
function listUniqueFiles(...groups) {
|
||||
return [...new Set(groups.flat().filter(Boolean))].sort();
|
||||
}
|
||||
|
||||
function normalizeMaxInlineFiles(value) {
|
||||
const parsed = Number(value);
|
||||
if (!Number.isFinite(parsed) || parsed < 0) {
|
||||
return DEFAULT_INLINE_DIFF_MAX_FILES;
|
||||
}
|
||||
return Math.floor(parsed);
|
||||
}
|
||||
|
||||
function normalizeMaxInlineDiffBytes(value) {
|
||||
const parsed = Number(value);
|
||||
if (!Number.isFinite(parsed) || parsed < 0) {
|
||||
return DEFAULT_INLINE_DIFF_MAX_BYTES;
|
||||
}
|
||||
return Math.floor(parsed);
|
||||
}
|
||||
|
||||
function measureGitOutputBytes(cwd, args, maxBytes) {
|
||||
const result = git(cwd, args, { maxBuffer: maxBytes + 1 });
|
||||
if (result.error && /** @type {NodeJS.ErrnoException} */ (result.error).code === "ENOBUFS") {
|
||||
return maxBytes + 1;
|
||||
}
|
||||
if (result.error) {
|
||||
throw result.error;
|
||||
}
|
||||
if (result.status !== 0) {
|
||||
throw new Error(formatCommandFailure(result));
|
||||
}
|
||||
return Buffer.byteLength(result.stdout, "utf8");
|
||||
}
|
||||
|
||||
function measureCombinedGitOutputBytes(cwd, argSets, maxBytes) {
|
||||
let totalBytes = 0;
|
||||
for (const args of argSets) {
|
||||
const remainingBytes = maxBytes - totalBytes;
|
||||
if (remainingBytes < 0) {
|
||||
return maxBytes + 1;
|
||||
}
|
||||
totalBytes += measureGitOutputBytes(cwd, args, remainingBytes);
|
||||
if (totalBytes > maxBytes) {
|
||||
return totalBytes;
|
||||
}
|
||||
}
|
||||
return totalBytes;
|
||||
}
|
||||
|
||||
function buildBranchComparison(cwd, baseRef) {
|
||||
const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim();
|
||||
return {
|
||||
mergeBase,
|
||||
commitRange: `${mergeBase}..HEAD`,
|
||||
reviewRange: `${baseRef}...HEAD`
|
||||
};
|
||||
}
|
||||
|
||||
export function ensureGitRepository(cwd) {
|
||||
const result = git(cwd, ["rev-parse", "--show-toplevel"]);
|
||||
const errorCode = result.error && "code" in result.error ? result.error.code : null;
|
||||
@ -161,55 +221,115 @@ function formatUntrackedFile(cwd, relativePath) {
|
||||
return [`### ${relativePath}`, "```", buffer.toString("utf8").trimEnd(), "```"].join("\n");
|
||||
}
|
||||
|
||||
function collectWorkingTreeContext(cwd, state) {
|
||||
const status = gitChecked(cwd, ["status", "--short"]).stdout.trim();
|
||||
const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
|
||||
const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
|
||||
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
|
||||
function collectWorkingTreeContext(cwd, state, options = {}) {
|
||||
const includeDiff = options.includeDiff !== false;
|
||||
const status = gitChecked(cwd, ["status", "--short", "--untracked-files=all"]).stdout.trim();
|
||||
const changedFiles = listUniqueFiles(state.staged, state.unstaged, state.untracked);
|
||||
|
||||
const parts = [
|
||||
formatSection("Git Status", status),
|
||||
formatSection("Staged Diff", stagedDiff),
|
||||
formatSection("Unstaged Diff", unstagedDiff),
|
||||
formatSection("Untracked Files", untrackedBody)
|
||||
];
|
||||
let parts;
|
||||
if (includeDiff) {
|
||||
const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
|
||||
const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
|
||||
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
|
||||
parts = [
|
||||
formatSection("Git Status", status),
|
||||
formatSection("Staged Diff", stagedDiff),
|
||||
formatSection("Unstaged Diff", unstagedDiff),
|
||||
formatSection("Untracked Files", untrackedBody)
|
||||
];
|
||||
} else {
|
||||
const stagedStat = gitChecked(cwd, ["diff", "--shortstat", "--cached"]).stdout.trim();
|
||||
const unstagedStat = gitChecked(cwd, ["diff", "--shortstat"]).stdout.trim();
|
||||
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
|
||||
parts = [
|
||||
formatSection("Git Status", status),
|
||||
formatSection("Staged Diff Stat", stagedStat),
|
||||
formatSection("Unstaged Diff Stat", unstagedStat),
|
||||
formatSection("Changed Files", changedFiles.join("\n")),
|
||||
formatSection("Untracked Files", untrackedBody)
|
||||
];
|
||||
}
|
||||
|
||||
return {
|
||||
mode: "working-tree",
|
||||
summary: `Reviewing ${state.staged.length} staged, ${state.unstaged.length} unstaged, and ${state.untracked.length} untracked file(s).`,
|
||||
content: parts.join("\n")
|
||||
content: parts.join("\n"),
|
||||
changedFiles
|
||||
};
|
||||
}
|
||||
|
||||
function collectBranchContext(cwd, baseRef) {
|
||||
const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim();
|
||||
const commitRange = `${mergeBase}..HEAD`;
|
||||
function collectBranchContext(cwd, baseRef, options = {}) {
|
||||
const includeDiff = options.includeDiff !== false;
|
||||
const comparison = options.comparison ?? buildBranchComparison(cwd, baseRef);
|
||||
const currentBranch = getCurrentBranch(cwd);
|
||||
const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", commitRange]).stdout.trim();
|
||||
const diffStat = gitChecked(cwd, ["diff", "--stat", commitRange]).stdout.trim();
|
||||
const diff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", commitRange]).stdout;
|
||||
const changedFiles = gitChecked(cwd, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean);
|
||||
const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", comparison.commitRange]).stdout.trim();
|
||||
const diffStat = gitChecked(cwd, ["diff", "--stat", comparison.commitRange]).stdout.trim();
|
||||
|
||||
return {
|
||||
mode: "branch",
|
||||
summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${mergeBase}.`,
|
||||
content: [
|
||||
formatSection("Commit Log", logOutput),
|
||||
formatSection("Diff Stat", diffStat),
|
||||
formatSection("Branch Diff", diff)
|
||||
].join("\n")
|
||||
summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${comparison.mergeBase}.`,
|
||||
content: includeDiff
|
||||
? [
|
||||
formatSection("Commit Log", logOutput),
|
||||
formatSection("Diff Stat", diffStat),
|
||||
formatSection(
|
||||
"Branch Diff",
|
||||
gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange]).stdout
|
||||
)
|
||||
].join("\n")
|
||||
: [
|
||||
formatSection("Commit Log", logOutput),
|
||||
formatSection("Diff Stat", diffStat),
|
||||
formatSection("Changed Files", changedFiles.join("\n"))
|
||||
].join("\n"),
|
||||
changedFiles,
|
||||
comparison
|
||||
};
|
||||
}
|
||||
|
||||
export function collectReviewContext(cwd, target) {
|
||||
function buildAdversarialCollectionGuidance(options = {}) {
|
||||
if (options.includeDiff !== false) {
|
||||
return "Use the repository context below as primary evidence.";
|
||||
}
|
||||
|
||||
return "The repository context below is a lightweight summary. Inspect the target diff yourself with read-only git commands before finalizing findings.";
|
||||
}
|
||||
|
||||
export function collectReviewContext(cwd, target, options = {}) {
|
||||
const repoRoot = getRepoRoot(cwd);
|
||||
const state = getWorkingTreeState(cwd);
|
||||
const currentBranch = getCurrentBranch(cwd);
|
||||
const currentBranch = getCurrentBranch(repoRoot);
|
||||
const maxInlineFiles = normalizeMaxInlineFiles(options.maxInlineFiles);
|
||||
const maxInlineDiffBytes = normalizeMaxInlineDiffBytes(options.maxInlineDiffBytes);
|
||||
let details;
|
||||
let includeDiff;
|
||||
let diffBytes;
|
||||
|
||||
if (target.mode === "working-tree") {
|
||||
details = collectWorkingTreeContext(repoRoot, state);
|
||||
const state = getWorkingTreeState(repoRoot);
|
||||
diffBytes = measureCombinedGitOutputBytes(
|
||||
repoRoot,
|
||||
[
|
||||
["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"],
|
||||
["diff", "--binary", "--no-ext-diff", "--submodule=diff"]
|
||||
],
|
||||
maxInlineDiffBytes
|
||||
);
|
||||
includeDiff =
|
||||
options.includeDiff ??
|
||||
(listUniqueFiles(state.staged, state.unstaged, state.untracked).length <= maxInlineFiles &&
|
||||
diffBytes <= maxInlineDiffBytes);
|
||||
details = collectWorkingTreeContext(repoRoot, state, { includeDiff });
|
||||
} else {
|
||||
details = collectBranchContext(repoRoot, target.baseRef);
|
||||
const comparison = buildBranchComparison(repoRoot, target.baseRef);
|
||||
const fileCount = gitChecked(repoRoot, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean).length;
|
||||
diffBytes = measureGitOutputBytes(
|
||||
repoRoot,
|
||||
["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange],
|
||||
maxInlineDiffBytes
|
||||
);
|
||||
includeDiff = options.includeDiff ?? (fileCount <= maxInlineFiles && diffBytes <= maxInlineDiffBytes);
|
||||
details = collectBranchContext(repoRoot, target.baseRef, { includeDiff, comparison });
|
||||
}
|
||||
|
||||
return {
|
||||
@ -217,6 +337,10 @@ export function collectReviewContext(cwd, target) {
|
||||
repoRoot,
|
||||
branch: currentBranch,
|
||||
target,
|
||||
fileCount: details.changedFiles.length,
|
||||
diffBytes,
|
||||
inputMode: includeDiff ? "inline-diff" : "self-collect",
|
||||
collectionGuidance: buildAdversarialCollectionGuidance({ includeDiff }),
|
||||
...details
|
||||
};
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@ export function runCommand(command, args = [], options = {}) {
|
||||
env: options.env,
|
||||
encoding: "utf8",
|
||||
input: options.input,
|
||||
maxBuffer: options.maxBuffer,
|
||||
stdio: options.stdio ?? "pipe",
|
||||
shell: process.platform === "win32" ? (process.env.SHELL || true) : false,
|
||||
windowsHide: true
|
||||
|
||||
@ -69,6 +69,23 @@ test("resolveReviewTarget requires an explicit base when no default branch can b
|
||||
);
|
||||
});
|
||||
|
||||
test("collectReviewContext keeps inline diffs for tiny adversarial reviews", () => {
|
||||
const cwd = makeTempDir();
|
||||
initGitRepo(cwd);
|
||||
fs.writeFileSync(path.join(cwd, "app.js"), "console.log('v1');\n");
|
||||
run("git", ["add", "app.js"], { cwd });
|
||||
run("git", ["commit", "-m", "init"], { cwd });
|
||||
fs.writeFileSync(path.join(cwd, "app.js"), "console.log('INLINE_MARKER');\n");
|
||||
|
||||
const target = resolveReviewTarget(cwd, {});
|
||||
const context = collectReviewContext(cwd, target);
|
||||
|
||||
assert.equal(context.inputMode, "inline-diff");
|
||||
assert.equal(context.fileCount, 1);
|
||||
assert.match(context.collectionGuidance, /primary evidence/i);
|
||||
assert.match(context.content, /INLINE_MARKER/);
|
||||
});
|
||||
|
||||
test("collectReviewContext skips untracked directories in working tree review", () => {
|
||||
const cwd = makeTempDir();
|
||||
initGitRepo(cwd);
|
||||
@ -101,3 +118,66 @@ test("collectReviewContext skips broken untracked symlinks instead of crashing",
|
||||
assert.match(context.content, /### broken-link/);
|
||||
assert.match(context.content, /skipped: broken symlink or unreadable file/i);
|
||||
});
|
||||
|
||||
test("collectReviewContext falls back to lightweight context for larger adversarial reviews", () => {
|
||||
const cwd = makeTempDir();
|
||||
initGitRepo(cwd);
|
||||
for (const name of ["a.js", "b.js", "c.js"]) {
|
||||
fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`);
|
||||
}
|
||||
run("git", ["add", "a.js", "b.js", "c.js"], { cwd });
|
||||
run("git", ["commit", "-m", "init"], { cwd });
|
||||
fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "SELF_COLLECT_MARKER_A";\n');
|
||||
fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "SELF_COLLECT_MARKER_B";\n');
|
||||
fs.writeFileSync(path.join(cwd, "c.js"), 'export const value = "SELF_COLLECT_MARKER_C";\n');
|
||||
|
||||
const target = resolveReviewTarget(cwd, {});
|
||||
const context = collectReviewContext(cwd, target);
|
||||
|
||||
assert.equal(context.inputMode, "self-collect");
|
||||
assert.equal(context.fileCount, 3);
|
||||
assert.match(context.collectionGuidance, /lightweight summary/i);
|
||||
assert.match(context.collectionGuidance, /read-only git commands/i);
|
||||
assert.doesNotMatch(context.content, /SELF_COLLECT_MARKER_[ABC]/);
|
||||
assert.match(context.content, /## Changed Files/);
|
||||
});
|
||||
|
||||
test("collectReviewContext falls back to lightweight context for oversized single-file diffs", () => {
|
||||
const cwd = makeTempDir();
|
||||
initGitRepo(cwd);
|
||||
fs.writeFileSync(path.join(cwd, "app.js"), "export const value = 'v1';\n");
|
||||
run("git", ["add", "app.js"], { cwd });
|
||||
run("git", ["commit", "-m", "init"], { cwd });
|
||||
fs.writeFileSync(path.join(cwd, "app.js"), `export const value = '${"x".repeat(512)}';\n`);
|
||||
|
||||
const target = resolveReviewTarget(cwd, {});
|
||||
const context = collectReviewContext(cwd, target, { maxInlineDiffBytes: 128 });
|
||||
|
||||
assert.equal(context.fileCount, 1);
|
||||
assert.equal(context.inputMode, "self-collect");
|
||||
assert.ok(context.diffBytes > 128);
|
||||
assert.doesNotMatch(context.content, /xxx/);
|
||||
assert.match(context.content, /## Changed Files/);
|
||||
});
|
||||
|
||||
test("collectReviewContext keeps untracked file content in lightweight working tree context", () => {
|
||||
const cwd = makeTempDir();
|
||||
initGitRepo(cwd);
|
||||
for (const name of ["a.js", "b.js"]) {
|
||||
fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`);
|
||||
}
|
||||
run("git", ["add", "a.js", "b.js"], { cwd });
|
||||
run("git", ["commit", "-m", "init"], { cwd });
|
||||
fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "TRACKED_MARKER_A";\n');
|
||||
fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "TRACKED_MARKER_B";\n');
|
||||
fs.writeFileSync(path.join(cwd, "new-risk.js"), 'export const value = "UNTRACKED_RISK_MARKER";\n');
|
||||
|
||||
const target = resolveReviewTarget(cwd, {});
|
||||
const context = collectReviewContext(cwd, target);
|
||||
|
||||
assert.equal(context.inputMode, "self-collect");
|
||||
assert.equal(context.fileCount, 3);
|
||||
assert.doesNotMatch(context.content, /TRACKED_MARKER_[AB]/);
|
||||
assert.match(context.content, /## Untracked Files/);
|
||||
assert.match(context.content, /UNTRACKED_RISK_MARKER/);
|
||||
});
|
||||
|
||||
@ -273,6 +273,33 @@ test("adversarial review accepts the same base-branch targeting as review", () =
|
||||
assert.match(result.stdout, /Missing empty-state guard/);
|
||||
});
|
||||
|
||||
test("adversarial review asks Codex to inspect larger diffs itself", () => {
|
||||
const repo = makeTempDir();
|
||||
const binDir = makeTempDir();
|
||||
installFakeCodex(binDir);
|
||||
initGitRepo(repo);
|
||||
fs.mkdirSync(path.join(repo, "src"));
|
||||
for (const name of ["a.js", "b.js", "c.js"]) {
|
||||
fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`);
|
||||
}
|
||||
run("git", ["add", "src/a.js", "src/b.js", "src/c.js"], { cwd: repo });
|
||||
run("git", ["commit", "-m", "init"], { cwd: repo });
|
||||
fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n');
|
||||
fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n');
|
||||
fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n');
|
||||
|
||||
const result = run("node", [SCRIPT, "adversarial-review"], {
|
||||
cwd: repo,
|
||||
env: buildEnv(binDir)
|
||||
});
|
||||
|
||||
assert.equal(result.status, 0, result.stderr);
|
||||
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
|
||||
assert.match(state.lastTurnStart.prompt, /lightweight summary/i);
|
||||
assert.match(state.lastTurnStart.prompt, /read-only git commands/i);
|
||||
assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/);
|
||||
});
|
||||
|
||||
test("review includes reasoning output when the app server returns it", () => {
|
||||
const repo = makeTempDir();
|
||||
const binDir = makeTempDir();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user