Replace standalone test-ast-chunking.mjs (823 lines, custom check() harness, invisible to CI) with proper vitest integration tests. All unique assertions preserved; duplicates already in ast.test.ts dropped. Performance benchmarks and real-collection scanner removed (dev tools, not regression tests). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
200 lines
7.4 KiB
TypeScript
200 lines
7.4 KiB
TypeScript
/**
|
|
* Integration tests for AST-aware chunking.
|
|
*
|
|
* Migrated from the standalone test-ast-chunking.mjs script into the
|
|
* vitest suite. Covers the integration between AST break point extraction
|
|
* and the chunking pipeline — areas not tested by the unit-level ast.test.ts.
|
|
*/
|
|
|
|
import { describe, test, expect } from "vitest";
|
|
import { getASTBreakPoints } from "../src/ast.js";
|
|
import {
|
|
chunkDocument,
|
|
chunkDocumentAsync,
|
|
chunkDocumentWithBreakPoints,
|
|
mergeBreakPoints,
|
|
scanBreakPoints,
|
|
findCodeFences,
|
|
} from "../src/store.js";
|
|
|
|
// ==========================================================================
|
|
// mergeBreakPoints
|
|
// ==========================================================================
|
|
|
|
describe("mergeBreakPoints", () => {
  test("merges regex and AST break points, higher score wins at same position", () => {
    const regexPoints = [
      { pos: 10, score: 20, type: "blank" },
      { pos: 50, score: 1, type: "newline" },
      { pos: 100, score: 20, type: "blank" },
    ];
    const astPoints = [
      { pos: 10, score: 90, type: "ast:func" },
      { pos: 75, score: 100, type: "ast:class" },
      { pos: 100, score: 60, type: "ast:import" },
    ];

    const merged = mergeBreakPoints(regexPoints, astPoints);

    // Positions 10 and 100 appear in both inputs, so six inputs yield four points.
    expect(merged).toHaveLength(4);
    expect(merged.find(p => p.pos === 10)?.score).toBe(90); // AST wins (90 > 20)
    expect(merged.find(p => p.pos === 50)?.score).toBe(1); // regex only
    expect(merged.find(p => p.pos === 75)?.score).toBe(100); // AST only
    expect(merged.find(p => p.pos === 100)?.score).toBe(60); // AST wins (60 > 20)
  });

  test("result is sorted by position", () => {
    // The first argument holds the later position, so a plain concatenation
    // of the two inputs would come out in the wrong order.
    const merged = mergeBreakPoints(
      [{ pos: 100, score: 10, type: "a" }],
      [{ pos: 5, score: 50, type: "b" }],
    );

    // Compare the whole position sequence instead of indexing with `!`:
    // a short or empty result then fails with a readable assertion diff
    // rather than a TypeError, and every element's order is checked.
    expect(merged.map(p => p.pos)).toEqual([5, 100]);
  });
});
|
|
|
|
// ==========================================================================
|
|
// AST vs Regex chunking comparison
|
|
// ==========================================================================
|
|
|
|
// Compares chunkDocument (used by these tests as the regex baseline) with
// chunkDocumentAsync under different file paths and strategies.
describe("AST vs Regex chunking", () => {
|
|
// Generate a large TS file with 30 functions (handler0 .. handler29).
// Each loop iteration pushes one template expansion; the pieces are joined
// with "\n" into the single fixture string `largeTS` below.
|
|
const parts: string[] = [];
|
|
for (let i = 0; i < 30; i++) {
|
|
parts.push(`
|
|
export function handler${i}(req: Request, res: Response): void {
|
|
const startTime = Date.now();
|
|
const userId = req.params.userId;
|
|
const sessionToken = req.headers.authorization;
|
|
|
|
if (!userId || !sessionToken) {
|
|
res.status(400).json({ error: "Missing required parameters" });
|
|
return;
|
|
}
|
|
|
|
console.log(\`Processing request ${i} for user \${userId}\`);
|
|
const result = processBusinessLogic${i}(userId, sessionToken);
|
|
|
|
const elapsed = Date.now() - startTime;
|
|
res.json({ data: result, processingTimeMs: elapsed });
|
|
}
|
|
`);
|
|
}
|
|
const largeTS = parts.join("\n");
|
|
|
|
/**
 * Counts how many of the 30 generated handlers overlap more than one chunk.
 *
 * A handler's span runs from the index of its `function handlerN(` text in
 * `largeTS` up to the start of the next handler's `function handlerN+1(`
 * text, or to the end of `largeTS` when no later handler is found.
 *
 * @param chunks - Chunks to inspect; `pos` is used as the chunk's start
 *   offset and `pos + text.length` as its end.
 * @returns The number of handlers whose span intersects two or more chunks.
 */
function countSplitFunctions(chunks: { text: string; pos: number }[]): number {
|
|
let splits = 0;
|
|
for (let i = 0; i < 30; i++) {
|
|
// The trailing "(" keeps e.g. "handler1(" from matching "handler10(".
const funcStart = largeTS.indexOf(`function handler${i}(`);
|
|
// Search strictly after funcStart for the next handler's declaration.
const nextFunc = largeTS.indexOf(`function handler${i + 1}(`, funcStart + 1);
|
|
// indexOf returns -1 when there is no next handler (the last one).
const funcEnd = nextFunc > 0 ? nextFunc : largeTS.length;
|
|
// Indices of every chunk that overlaps this handler's span.
const chunkIndices = new Set<number>();
|
|
for (let ci = 0; ci < chunks.length; ci++) {
|
|
const chunkStart = chunks[ci]!.pos;
|
|
const chunkEnd = chunkStart + chunks[ci]!.text.length;
|
|
// Half-open interval overlap: [chunkStart, chunkEnd) vs [funcStart, funcEnd).
if (chunkStart < funcEnd && chunkEnd > funcStart) {
|
|
chunkIndices.add(ci);
|
|
}
|
|
}
|
|
if (chunkIndices.size > 1) splits++;
|
|
}
|
|
return splits;
|
|
}
|
|
|
|
// Note: the assertion is "less than or equal", so equal split counts pass.
// NOTE(review): the three `undefined` arguments skip positional parameters
// not visible in this file — presumably defaults; confirm against
// chunkDocumentAsync's signature.
test("AST splits fewer functions across chunk boundaries than regex", async () => {
|
|
const regexChunks = chunkDocument(largeTS);
|
|
const astChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "auto");
|
|
|
|
const regexSplits = countSplitFunctions(regexChunks);
|
|
const astSplits = countSplitFunctions(astChunks);
|
|
|
|
expect(astSplits).toBeLessThanOrEqual(regexSplits);
|
|
});
|
|
|
|
// For a ".md" path, "auto" mode must yield the same chunk count, text and
// position as chunkDocument.
test("markdown files produce identical chunks in auto vs regex mode", async () => {
|
|
// Fixture: 15 headed sections, each with a sentence repeated 40 times.
const sections: string[] = [];
|
|
for (let i = 0; i < 15; i++) {
|
|
sections.push(`# Section ${i}\n\n${"Lorem ipsum dolor sit amet. ".repeat(40)}\n`);
|
|
}
|
|
const largeMD = sections.join("\n");
|
|
|
|
const mdRegex = chunkDocument(largeMD);
|
|
const mdAst = await chunkDocumentAsync(largeMD, undefined, undefined, undefined, "readme.md", "auto");
|
|
|
|
expect(mdAst).toHaveLength(mdRegex.length);
|
|
for (let i = 0; i < mdRegex.length; i++) {
|
|
expect(mdAst[i]?.text).toBe(mdRegex[i]?.text);
|
|
expect(mdAst[i]?.pos).toBe(mdRegex[i]?.pos);
|
|
}
|
|
});
|
|
|
|
// With strategy "regex", the async API must match chunkDocument in chunk
// count and text, even for a ".ts" path. Positions are not compared here.
test("regex strategy bypasses AST entirely", async () => {
|
|
const regexOnly = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, "handlers.ts", "regex");
|
|
const syncRegex = chunkDocument(largeTS);
|
|
|
|
expect(regexOnly).toHaveLength(syncRegex.length);
|
|
for (let i = 0; i < syncRegex.length; i++) {
|
|
expect(regexOnly[i]?.text).toBe(syncRegex[i]?.text);
|
|
}
|
|
});
|
|
|
|
// With no file path, "auto" must give the same number of chunks as
// chunkDocument. Only the count is compared.
test("no filepath falls back to regex", async () => {
|
|
const noPathChunks = await chunkDocumentAsync(largeTS, undefined, undefined, undefined, undefined, "auto");
|
|
const syncRegex = chunkDocument(largeTS);
|
|
expect(noPathChunks).toHaveLength(syncRegex.length);
|
|
});
|
|
|
|
// A one-statement source must come back as exactly one chunk.
test("small file produces single chunk", async () => {
|
|
const smallChunks = await chunkDocumentAsync("export const x = 1;", undefined, undefined, undefined, "s.ts", "auto");
|
|
expect(smallChunks).toHaveLength(1);
|
|
});
|
|
});
|
|
|
|
// ==========================================================================
|
|
// chunkDocumentWithBreakPoints equivalence
|
|
// ==========================================================================
|
|
|
|
describe("chunkDocumentWithBreakPoints equivalence", () => {
  // Passing chunkDocumentWithBreakPoints the results of scanBreakPoints and
  // findCodeFences for the same content must reproduce chunkDocument's
  // chunks: same count, same text, same position.
  test("produces identical output to chunkDocument for the same content", () => {
    // Two 5000-character runs separated by a blank line.
    const content = ["a".repeat(5000), "b".repeat(5000)].join("\n\n");

    const baseline = chunkDocument(content);
    const breakPoints = scanBreakPoints(content);
    const fences = findCodeFences(content);
    const rebuilt = chunkDocumentWithBreakPoints(content, breakPoints, fences);

    expect(rebuilt).toHaveLength(baseline.length);
    baseline.forEach((expected, index) => {
      expect(rebuilt[index]?.text).toBe(expected.text);
      expect(rebuilt[index]?.pos).toBe(expected.pos);
    });
  });
});
|
|
|
|
// ==========================================================================
|
|
// Score assertions not covered by ast.test.ts unit tests
|
|
// ==========================================================================
|
|
|
|
describe("AST break point scores", () => {
  // Each case passes a small snippet and file path to getASTBreakPoints and
  // checks the score of the first returned point with the given type.
  const cases = [
    {
      name: "TypeScript export (class) scores 90",
      code: `export class Foo {}\nexport function bar() {}`,
      filepath: "a.ts",
      type: "ast:export",
      score: 90,
    },
    {
      name: "Python class scores 100",
      code: `class Foo:\n pass\n\ndef bar():\n pass`,
      filepath: "a.py",
      type: "ast:class",
      score: 100,
    },
    {
      name: "Go type scores 80",
      code: `package main\n\ntype Server struct {\n port int\n}\n\nfunc main() {}`,
      filepath: "a.go",
      type: "ast:type",
      score: 80,
    },
    {
      name: "Rust enum scores 80",
      code: `enum State {\n On,\n Off,\n}\n\nfn main() {}`,
      filepath: "a.rs",
      type: "ast:enum",
      score: 80,
    },
  ];

  // Register one test per case, in table order, under its original name.
  for (const { name, code, filepath, type, score } of cases) {
    test(name, async () => {
      const points = await getASTBreakPoints(code, filepath);
      const match = points.find((point) => point.type === type);
      expect(match?.score).toBe(score);
    });
  }
});
|