340 lines
11 KiB
TypeScript
340 lines
11 KiB
TypeScript
/**
|
|
* ast.test.ts - Tests for AST-aware chunking support
|
|
*
|
|
* Tests language detection, AST break point extraction for each
|
|
* supported language, and graceful fallback on errors.
|
|
*/
|
|
|
|
import { describe, test, expect } from "vitest";
|
|
import { detectLanguage, getASTBreakPoints, extractSymbols, formatGrammarLoadError } from "../src/ast.js";
|
|
import type { SupportedLanguage } from "../src/ast.js";
|
|
|
|
// =============================================================================
|
|
// Language Detection
|
|
// =============================================================================
|
|
|
|
describe("detectLanguage", () => {
|
|
test("recognizes TypeScript extensions", () => {
|
|
expect(detectLanguage("src/auth.ts")).toBe("typescript");
|
|
expect(detectLanguage("src/auth.mts")).toBe("typescript");
|
|
expect(detectLanguage("src/auth.cts")).toBe("typescript");
|
|
});
|
|
|
|
test("recognizes TSX extension", () => {
|
|
expect(detectLanguage("src/App.tsx")).toBe("tsx");
|
|
});
|
|
|
|
test("recognizes JavaScript extensions", () => {
|
|
expect(detectLanguage("src/util.js")).toBe("javascript");
|
|
expect(detectLanguage("src/util.mjs")).toBe("javascript");
|
|
expect(detectLanguage("src/util.cjs")).toBe("javascript");
|
|
});
|
|
|
|
test("recognizes JSX as tsx", () => {
|
|
expect(detectLanguage("src/App.jsx")).toBe("tsx");
|
|
});
|
|
|
|
test("recognizes Python extension", () => {
|
|
expect(detectLanguage("src/auth.py")).toBe("python");
|
|
});
|
|
|
|
test("recognizes Go extension", () => {
|
|
expect(detectLanguage("src/auth.go")).toBe("go");
|
|
});
|
|
|
|
test("recognizes Rust extension", () => {
|
|
expect(detectLanguage("src/auth.rs")).toBe("rust");
|
|
});
|
|
|
|
test("returns null for markdown", () => {
|
|
expect(detectLanguage("docs/README.md")).toBeNull();
|
|
});
|
|
|
|
test("returns null for unknown extensions", () => {
|
|
expect(detectLanguage("data/file.csv")).toBeNull();
|
|
expect(detectLanguage("config.yaml")).toBeNull();
|
|
expect(detectLanguage("Makefile")).toBeNull();
|
|
});
|
|
|
|
test("is case-insensitive for extensions", () => {
|
|
expect(detectLanguage("src/Auth.TS")).toBe("typescript");
|
|
expect(detectLanguage("src/Auth.PY")).toBe("python");
|
|
});
|
|
|
|
test("works with virtual qmd:// paths", () => {
|
|
expect(detectLanguage("qmd://myproject/src/auth.ts")).toBe("typescript");
|
|
expect(detectLanguage("qmd://docs/README.md")).toBeNull();
|
|
});
|
|
});
|
|
|
|
// =============================================================================
|
|
// AST Break Points - TypeScript
|
|
// =============================================================================
|
|
|
|
describe("getASTBreakPoints - TypeScript", () => {
|
|
const TS_SAMPLE = `import { Database } from './db';
|
|
import type { User } from './types';
|
|
|
|
interface AuthConfig {
|
|
secret: string;
|
|
ttl: number;
|
|
}
|
|
|
|
type UserId = string;
|
|
|
|
export class AuthService {
|
|
constructor(private db: Database) {}
|
|
|
|
async authenticate(user: User, token: string): Promise<boolean> {
|
|
const session = await this.db.findSession(token);
|
|
return session?.userId === user.id;
|
|
}
|
|
|
|
validateToken(token: string): boolean {
|
|
return token.length === 64;
|
|
}
|
|
}
|
|
|
|
export function hashPassword(password: string): string {
|
|
return crypto.createHash('sha256').update(password).digest('hex');
|
|
}
|
|
`;
|
|
|
|
test("produces break points at function, class, and import boundaries", async () => {
|
|
const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
|
|
expect(points.length).toBeGreaterThan(0);
|
|
|
|
// Should have import, interface, type, class (via export), method, and function break points
|
|
const types = points.map(p => p.type);
|
|
expect(types.some(t => t.includes("import"))).toBe(true);
|
|
expect(types.some(t => t.includes("iface"))).toBe(true);
|
|
expect(types.some(t => t.includes("type"))).toBe(true);
|
|
expect(types.some(t => t.includes("export") || t.includes("class"))).toBe(true);
|
|
expect(types.some(t => t.includes("method"))).toBe(true);
|
|
});
|
|
|
|
test("break points are sorted by position", async () => {
|
|
const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
|
|
for (let i = 1; i < points.length; i++) {
|
|
expect(points[i]!.pos).toBeGreaterThanOrEqual(points[i - 1]!.pos);
|
|
}
|
|
});
|
|
|
|
test("scores align with expected hierarchy", async () => {
|
|
const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
|
|
|
|
// Class/interface should score 100
|
|
const ifacePoint = points.find(p => p.type === "ast:iface");
|
|
expect(ifacePoint?.score).toBe(100);
|
|
|
|
// Function/method should score 90
|
|
const methodPoint = points.find(p => p.type === "ast:method");
|
|
expect(methodPoint?.score).toBe(90);
|
|
|
|
// Import should score 60
|
|
const importPoint = points.find(p => p.type === "ast:import");
|
|
expect(importPoint?.score).toBe(60);
|
|
});
|
|
|
|
test("break point positions match actual content positions", async () => {
|
|
const points = await getASTBreakPoints(TS_SAMPLE, "src/auth.ts");
|
|
|
|
// First import should be at position 0
|
|
const firstImport = points.find(p => p.type === "ast:import");
|
|
expect(firstImport).toBeDefined();
|
|
expect(TS_SAMPLE.slice(firstImport!.pos, firstImport!.pos + 6)).toBe("import");
|
|
});
|
|
});
|
|
|
|
// =============================================================================
|
|
// AST Break Points - Python
|
|
// =============================================================================
|
|
|
|
describe("getASTBreakPoints - Python", () => {
|
|
const PY_SAMPLE = `import os
|
|
from typing import Optional
|
|
|
|
class AuthService:
|
|
def __init__(self, db):
|
|
self.db = db
|
|
|
|
async def authenticate(self, user, token):
|
|
session = await self.db.find(token)
|
|
return session.user_id == user.id
|
|
|
|
def validate_token(self, token):
|
|
return len(token) == 64
|
|
|
|
def hash_password(password: str) -> str:
|
|
return hashlib.sha256(password.encode()).hexdigest()
|
|
|
|
@decorator
|
|
def decorated_func():
|
|
pass
|
|
`;
|
|
|
|
test("produces break points for class, function, import, and decorated definitions", async () => {
|
|
const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");
|
|
const types = points.map(p => p.type);
|
|
|
|
expect(types.some(t => t.includes("import"))).toBe(true);
|
|
expect(types.some(t => t.includes("class"))).toBe(true);
|
|
expect(types.some(t => t.includes("func"))).toBe(true);
|
|
expect(types.some(t => t.includes("decorated"))).toBe(true);
|
|
});
|
|
|
|
test("captures method definitions inside classes", async () => {
|
|
const points = await getASTBreakPoints(PY_SAMPLE, "auth.py");
|
|
// Should capture __init__, authenticate, and validate_token as func
|
|
const funcPoints = points.filter(p => p.type === "ast:func");
|
|
expect(funcPoints.length).toBeGreaterThanOrEqual(3);
|
|
});
|
|
});
|
|
|
|
// =============================================================================
|
|
// AST Break Points - Go
|
|
// =============================================================================
|
|
|
|
describe("getASTBreakPoints - Go", () => {
|
|
const GO_SAMPLE = `package main
|
|
|
|
import "fmt"
|
|
|
|
type AuthService struct {
|
|
db *Database
|
|
}
|
|
|
|
func (s *AuthService) Authenticate(user User) bool {
|
|
return true
|
|
}
|
|
|
|
func HashPassword(password string) string {
|
|
return "hash"
|
|
}
|
|
`;
|
|
|
|
test("produces break points for type, function, method, and import", async () => {
|
|
const points = await getASTBreakPoints(GO_SAMPLE, "auth.go");
|
|
const types = points.map(p => p.type);
|
|
|
|
expect(types.some(t => t.includes("import"))).toBe(true);
|
|
expect(types.some(t => t.includes("type"))).toBe(true);
|
|
expect(types.some(t => t.includes("method"))).toBe(true);
|
|
expect(types.some(t => t.includes("func"))).toBe(true);
|
|
});
|
|
|
|
test("function and method both score 90", async () => {
|
|
const points = await getASTBreakPoints(GO_SAMPLE, "auth.go");
|
|
const funcPoint = points.find(p => p.type === "ast:func");
|
|
const methodPoint = points.find(p => p.type === "ast:method");
|
|
|
|
expect(funcPoint?.score).toBe(90);
|
|
expect(methodPoint?.score).toBe(90);
|
|
});
|
|
});
|
|
|
|
// =============================================================================
|
|
// AST Break Points - Rust
|
|
// =============================================================================
|
|
|
|
describe("getASTBreakPoints - Rust", () => {
|
|
const RS_SAMPLE = `use std::collections::HashMap;
|
|
|
|
struct AuthService {
|
|
db: Database,
|
|
}
|
|
|
|
impl AuthService {
|
|
fn authenticate(&self, user: &User) -> bool {
|
|
true
|
|
}
|
|
}
|
|
|
|
trait Authenticatable {
|
|
fn validate(&self) -> bool;
|
|
}
|
|
|
|
enum Role {
|
|
Admin,
|
|
User,
|
|
}
|
|
|
|
fn hash_password(password: &str) -> String {
|
|
String::new()
|
|
}
|
|
`;
|
|
|
|
test("produces break points for struct, impl, trait, enum, function, and use", async () => {
|
|
const points = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
|
|
const types = points.map(p => p.type);
|
|
|
|
expect(types.some(t => t.includes("import"))).toBe(true); // use_declaration -> @import
|
|
expect(types.some(t => t.includes("struct"))).toBe(true);
|
|
expect(types.some(t => t.includes("impl"))).toBe(true);
|
|
expect(types.some(t => t.includes("trait"))).toBe(true);
|
|
expect(types.some(t => t.includes("enum"))).toBe(true);
|
|
expect(types.some(t => t.includes("func"))).toBe(true);
|
|
});
|
|
|
|
test("struct, impl, and trait all score 100", async () => {
|
|
const points = await getASTBreakPoints(RS_SAMPLE, "auth.rs");
|
|
const structPoint = points.find(p => p.type === "ast:struct");
|
|
const implPoint = points.find(p => p.type === "ast:impl");
|
|
const traitPoint = points.find(p => p.type === "ast:trait");
|
|
|
|
expect(structPoint?.score).toBe(100);
|
|
expect(implPoint?.score).toBe(100);
|
|
expect(traitPoint?.score).toBe(100);
|
|
});
|
|
});
|
|
|
|
// =============================================================================
|
|
// Error Handling & Fallback
|
|
// =============================================================================
|
|
|
|
describe("getASTBreakPoints - error handling", () => {
|
|
test("returns empty array for unsupported file types", async () => {
|
|
const points = await getASTBreakPoints("# Hello World", "readme.md");
|
|
expect(points).toEqual([]);
|
|
});
|
|
|
|
test("returns empty array for unknown extensions", async () => {
|
|
const points = await getASTBreakPoints("data,here", "file.csv");
|
|
expect(points).toEqual([]);
|
|
});
|
|
|
|
test("handles empty content gracefully", async () => {
|
|
const points = await getASTBreakPoints("", "empty.ts");
|
|
expect(points).toEqual([]);
|
|
});
|
|
|
|
test("handles syntactically invalid code gracefully", async () => {
|
|
// Tree-sitter is error-tolerant, so this should still parse (with error nodes)
|
|
// but should not crash
|
|
const points = await getASTBreakPoints("function { broken syntax %%%", "broken.ts");
|
|
// Should either return some partial break points or empty array — not throw
|
|
expect(Array.isArray(points)).toBe(true);
|
|
});
|
|
|
|
test("explains missing grammar packages with a repair command", () => {
|
|
const msg = formatGrammarLoadError(
|
|
"typescript",
|
|
new Error("Cannot find module 'tree-sitter-typescript/tree-sitter-typescript.wasm'"),
|
|
);
|
|
expect(msg).toContain("tree-sitter-typescript");
|
|
expect(msg).toContain("bun add tree-sitter-typescript@0.23.2");
|
|
expect(msg).toContain("falling back to regex");
|
|
});
|
|
});
|
|
|
|
// =============================================================================
|
|
// Symbol Extraction Stub (Phase 2)
|
|
// =============================================================================
|
|
|
|
describe("extractSymbols", () => {
|
|
test("returns empty array (Phase 2 stub)", () => {
|
|
const symbols = extractSymbols("function foo() {}", "typescript", 0, 18);
|
|
expect(symbols).toEqual([]);
|
|
});
|
|
});
|