diff --git a/src/store.ts b/src/store.ts index 5adafd9..6ec8ac1 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2226,8 +2226,8 @@ export function findActiveDocument( } /** - * Find an active document, falling back to a legacy lowercase path. - * If found under the legacy path, renames it in-place and rebuilds the + * Find an active document, falling back to a case-insensitive path match. + * If found under a different casing, renames it in-place and rebuilds the * FTS entry. Embeddings are keyed by content hash, so the rename is * safe — no re-embedding required. * @@ -2242,10 +2242,12 @@ export function findOrMigrateLegacyDocument( const existing = findActiveDocument(db, collectionName, path); if (existing) return existing; - const legacyPath = path.toLowerCase(); - if (legacyPath === path) return null; - - const legacy = findActiveDocument(db, collectionName, legacyPath); + const legacy = db.prepare(` + SELECT id, hash, title FROM documents + WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1 + ORDER BY id + LIMIT 1 + `).get(collectionName, path) as { id: number; hash: string; title: string } | undefined; if (!legacy) return null; // Wrap rename + FTS rebuild in a transaction for atomicity. 
diff --git a/test/store.test.ts b/test/store.test.ts
index 24b5a10..9f82624 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -9,7 +9,7 @@
 import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest";
 import { openDatabase, loadSqliteVec } from "../src/db.js";
 import type { Database } from "../src/db.js";
-import { unlink, mkdtemp, rmdir, writeFile, rm } from "node:fs/promises";
+import { unlink, mkdtemp, rmdir, writeFile, rm, rename } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import YAML from "yaml";
@@ -46,12 +46,12 @@ import {
   normalizeDocid,
   isDocid,
   syncConfigToDb,
+  reindexCollection,
   STRONG_SIGNAL_MIN_SCORE,
   STRONG_SIGNAL_MIN_GAP,
   insertContent,
   insertDocument,
   generateEmbeddings,
-  reindexCollection,
   getHybridRrfWeights,
   type Store,
   type DocumentResult,
@@ -2112,6 +2112,73 @@ describe("Reciprocal Rank Fusion", () => {
   });
 });
 
+// =============================================================================
+// Reindex Collection Tests
+// =============================================================================
+
+describe("Reindex Collection", () => {
+  test("preserves document id and embeddings when file path changes only by case", async () => {
+    const store = await createTestStore();
+    const collectionName = "docs";
+    // mkdtemp creates a uniquely named directory in one step, so no hand-rolled suffix is needed.
+    const collectionPath = await mkdtemp(join(testDir, "case-rename-"));
+
+    try {
+      const originalPath = join(collectionPath, "README.md");
+      const renamedPath = join(collectionPath, "readme.md");
+      const body = "# Case Rename\n\nContent that should keep the same embedding.";
+      await writeFile(originalPath, body);
+
+      const firstResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
+      expect(firstResult.indexed).toBe(1);
+
+      const before = store.db.prepare(`
+        SELECT id, path, hash FROM documents
+        WHERE collection = ? AND active = 1
+      `).get(collectionName) as { id: number; path: string; hash: string } | undefined;
+      // Fail with a readable message rather than a TypeError if nothing was indexed.
+      if (!before) throw new Error("expected an active document after the first reindex");
+      expect(before.path).toBe("README.md");
+
+      // Seed one embedding row for the content hash so we can verify it survives the rename.
+      store.db.prepare(`
+        INSERT INTO content_vectors (hash, seq, pos, model, embedded_at)
+        VALUES (?, 0, 0, 'test-model', ?)
+      `).run(before.hash, new Date().toISOString());
+
+      await rename(originalPath, renamedPath);
+
+      // The second pass must treat the case-only rename as the same document.
+      const secondResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
+      expect(secondResult.indexed).toBe(0);
+      expect(secondResult.unchanged).toBe(1);
+      expect(secondResult.removed).toBe(0);
+
+      const afterRows = store.db.prepare(`
+        SELECT id, path, hash, active FROM documents
+        WHERE collection = ?
+        ORDER BY id
+      `).all(collectionName) as { id: number; path: string; hash: string; active: number }[];
+      expect(afterRows).toHaveLength(1);
+      expect(afterRows[0]).toMatchObject({ id: before.id, path: "readme.md", hash: before.hash, active: 1 });
+
+      const vectorCount = store.db.prepare(`
+        SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ?
+      `).get(before.hash) as { count: number };
+      expect(vectorCount.count).toBe(1);
+
+      const ftsRows = store.db.prepare(`
+        SELECT rowid, filepath FROM documents_fts WHERE rowid = ?
+      `).all(before.id) as { rowid: number; filepath: string }[];
+      expect(ftsRows).toEqual([{ rowid: before.id, filepath: "docs/readme.md" }]);
+    } finally {
+      // Release the database and scratch directory even when an assertion above throws.
+      await cleanupTestDb(store);
+      await rm(collectionPath, { recursive: true, force: true });
+    }
+  });
+});
+
 // =============================================================================
 // Index Status Tests
 // =============================================================================