refactor: remove OR operator from lex queries

Simplify to just: terms, "phrases", and -negation
This commit is contained in:
Tobi Lütke 2026-02-18 22:17:52 -05:00
parent 77e4d8f378
commit de3a83a553
No known key found for this signature in database
3 changed files with 14 additions and 60 deletions

View File

@ -42,7 +42,6 @@ Local search engine for markdown content.
**lex (keyword)**
- 2-5 terms, no filler words
- Include synonyms: `auth OR authentication`
- Exact phrase: `"connection pool"` (quoted)
- Exclude terms: `performance -sports` (minus prefix)
- Code identifiers work: `handleError async`
@ -75,9 +74,8 @@ First query gets 2x weight in fusion — put your best guess first.
| `term` | Prefix match | `perf` matches "performance" |
| `"phrase"` | Exact phrase | `"rate limiter"` |
| `-term` | Exclude | `performance -sports` |
| `OR` | Either term | `auth OR authentication` |
Note: `-term` and `OR` only work in lex queries, not vec/hyde.
Note: `-term` only works in lex queries, not vec/hyde.
### Collection Filtering

View File

@ -1993,7 +1993,6 @@ function sanitizeFTS5Term(term: string): string {
* Supports:
* - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
* - Negation: -term or -"phrase" → uses FTS5 NOT operator
* - OR: term1 OR term2 (case-insensitive)
* - Plain terms: term → "term"* (prefix match)
*
* FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
@ -2002,13 +2001,10 @@ function sanitizeFTS5Term(term: string): string {
* Examples:
* performance -sports → "performance"* NOT "sports"*
* "machine learning" → "machine learning"
* auth OR authentication → ("auth"* OR "authentication"*)
*/
function buildFTS5Query(query: string): string | null {
const positive: string[] = [];
const negative: string[] = [];
const orGroups: string[][] = [[]]; // Track OR groupings
let currentOrGroup = 0;
let i = 0;
const s = query.trim();
@ -2037,7 +2033,6 @@ function buildFTS5Query(query: string): string | null {
negative.push(ftsPhrase);
} else {
positive.push(ftsPhrase);
orGroups[currentOrGroup]!.push(ftsPhrase);
}
}
}
@ -2047,24 +2042,13 @@ function buildFTS5Query(query: string): string | null {
while (i < s.length && !/[\s"]/.test(s[i]!)) i++;
const term = s.slice(start, i);
// Check for OR operator
if (term.toUpperCase() === 'OR') {
// Start new OR group
currentOrGroup++;
orGroups.push([]);
} else if (term.toUpperCase() === 'AND' || term.toUpperCase() === 'NOT') {
// AND is implicit, NOT should use - prefix
continue;
} else {
const sanitized = sanitizeFTS5Term(term);
if (sanitized) {
const ftsTerm = `"${sanitized}"*`; // Prefix match
if (negated) {
negative.push(ftsTerm);
} else {
positive.push(ftsTerm);
orGroups[currentOrGroup]!.push(ftsTerm);
}
const sanitized = sanitizeFTS5Term(term);
if (sanitized) {
const ftsTerm = `"${sanitized}"*`; // Prefix match
if (negated) {
negative.push(ftsTerm);
} else {
positive.push(ftsTerm);
}
}
}
@ -2073,30 +2057,14 @@ function buildFTS5Query(query: string): string | null {
if (positive.length === 0 && negative.length === 0) return null;
// If only negative terms, we can't search (FTS5 NOT is binary)
if (positive.length === 0) {
// Fall back to searching without negation
return null;
}
if (positive.length === 0) return null;
// Build the positive part with OR groups
let result: string;
if (orGroups.length > 1 && orGroups.some(g => g.length > 0)) {
// Has OR groups - build (a OR b) AND c structure
const orParts = orGroups.filter(g => g.length > 0).map(g =>
g.length === 1 ? g[0]! : `(${g.join(' OR ')})`
);
result = orParts.join(' AND ');
} else {
// Simple AND of all positive terms
result = positive.join(' AND ');
}
// Join positive terms with AND
let result = positive.join(' AND ');
// Add NOT clause for negative terms (FTS5: positive NOT negative)
if (negative.length > 0) {
// FTS5 NOT only works with single term on right side, chain them
for (const neg of negative) {
result = `${result} NOT ${neg}`;
}
// Add NOT clause for negative terms
for (const neg of negative) {
result = `${result} NOT ${neg}`;
}
return result;
@ -2111,15 +2079,6 @@ export function validateSemanticQuery(query: string): string | null {
if (/-\w/.test(query) || /-"/.test(query)) {
return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
}
// Check for quoted exact phrases (semantic search doesn't do exact matching)
if (/"[^"]+"\s*$/.test(query.trim()) || /^"[^"]+"/.test(query.trim())) {
// Single quoted phrase is the whole query - that's fine for hyde
// But warn if it looks like they expect exact matching
}
// Check for OR operator (semantic search doesn't support boolean logic)
if (/\bOR\b/i.test(query)) {
return 'OR operator is not supported in vec/hyde queries. Use multiple lex queries or rephrase.';
}
return null;
}

View File

@ -340,9 +340,6 @@ describe("lex query syntax", () => {
expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation");
});
test("rejects OR operator", () => {
expect(validateSemanticQuery("auth OR authentication")).toContain("OR");
});
test("accepts hyde-style hypothetical answers", () => {
expect(validateSemanticQuery(