refactor: remove OR operator from lex queries
Simplify to just: terms, "phrases", and -negation
This commit is contained in:
parent
77e4d8f378
commit
de3a83a553
@ -42,7 +42,6 @@ Local search engine for markdown content.
|
||||
|
||||
**lex (keyword)**
|
||||
- 2-5 terms, no filler words
|
||||
- Include synonyms: `auth OR authentication`
|
||||
- Exact phrase: `"connection pool"` (quoted)
|
||||
- Exclude terms: `performance -sports` (minus prefix)
|
||||
- Code identifiers work: `handleError async`
|
||||
@ -75,9 +74,8 @@ First query gets 2x weight in fusion — put your best guess first.
|
||||
| `term` | Prefix match | `perf` matches "performance" |
|
||||
| `"phrase"` | Exact phrase | `"rate limiter"` |
|
||||
| `-term` | Exclude | `performance -sports` |
|
||||
| `OR` | Either term | `auth OR authentication` |
|
||||
|
||||
Note: `-term` and `OR` only work in lex queries, not vec/hyde.
|
||||
Note: `-term` only works in lex queries, not vec/hyde.
|
||||
|
||||
### Collection Filtering
|
||||
|
||||
|
||||
67
src/store.ts
67
src/store.ts
@ -1993,7 +1993,6 @@ function sanitizeFTS5Term(term: string): string {
|
||||
* Supports:
|
||||
* - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
|
||||
* - Negation: -term or -"phrase" → uses FTS5 NOT operator
|
||||
* - OR: term1 OR term2 (case-insensitive)
|
||||
* - Plain terms: term → "term"* (prefix match)
|
||||
*
|
||||
* FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
|
||||
@ -2002,13 +2001,10 @@ function sanitizeFTS5Term(term: string): string {
|
||||
* Examples:
|
||||
* performance -sports → "performance"* NOT "sports"*
|
||||
* "machine learning" → "machine learning"
|
||||
* auth OR authentication → ("auth"* OR "authentication"*)
|
||||
*/
|
||||
function buildFTS5Query(query: string): string | null {
|
||||
const positive: string[] = [];
|
||||
const negative: string[] = [];
|
||||
const orGroups: string[][] = [[]]; // Track OR groupings
|
||||
let currentOrGroup = 0;
|
||||
|
||||
let i = 0;
|
||||
const s = query.trim();
|
||||
@ -2037,7 +2033,6 @@ function buildFTS5Query(query: string): string | null {
|
||||
negative.push(ftsPhrase);
|
||||
} else {
|
||||
positive.push(ftsPhrase);
|
||||
orGroups[currentOrGroup]!.push(ftsPhrase);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2047,24 +2042,13 @@ function buildFTS5Query(query: string): string | null {
|
||||
while (i < s.length && !/[\s"]/.test(s[i]!)) i++;
|
||||
const term = s.slice(start, i);
|
||||
|
||||
// Check for OR operator
|
||||
if (term.toUpperCase() === 'OR') {
|
||||
// Start new OR group
|
||||
currentOrGroup++;
|
||||
orGroups.push([]);
|
||||
} else if (term.toUpperCase() === 'AND' || term.toUpperCase() === 'NOT') {
|
||||
// AND is implicit, NOT should use - prefix
|
||||
continue;
|
||||
} else {
|
||||
const sanitized = sanitizeFTS5Term(term);
|
||||
if (sanitized) {
|
||||
const ftsTerm = `"${sanitized}"*`; // Prefix match
|
||||
if (negated) {
|
||||
negative.push(ftsTerm);
|
||||
} else {
|
||||
positive.push(ftsTerm);
|
||||
orGroups[currentOrGroup]!.push(ftsTerm);
|
||||
}
|
||||
const sanitized = sanitizeFTS5Term(term);
|
||||
if (sanitized) {
|
||||
const ftsTerm = `"${sanitized}"*`; // Prefix match
|
||||
if (negated) {
|
||||
negative.push(ftsTerm);
|
||||
} else {
|
||||
positive.push(ftsTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2073,30 +2057,14 @@ function buildFTS5Query(query: string): string | null {
|
||||
if (positive.length === 0 && negative.length === 0) return null;
|
||||
|
||||
// If only negative terms, we can't search (FTS5 NOT is binary)
|
||||
if (positive.length === 0) {
|
||||
// Fall back to searching without negation
|
||||
return null;
|
||||
}
|
||||
if (positive.length === 0) return null;
|
||||
|
||||
// Build the positive part with OR groups
|
||||
let result: string;
|
||||
if (orGroups.length > 1 && orGroups.some(g => g.length > 0)) {
|
||||
// Has OR groups - build (a OR b) AND c structure
|
||||
const orParts = orGroups.filter(g => g.length > 0).map(g =>
|
||||
g.length === 1 ? g[0]! : `(${g.join(' OR ')})`
|
||||
);
|
||||
result = orParts.join(' AND ');
|
||||
} else {
|
||||
// Simple AND of all positive terms
|
||||
result = positive.join(' AND ');
|
||||
}
|
||||
// Join positive terms with AND
|
||||
let result = positive.join(' AND ');
|
||||
|
||||
// Add NOT clause for negative terms (FTS5: positive NOT negative)
|
||||
if (negative.length > 0) {
|
||||
// FTS5 NOT only works with single term on right side, chain them
|
||||
for (const neg of negative) {
|
||||
result = `${result} NOT ${neg}`;
|
||||
}
|
||||
// Add NOT clause for negative terms
|
||||
for (const neg of negative) {
|
||||
result = `${result} NOT ${neg}`;
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -2111,15 +2079,6 @@ export function validateSemanticQuery(query: string): string | null {
|
||||
if (/-\w/.test(query) || /-"/.test(query)) {
|
||||
return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
|
||||
}
|
||||
// Check for quoted exact phrases (semantic search doesn't do exact matching)
|
||||
if (/"[^"]+"\s*$/.test(query.trim()) || /^"[^"]+"/.test(query.trim())) {
|
||||
// Single quoted phrase is the whole query - that's fine for hyde
|
||||
// But warn if it looks like they expect exact matching
|
||||
}
|
||||
// Check for OR operator (semantic search doesn't support boolean logic)
|
||||
if (/\bOR\b/i.test(query)) {
|
||||
return 'OR operator is not supported in vec/hyde queries. Use multiple lex queries or rephrase.';
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@ -340,9 +340,6 @@ describe("lex query syntax", () => {
|
||||
expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation");
|
||||
});
|
||||
|
||||
test("rejects OR operator", () => {
|
||||
expect(validateSemanticQuery("auth OR authentication")).toContain("OR");
|
||||
});
|
||||
|
||||
test("accepts hyde-style hypothetical answers", () => {
|
||||
expect(validateSemanticQuery(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user