fix: allow hyphenated words in vec/hyde queries (#383)
The validateSemanticQuery regex rejected any hyphen followed by a word character, blocking common compound words (real-time, multi-client) and kebab-case identifiers like better-sqlite3. Tighten the check so that it only matches negation syntax at token boundaries (start of string or after whitespace).

See https://github.com/tobi/qmd/issues/383

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ae3604cb88
commit
d531211030
@ -2597,8 +2597,9 @@ function buildFTS5Query(query: string): string | null {
|
||||
* Returns error message if invalid, null if valid.
|
||||
*/
|
||||
export function validateSemanticQuery(query: string): string | null {
|
||||
// Check for negation syntax
|
||||
if (/-\w/.test(query) || /-"/.test(query)) {
|
||||
// Check for negation syntax — only at token boundaries (start of string or after whitespace).
|
||||
// Hyphenated words like "real-time" or "write-ahead" must not trigger this.
|
||||
if (/(^|\s)-[\w"]/.test(query)) {
|
||||
return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
|
||||
}
|
||||
return null;
|
||||
|
||||
@ -361,17 +361,73 @@ describe("lex query syntax", () => {
|
||||
expect(validateSemanticQuery("what is the CAP theorem")).toBeNull();
|
||||
});
|
||||
|
||||
test("rejects negation syntax", () => {
|
||||
test("rejects negation at start of query", () => {
|
||||
expect(validateSemanticQuery("-redis connection pooling")).toContain("Negation");
|
||||
});
|
||||
|
||||
test("rejects negation after space", () => {
|
||||
expect(validateSemanticQuery("performance -sports")).toContain("Negation");
|
||||
});
|
||||
|
||||
test("rejects negated quoted phrase", () => {
|
||||
expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation");
|
||||
});
|
||||
|
||||
test("rejects multiple negations", () => {
|
||||
expect(validateSemanticQuery("error handling -java -python")).toContain("Negation");
|
||||
});
|
||||
|
||||
test("rejects negation after leading whitespace", () => {
|
||||
expect(validateSemanticQuery(" -term at start")).toContain("Negation");
|
||||
});
|
||||
|
||||
test("rejects negation after tab", () => {
|
||||
expect(validateSemanticQuery("foo\t-bar")).toContain("Negation");
|
||||
});
|
||||
|
||||
test("accepts hyphenated compound words", () => {
|
||||
expect(validateSemanticQuery("long-lived server shared across clients")).toBeNull();
|
||||
expect(validateSemanticQuery("real-time voice processing pipeline")).toBeNull();
|
||||
expect(validateSemanticQuery("how does the rate-limiter handle burst traffic")).toBeNull();
|
||||
expect(validateSemanticQuery("self-hosted deployment options")).toBeNull();
|
||||
expect(validateSemanticQuery("multi-client session architecture")).toBeNull();
|
||||
expect(validateSemanticQuery("cross-platform compatibility")).toBeNull();
|
||||
expect(validateSemanticQuery("non-blocking I/O model")).toBeNull();
|
||||
expect(validateSemanticQuery("in-memory caching strategy")).toBeNull();
|
||||
expect(validateSemanticQuery("write-ahead log for crash recovery")).toBeNull();
|
||||
expect(validateSemanticQuery("copy-on-write semantics")).toBeNull();
|
||||
});
|
||||
|
||||
test("accepts multiple hyphens in a phrase", () => {
|
||||
expect(validateSemanticQuery("state-of-the-art embedding models")).toBeNull();
|
||||
expect(validateSemanticQuery("end-to-end testing")).toBeNull();
|
||||
expect(validateSemanticQuery("man-in-the-middle attack prevention")).toBeNull();
|
||||
});
|
||||
|
||||
test("accepts multiple hyphenated words in one query", () => {
|
||||
expect(validateSemanticQuery("built-in vs add-on features")).toBeNull();
|
||||
});
|
||||
|
||||
test("accepts short hyphenated terms", () => {
|
||||
expect(validateSemanticQuery("A-B testing for ML models")).toBeNull();
|
||||
expect(validateSemanticQuery("e-commerce platform")).toBeNull();
|
||||
});
|
||||
|
||||
test("accepts bare hyphen without word character", () => {
|
||||
expect(validateSemanticQuery("-")).toBeNull();
|
||||
});
|
||||
|
||||
test("accepts hyde-style hypothetical answers", () => {
|
||||
expect(validateSemanticQuery(
|
||||
"The CAP theorem states that a distributed system cannot simultaneously provide consistency, availability, and partition tolerance."
|
||||
)).toBeNull();
|
||||
});
|
||||
|
||||
test("accepts hyde with hyphenated words", () => {
|
||||
expect(validateSemanticQuery(
|
||||
"HTTP transport runs a single long-lived daemon shared across all clients, avoiding per-session model re-loading."
|
||||
)).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("validateLexQuery", () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user