diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..3d4c590
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,42 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Tests only need to read the repository; keep GITHUB_TOKEN read-only.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        bun-version: ["latest", "1.1.0"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: ${{ matrix.bun-version }}
+
+      - name: Install SQLite (Ubuntu)
+        if: runner.os == 'Linux'
+        run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
+
+      - name: Install SQLite (macOS)
+        if: runner.os == 'macOS'
+        run: brew install sqlite
+
+      - run: bun install
+
+      # NOTE(review): package.json's "test" script adds
+      # `--preload ./src/test-preload.ts`; confirm a bare `bun test` also
+      # loads it (e.g. through bunfig.toml), otherwise use `bun run test`.
+      - run: bun test
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..c716816
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,54 @@
+name: Publish
+
+on:
+  push:
+    tags: ["v*"]
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # Any tag starting with "v" triggers this workflow; refuse to publish
+      # unless the tag is exactly "v" + the version in package.json.
+      - name: Verify tag matches package.json version
+        env:
+          TAG: ${{ github.ref_name }}
+        run: |
+          version="$(jq -r .version package.json)"
+          if [[ "$TAG" != "v$version" ]]; then
+            echo "Tag $TAG does not match package.json version $version" >&2
+            exit 1
+          fi
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Install SQLite
+        run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
+
+      - run: bun install
+      - run: bun test
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          registry-url: https://registry.npmjs.org
+
+      - run: npm publish
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Create GitHub Release
+        env:
+          GH_TOKEN: ${{ github.token }}
+          # Pass the tag through the environment rather than expanding it
+          # inside the script text, so its characters are never parsed as shell.
+          TAG: ${{ github.ref_name }}
+        run: gh release create "$TAG" --generate-notes
diff --git a/.gitignore b/.gitignore
index 7f83d11..7378e7a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,15 @@
 node_modules/
+.npmrc
 *.sqlite
 .DS_Store
 archive/
 texts/
 .cursor/
-.github/
+.github/copilot/
 *.md
 !README.md
 !CLAUDE.md
+!CHANGELOG.md
 !skills/**/*.md
 !finetune/*.md
 finetune/outputs/
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..86778d8
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,33 @@
+# Changelog
+
+All notable changes to QMD will be documented in this file.
+
+## [0.9.0] - 2026-02-15
+
+Initial public release.
+
+### Features
+
+- **Hybrid search pipeline** — BM25 full-text + vector similarity + LLM reranking with Reciprocal Rank Fusion
+- **Smart chunking** — scored markdown break points keep sections, paragraphs, and code blocks intact (~900 tokens/chunk, 15% overlap)
+- **Query expansion** — fine-tuned Qwen3 1.7B model generates search variations for better recall
+- **Cross-encoder reranking** — Qwen3-Reranker scores candidates with position-aware blending
+- **Vector embeddings** — EmbeddingGemma 300M via node-llama-cpp, all on-device
+- **MCP server** — stdio and HTTP transports for Claude Desktop, Claude Code, and any MCP client
+- **Collection management** — index multiple directories with glob patterns
+- **Context annotations** — add descriptions to collections and paths for richer search
+- **Document IDs** — 6-char content hash for stable references across re-indexes
+- **Multi-get** — retrieve multiple documents by glob pattern, comma list, or docids
+- **Multiple output formats** — JSON, CSV, Markdown, XML, files list
+- **Claude Code plugin** — inline status checks and MCP integration
+
+### Fixes
+
+- Handle dense content (code) that tokenizes beyond expected chunk size
+- Proper cleanup of Metal GPU resources
+- SQLite-vec readiness verification after extension load
+- Reactivate deactivated documents on re-index
+- BM25 score normalization with Math.abs
+- Bun UTF-8 path corruption workaround
+
+[0.9.0]: https://github.com/tobi/qmd/releases/tag/v0.9.0
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..81652d0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024-2026 Tobi Lutke
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index d58c66a..05c7e87 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ QMD combines BM25 full-text search, vector semantic search, and LLM re-ranking
 
 ```sh
 # Install globally
-bun install -g https://github.com/tobi/qmd
+bun install -g @tobi/qmd
 
 # Create collections for your notes, docs, and meeting transcripts
 qmd collection add ~/notes --name notes
@@ -252,7 +252,7 @@ Models are downloaded from HuggingFace and cached in `~/.cache/qmd/models/`.
 ## Installation
 
 ```sh
-bun install -g github:tobi/qmd
+bun install -g @tobi/qmd
 ```
 
 Make sure `~/.bun/bin` is in your PATH.
diff --git a/package.json b/package.json
index 01d35f1..a2dd034 100644
--- a/package.json
+++ b/package.json
@@ -1,11 +1,19 @@
 {
-  "name": "qmd",
-  "version": "1.0.0",
-  "description": "Quick Markdown Search - Full-text and vector search for markdown files",
+  "name": "@tobi/qmd",
+  "version": "0.9.0",
+  "description": "Query Markup Documents - On-device hybrid search for markdown files with BM25, vector search, and LLM reranking",
   "type": "module",
   "bin": {
     "qmd": "./qmd"
   },
+  "files": [
+    "src/**/*.ts",
+    "!src/**/*.test.ts",
+    "!src/test-preload.ts",
+    "qmd",
+    "LICENSE",
+    "CHANGELOG.md"
+  ],
   "scripts": {
     "test": "bun test --preload ./src/test-preload.ts",
     "qmd": "bun src/qmd.ts",
@@ -15,7 +23,19 @@
     "vsearch": "bun src/qmd.ts vsearch",
     "rerank": "bun src/qmd.ts rerank",
     "link": "bun link",
-    "inspector": "npx @modelcontextprotocol/inspector bun src/qmd.ts mcp"
+    "inspector": "npx @modelcontextprotocol/inspector bun src/qmd.ts mcp",
+    "release": "./scripts/release.sh"
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/tobi/qmd.git"
+  },
+  "homepage": "https://github.com/tobi/qmd#readme",
+  "bugs": {
+    "url": "https://github.com/tobi/qmd/issues"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.25.1",
@@ -43,11 +63,18 @@
     "markdown",
     "search",
     "fts",
+    "full-text-search",
     "vector",
+    "semantic-search",
     "sqlite",
     "bm25",
     "embeddings",
-    "ollama"
+    "rag",
+    "mcp",
+    "reranking",
+    "knowledge-base",
+    "local-ai",
+    "llm"
   ],
   "license": "MIT"
 }
diff --git a/scripts/release.sh b/scripts/release.sh
new file mode 100755
index 0000000..8f2a5cc
--- /dev/null
+++ b/scripts/release.sh
@@ -0,0 +1,200 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# QMD Release Script
+#
+# Bumps the version in package.json, adds a CHANGELOG.md entry built from the
+# commit subjects since the last tag, then creates a release commit and an
+# annotated tag. Nothing is pushed from here.
+#
+# Usage: ./scripts/release.sh [patch|minor|major|<version>]
+# Examples:
+#   ./scripts/release.sh patch   # 0.9.0 -> 0.9.1
+#   ./scripts/release.sh minor   # 0.9.0 -> 0.10.0
+#   ./scripts/release.sh major   # 0.9.0 -> 1.0.0
+#   ./scripts/release.sh 1.0.0   # explicit version
+
+BUMP="${1:?Usage: release.sh [patch|minor|major|<version>]}"
+
+REPO_URL="https://github.com/tobi/qmd"
+# X.Y.Z with an optional pre-release suffix such as -rc.1
+VERSION_RE='^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z.-]+)?$'
+
+# jq reads and rewrites package.json below; fail early if it is missing.
+command -v jq > /dev/null 2>&1 || { echo "Error: jq is required" >&2; exit 1; }
+
+# package.json and CHANGELOG.md are addressed relative to the repository root.
+cd "$(git rev-parse --show-toplevel)"
+
+# Ask a yes/no question ($1); succeeds only on a single "y" or "Y" keypress.
+confirm() {
+  local reply=""
+  # read fails at end of input (e.g. stdin is not a terminal); treat that as
+  # "no" instead of letting `set -e` end the script without a message.
+  read -p "$1 [y/N] " -n 1 -r reply || true
+  echo ""
+  [[ "$reply" =~ ^[Yy]$ ]]
+}
+
+# Ensure we're on main and clean
+BRANCH=$(git branch --show-current)
+if [[ "$BRANCH" != "main" ]]; then
+  echo "Error: must be on main branch (currently on $BRANCH)" >&2
+  exit 1
+fi
+
+if [[ -n "$(git status --porcelain)" ]]; then
+  echo "Error: working directory not clean" >&2
+  git status --short
+  exit 1
+fi
+
+# Read current version
+CURRENT=$(jq -r .version package.json)
+echo "Current version: $CURRENT"
+
+# Calculate new version.
+#   $1  current version
+#   $2  patch | minor | major | explicit version
+# Prints the new version. An explicit version is echoed back unchanged.
+# Returns 1 if a keyword bump is requested but $1 is not plain X.Y.Z.
+bump_version() {
+  local current="$1" type="$2"
+  local major minor patch
+  case "$type" in
+    major | minor | patch) ;;
+    *)
+      echo "$type" # explicit version
+      return 0
+      ;;
+  esac
+  if [[ ! "$current" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+    echo "Error: cannot bump '$current'; pass an explicit version" >&2
+    return 1
+  fi
+  IFS='.' read -r major minor patch <<< "$current"
+  # 10# forces base 10 so a component like "08" is not rejected as octal.
+  case "$type" in
+    major) echo "$((10#$major + 1)).0.0" ;;
+    minor) echo "$major.$((10#$minor + 1)).0" ;;
+    patch) echo "$major.$minor.$((10#$patch + 1))" ;;
+  esac
+}
+
+NEW=$(bump_version "$CURRENT" "$BUMP")
+
+# An explicit version is used as typed, so check it before it is written to
+# package.json and used as a tag name.
+if [[ ! "$NEW" =~ $VERSION_RE ]]; then
+  echo "Error: '$NEW' is not a valid version (expected X.Y.Z)" >&2
+  exit 1
+fi
+
+# Fail now rather than after package.json and CHANGELOG.md were rewritten.
+if git rev-parse -q --verify "refs/tags/v$NEW" > /dev/null; then
+  echo "Error: tag v$NEW already exists" >&2
+  exit 1
+fi
+
+echo "New version: $NEW"
+echo ""
+
+# Confirm
+confirm "Release v$NEW?" || { echo "Aborted."; exit 1; }
+
+# Gather commits since last tag (or all if no tags)
+LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
+if [[ -n "$LAST_TAG" ]]; then
+  RANGE="$LAST_TAG..HEAD"
+else
+  RANGE="HEAD"
+fi
+
+echo ""
+echo "Commits since ${LAST_TAG:-beginning}:"
+git log "$RANGE" --oneline --no-decorate
+echo ""
+
+# Generate changelog entry
+DATE=$(date +%Y-%m-%d)
+ENTRY="## [$NEW] - $DATE"$'\n'$'\n'
+
+# Print a "- description" bullet for every commit in $RANGE whose subject has
+# the conventional-commit type $1: "feat: x", "feat(scope): x" or "feat!: x".
+commits_of_type() {
+  local type="$1"
+  git log "$RANGE" --format='%s' | sed -nE \
+    -e "s/^${type}\(([^)]+)\)!?:[[:space:]]*(.+)$/- **\1:** \2/p" \
+    -e "s/^${type}!?:[[:space:]]*(.+)$/- \1/p"
+}
+
+# Collect conventional commits
+FEATS=$(commits_of_type feat)
+FIXES=$(commits_of_type fix)
+# Everything without a known type prefix; docs/chore/refactor are left out.
+OTHER=$(git log "$RANGE" --format='%s' \
+  | sed -E '/^(feat|fix|docs|chore|refactor)(\([^)]*\))?!?:/d; s/^/- /')
+
+if [[ -n "$FEATS" ]]; then
+  ENTRY+="### Features"$'\n'$'\n'"$FEATS"$'\n'$'\n'
+fi
+if [[ -n "$FIXES" ]]; then
+  ENTRY+="### Fixes"$'\n'$'\n'"$FIXES"$'\n'$'\n'
+fi
+if [[ -n "$OTHER" ]]; then
+  ENTRY+="### Other"$'\n'$'\n'"$OTHER"$'\n'$'\n'
+fi
+
+# Add link reference. Without a previous tag there is nothing to compare
+# against, so link to the release page instead.
+if [[ -n "$LAST_TAG" ]]; then
+  LINK="[$NEW]: $REPO_URL/compare/$LAST_TAG...v$NEW"
+else
+  LINK="[$NEW]: $REPO_URL/releases/tag/v$NEW"
+fi
+
+# Show what will be added
+echo "--- Changelog entry ---"
+echo "$ENTRY"
+echo "$LINK"
+echo "--- End ---"
+echo ""
+confirm "Looks good?" || { echo "Aborted."; exit 1; }
+
+# Update package.json version
+jq --arg v "$NEW" '.version = $v' package.json > package.json.tmp && mv package.json.tmp package.json
+
+# Add the changelog entry
+if [[ -f CHANGELOG.md ]]; then
+  # The texts are handed to awk through the environment: `awk -v` would
+  # expand backslash escapes in commit subjects, and some awk versions
+  # reject a -v value that contains a newline.
+  ENTRY="$ENTRY" LINK="$LINK" awk '
+    # New section goes above the most recent release heading, which keeps
+    # the title and introduction at the top of the file.
+    !entry_done && /^## / { printf "%s", ENVIRON["ENTRY"]; entry_done = 1 }
+    # New link reference goes above the existing ones.
+    !link_done && /^\[[^]]+\]: / { print ENVIRON["LINK"]; link_done = 1 }
+    { print }
+    END {
+      if (!entry_done) printf "\n%s", ENVIRON["ENTRY"]
+      if (!link_done) print ENVIRON["LINK"]
+    }
+  ' CHANGELOG.md > CHANGELOG.md.tmp && mv CHANGELOG.md.tmp CHANGELOG.md
+else
+  echo "# Changelog"$'\n'$'\n'"$ENTRY$LINK" > CHANGELOG.md
+fi
+
+# Commit and tag
+git add package.json CHANGELOG.md
+git commit -m "release: v$NEW"
+git tag -a "v$NEW" -m "v$NEW"
+
+echo ""
+echo "Created commit and tag v$NEW"
+echo ""
+echo "Next step:"
+echo "  git push origin main v$NEW"
+echo ""
+echo "Pushing the tag starts the Publish workflow, which runs the tests,"
+echo "publishes to npm and creates the GitHub release."