From 533f0eed372f03a07d46569ba3fcbca0b1d8cf4e Mon Sep 17 00:00:00 2001
From: Tobi Lutke <tobi@shopify.com>
Date: Sat, 31 Jan 2026 12:15:56 -0500
Subject: [PATCH] docs: add finetune CLAUDE.md and update training workflow

- Add finetune/CLAUDE.md documenting the training pipeline
- Update configs to output to local outputs/ directory (gitignored)
- Document that all data/*.jsonl files are training data
- Document local CUDA training vs HuggingFace Jobs cloud training
- Enforce eval requirement before any model upload
- Single model repo (no -v1, -v2, -v4 versioning)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .gitignore                 |   3 +
 finetune/CLAUDE.md         | 164 +++++++++++++++++++++++++++++++++++++
 finetune/configs/grpo.yaml |   4 +-
 finetune/configs/sft.yaml  |   2 +-
 4 files changed, 170 insertions(+), 3 deletions(-)
 create mode 100644 finetune/CLAUDE.md

diff --git a/.gitignore b/.gitignore
index 24cf666..7f83d11 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ texts/
 !README.md
 !CLAUDE.md
 !skills/**/*.md
+!finetune/*.md
+finetune/outputs/
+finetune/data/train/
diff --git a/finetune/CLAUDE.md b/finetune/CLAUDE.md
new file mode 100644
index 0000000..e8ea8d0
--- /dev/null
+++ b/finetune/CLAUDE.md
@@ -0,0 +1,164 @@
+# QMD Query Expansion Fine-Tuning
+
+## Overview
+
+Train Qwen3-1.7B to expand search queries into structured `hyde:/lex:/vec:` output for QMD's hybrid retrieval pipeline.
+
+## Output Format
+
+```
+hyde: A hypothetical document passage that would answer the query.
+lex: keyword1
+lex: keyword2
+vec: semantic query reformulation
+vec: another semantic variation
+```
+
+- `hyde:` always comes FIRST (one line max)
+- `lex:` lines for BM25 keyword search (1-3 lines, short keywords)
+- `vec:` lines for vector similarity search (1-3 lines, natural language)
+
+## Model Repository
+
+**Single destination**: `tobil/qmd-query-expansion-1.7B`
+
+- No versioned directories (`-v1`, `-v2`, `-v4`, etc.)
+- No separate `-sft` or `-grpo` repos for final models
+- Update the main repo only when eval scores improve
+- GGUF variants go to `tobil/qmd-query-expansion-1.7B-gguf`
+
+## Training Data
+
+All JSONL files in `data/` are training data:
+
+```
+data/
+├── qmd_expansion_v2.jsonl
+├── qmd_expansion_handcrafted_only.jsonl
+├── qmd_only_sampled.jsonl
+├── qmd_only_variants.jsonl
+└── ... any additional .jsonl files
+```
+
+**All `.jsonl` files in `data/` should be concatenated for training runs.**
+
+Each JSONL line: `{"input": "query", "output": "hyde:...\nlex:...\nvec:..."}`
+
+## Data Generation Tools
+
+| Script | Purpose |
+|--------|---------|
+| `dataset/generate_data.py` | Generate via Claude API (high quality) |
+| `dataset/generate_data_offline.py` | Transform from HuggingFace datasets |
+| `dataset/prepare_data.py` | Format for Qwen3 chat template |
+| `dataset/clean_data.py` | Detect and fix technical term issues |
+| `generate_only_variants.py` | Generate `/only:lex` and `/only:vec` variants |
+
+## Local Training Output
+
+All training outputs go to `outputs/` (gitignored):
+
+```
+outputs/
+├── sft/           # SFT checkpoint
+└── grpo/          # GRPO checkpoint
+```
+
+## Training Pipeline
+
+Always use **Qwen3-1.7B** as the base model unless explicitly stated otherwise.
+
+Training can run **locally** (requires CUDA GPU) or via **HuggingFace Jobs** (cloud GPU, no local hardware needed).
+
+### Stage 1: SFT
+
+```bash
+# Local (requires CUDA)
+uv run train.py sft --config configs/sft.yaml
+# Output: outputs/sft/
+
+# Cloud (HuggingFace Jobs - no local GPU needed)
+hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft.py
+```
+
+### Stage 2: GRPO
+
+```bash
+# Local (requires CUDA)
+uv run train.py grpo --config configs/grpo.yaml
+# Output: outputs/grpo/
+
+# Cloud (HuggingFace Jobs - no local GPU needed)
+hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 4h jobs/grpo.py
+```
+
+### HuggingFace Jobs
+
+If no local CUDA device is available, launch training in the cloud with the `hf jobs uv run` commands shown in each stage above, then monitor or manage the job with:
+
+```bash
+hf jobs ps                    # List running jobs
+hf jobs logs <job-id>         # Stream logs
+hf jobs inspect <job-id>      # Check status
+hf jobs cancel <job-id>       # Cancel a job
+```
+
+The `jobs/` directory contains self-contained scripts that include all dependencies inline.
+
+### Evaluation
+
+```bash
+# Eval local model
+uv run eval.py --model ./outputs/grpo
+
+# Eval HuggingFace model
+uv run eval.py --model tobil/qmd-query-expansion-1.7B
+
+# Save eval results to file
+uv run eval.py --model ./outputs/grpo -o eval_results.json
+```
+
+## Quality Scoring
+
+`reward.py` is the single source of truth for scoring:
+
+```bash
+# Self-test the reward function
+uv run reward.py
+```
+
+See `SCORING.md` for the full rubric.
+
+## Deployment Rules
+
+**Never upload without eval.** Every model push must include eval results.
+
+### Checklist
+
+1. Train SFT on all `data/*.jsonl` → `outputs/sft/`
+2. Train GRPO on top of SFT → `outputs/grpo/`
+3. **Run eval on local model**: `uv run eval.py --model ./outputs/grpo -o eval_results.json`
+4. Compare the results against the currently deployed model's eval results
+5. If eval improves:
+   - Push to `tobil/qmd-query-expansion-1.7B`
+   - **Include eval output in the model card / commit message**
+6. If the model was pushed, convert it to GGUF and update `tobil/qmd-query-expansion-1.7B-gguf`
+7. Update `DEFAULT_GENERATE_MODEL` in `src/llm.ts` if the repo name changed
+
+## Key Files
+
+```
+finetune/
+├── reward.py          # Scoring function (single source of truth)
+├── train.py           # Unified SFT + GRPO training
+├── eval.py            # Generate and score expansions
+├── convert_gguf.py    # GGUF conversion
+├── SCORING.md         # Detailed scoring rubric
+├── CLAUDE.md          # This file
+├── data/              # All training JSONL files
+├── outputs/           # Local training outputs (gitignored)
+├── dataset/           # Data generation scripts
+├── jobs/              # Self-contained HuggingFace Jobs scripts
+├── configs/           # Training configs (sft.yaml, grpo.yaml)
+└── evals/             # Test queries and results
+```
diff --git a/finetune/configs/grpo.yaml b/finetune/configs/grpo.yaml
index 34d5e12..31529ca 100644
--- a/finetune/configs/grpo.yaml
+++ b/finetune/configs/grpo.yaml
@@ -9,8 +9,8 @@
 
 model:
   base: "Qwen/Qwen3-1.7B"
-  sft: "tobil/qmd-query-expansion-1.7B-sft"
-  output: "tobil/qmd-query-expansion-1.7B-grpo"
+  sft: "outputs/sft"  # Use local SFT output (or HF path if uploaded)
+  output: "outputs/grpo"  # Local training output (push to HF manually after eval)
 
 dataset:
   name: "tobil/qmd-query-expansion-train-v2"
diff --git a/finetune/configs/sft.yaml b/finetune/configs/sft.yaml
index 47556ec..3c506f1 100644
--- a/finetune/configs/sft.yaml
+++ b/finetune/configs/sft.yaml
@@ -5,7 +5,7 @@
 
 model:
   base: "Qwen/Qwen3-1.7B"
-  output: "tobil/qmd-query-expansion-1.7B-sft"
+  output: "outputs/sft"  # Local training output (push to HF manually after eval)
 
 dataset:
   name: "tobil/qmd-query-expansion-train-v2"