# Source: qmd/finetune/configs/sft_local.yaml (YAML, 45 lines, 1.0 KiB)

# SFT Training Config - Local Data, Multi-GPU
# Usage: accelerate launch --config_file configs/accelerate_multi_gpu.yaml train.py sft --config configs/sft_local.yaml
# Model to fine-tune and where the results are written.
model:
  base: "Qwen/Qwen3-1.7B"
  output: "outputs/sft"  # Local output
  # NOTE(review): presumably disables uploading the result to a model hub;
  # confirm against train.py.
  push_to_hub: false
# Training data location and how it is read.
dataset:
  name: "data/train"  # Local path
  # NOTE(review): presumably the record field holding the training text;
  # confirm against train.py.
  text_field: "text"
  split: "train"
  # NOTE(review): presumably the fraction of the data held out for
  # evaluation (0.1 = 10%); confirm against train.py.
  eval_split: 0.1
# Optimisation and checkpointing settings.
training:
  epochs: 5
  # Per GPU; effective batch = 2 * 4 GPUs * 4 accum = 32 (assumes 4 GPUs).
  batch_size: 2
  gradient_accumulation_steps: 4
  # 2e-4, written in decimal form so the loader reads it as a float
  # (YAML 1.1 loaders such as PyYAML treat a bare `2e-4` as a string).
  learning_rate: 0.0002
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"
  ddp_find_unused_parameters: false
  # Save checkpoints every 30 minutes
  save_interval_minutes: 30
  # Step-based save cadence kept as a fallback (not used for wall-clock mode)
  save_steps: 200
# LoRA adapter settings.
# NOTE(review): rank/alpha/dropout presumably map onto the adapter library's
# rank, scaling and dropout options; confirm against train.py.
lora:
  rank: 16
  alpha: 32
  dropout: 0.05
  # Module names that receive adapters.
  # NOTE(review): these look like the attention and MLP projection layers of
  # the base model; confirm they match its module names.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
# Experiment-tracking labels.
tracking:
  project: "qmd-query-expansion"
  # Quoted so the leading `{` is not parsed as a YAML flow mapping.
  # NOTE(review): {day} and {time} are presumably filled in by the training
  # script at launch; confirm against train.py.
  run_name: "{day} {time}"