diff --git a/finetune/CLAUDE.md b/finetune/CLAUDE.md index dd02a84..50a114b 100644 --- a/finetune/CLAUDE.md +++ b/finetune/CLAUDE.md @@ -83,11 +83,8 @@ hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft.py ### Stage 2: (Experimental) GRPO ```bash -# Local (optional; experimental) -uv run train.py grpo --config experiments/grpo/grpo.yaml - # Experimental script -HF_TOKEN=${HF_TOKEN} uv run experiments/grpo/grpo.py +cd finetune && HF_TOKEN=${HF_TOKEN} uv run python experiments/grpo/grpo.py ``` ### HuggingFace Jobs diff --git a/finetune/README.md b/finetune/README.md index a845bcc..bbf4561 100644 --- a/finetune/README.md +++ b/finetune/README.md @@ -47,7 +47,8 @@ uv run eval.py tobil/qmd-query-expansion-1.7B uv run convert_gguf.py --size 1.7B # NOTE: GRPO is currently experimental and moved to finetune/experiments/grpo -# if you want to run it manually, use uv run python experiments/grpo/grpo.py +# if you want to run it manually, use: +# cd finetune && uv run python experiments/grpo/grpo.py ``` ### Local training (if you have a GPU) @@ -56,7 +57,7 @@ uv run convert_gguf.py --size 1.7B uv run train.py sft --config configs/sft.yaml # Experimental GRPO -uv run train.py grpo --config experiments/grpo/grpo.yaml +cd finetune && uv run python experiments/grpo/grpo.py ``` ### Monitoring HF Jobs @@ -138,7 +139,7 @@ It is not part of the default production path for this repository. ```bash # Optional experimental GRPO run -uv run train.py grpo --config experiments/grpo/grpo.yaml +cd finetune && uv run python experiments/grpo/grpo.py ``` ## Evaluation diff --git a/finetune/train.py b/finetune/train.py index ce5612d..2d6646c 100644 --- a/finetune/train.py +++ b/finetune/train.py @@ -417,8 +417,6 @@ def cmd_grpo(args): ) print("To run experimental GRPO, use:") print(" cd finetune && uv run python experiments/grpo/grpo.py") - print("Or, if you have local config wiring ready:") - print(" uv run train.py grpo --config experiments/grpo/grpo.yaml") return import torch @@ -664,22 +662,9 @@ Examples: "--dry-run", action="store_true", help="Print config and exit" ) - grpo_parser = sub.add_parser( - "grpo", - help="Experimental: GRPO reinforcement learning (moved to experiments/grpo/)", - ) - grpo_parser.add_argument("--config", required=True, help="Path to GRPO config YAML") - grpo_parser.add_argument( - "--dry-run", action="store_true", help="Print config, test reward, and exit" - ) - args = parser.parse_args() - if args.stage == "sft": - cmd_sft(args) - elif args.stage == "grpo": - cmd_grpo(args) - + cmd_sft(args) if __name__ == "__main__": main()