Remove the `grpo` command from the default train entrypoint
This commit is contained in:
parent
189916d6fb
commit
d6f3688d91
@ -83,11 +83,8 @@ hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft.py
|
||||
### Stage 2: (Experimental) GRPO
|
||||
|
||||
```bash
|
||||
# Local (optional; experimental)
|
||||
uv run train.py grpo --config experiments/grpo/grpo.yaml
|
||||
|
||||
# Experimental script
|
||||
HF_TOKEN=${HF_TOKEN} uv run experiments/grpo/grpo.py
|
||||
cd finetune && HF_TOKEN=${HF_TOKEN} uv run python experiments/grpo/grpo.py
|
||||
```
|
||||
|
||||
### HuggingFace Jobs
|
||||
|
||||
@ -47,7 +47,8 @@ uv run eval.py tobil/qmd-query-expansion-1.7B
|
||||
uv run convert_gguf.py --size 1.7B
|
||||
|
||||
# NOTE: GRPO is currently experimental and has moved to finetune/experiments/grpo
|
||||
# if you want to run it manually, use uv run python experiments/grpo/grpo.py
|
||||
# if you want to run it manually, use:
|
||||
# cd finetune && uv run python experiments/grpo/grpo.py
|
||||
```
|
||||
|
||||
### Local training (if you have a GPU)
|
||||
@ -56,7 +57,7 @@ uv run convert_gguf.py --size 1.7B
|
||||
uv run train.py sft --config configs/sft.yaml
|
||||
|
||||
# Experimental GRPO
|
||||
uv run train.py grpo --config experiments/grpo/grpo.yaml
|
||||
cd finetune && uv run python experiments/grpo/grpo.py
|
||||
```
|
||||
|
||||
### Monitoring HF Jobs
|
||||
@ -138,7 +139,7 @@ It is not part of the default production path for this repository.
|
||||
|
||||
```bash
|
||||
# Optional experimental GRPO run
|
||||
uv run train.py grpo --config experiments/grpo/grpo.yaml
|
||||
cd finetune && uv run python experiments/grpo/grpo.py
|
||||
```
|
||||
|
||||
## Evaluation
|
||||
|
||||
@ -417,8 +417,6 @@ def cmd_grpo(args):
|
||||
)
|
||||
print("To run experimental GRPO, use:")
|
||||
print(" cd finetune && uv run python experiments/grpo/grpo.py")
|
||||
print("Or, if you have local config wiring ready:")
|
||||
print(" uv run train.py grpo --config experiments/grpo/grpo.yaml")
|
||||
return
|
||||
|
||||
import torch
|
||||
@ -664,22 +662,9 @@ Examples:
|
||||
"--dry-run", action="store_true", help="Print config and exit"
|
||||
)
|
||||
|
||||
grpo_parser = sub.add_parser(
|
||||
"grpo",
|
||||
help="Experimental: GRPO reinforcement learning (moved to experiments/grpo/)",
|
||||
)
|
||||
grpo_parser.add_argument("--config", required=True, help="Path to GRPO config YAML")
|
||||
grpo_parser.add_argument(
|
||||
"--dry-run", action="store_true", help="Print config, test reward, and exit"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.stage == "sft":
|
||||
cmd_sft(args)
|
||||
elif args.stage == "grpo":
|
||||
cmd_grpo(args)
|
||||
|
||||
cmd_sft(args)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user