fix: 修复训练脚本两处参数传递问题

- prepare_checkpoint 增加 DDP 判断，单卡时不访问 .module
- dpo_beta 改为 beta，对齐 DPOStrategy 参数名
2026-05-17 11:04:40 +08:00 · 2026-05-17 11:04:40 +08:00 · 8a11a7d444
parent 1d54491809
commit 8a11a7d444
1 changed files with 4 additions and 2 deletions
--- a/scripts/tools/train.py
+++ b/scripts/tools/train.py
@@ -180,7 +180,9 @@ def create_scheduler(


 def prepare_checkpoint(model: nn.Module) -> dict:
+    if isinstance(model, DDP):
        return model.module.state_dict()
+    return model.state_dict()


 def compute_total_steps(
@@ -253,7 +255,7 @@ def train(
    model = model.to(dtype=torch.bfloat16)

    strategy_kwargs = {
-        "dpo_beta": dpo_beta,
+        "beta": dpo_beta,
        "label_smoothing": label_smoothing,
        "clip_eps": grpo_clip_eps,
        "kl_coef": grpo_kl_coef,