fix: 修复训练脚本两处参数传递问题

- prepare_checkpoint 增加 DDP 判断,单卡时不访问 .module
- dpo_beta 改为 beta,对齐 DPOStrategy 参数名
This commit is contained in:
ViperEkura 2026-05-17 11:04:40 +08:00
parent 1d54491809
commit 8a11a7d444
1 changed file with 4 additions and 2 deletions

View File

@@ -180,7 +180,9 @@ def create_scheduler(
 def prepare_checkpoint(model: nn.Module) -> dict:
-    return model.module.state_dict()
+    if isinstance(model, DDP):
+        return model.module.state_dict()
+    return model.state_dict()
 def compute_total_steps(
@@ -253,7 +255,7 @@ def train(
     model = model.to(dtype=torch.bfloat16)
     strategy_kwargs = {
-        "dpo_beta": dpo_beta,
+        "beta": dpo_beta,
         "label_smoothing": label_smoothing,
         "clip_eps": grpo_clip_eps,
         "kl_coef": grpo_kl_coef,