fix: 修复训练脚本两处参数传递问题
- prepare_checkpoint 增加 DDP 判断,单卡时不访问 .module
- dpo_beta 改为 beta,对齐 DPOStrategy 参数名
This commit is contained in:
parent
1d54491809
commit
8a11a7d444
|
|
@ -180,7 +180,9 @@ def create_scheduler(
|
|||
|
||||
|
||||
def prepare_checkpoint(model: nn.Module) -> dict:
    """Return the state dict of *model* for checkpointing.

    Args:
        model: Either a plain ``nn.Module`` or a module wrapped in ``DDP``.

    Returns:
        The state dict of the wrapped module when *model* is a ``DDP``
        instance, otherwise the state dict of *model* itself.
    """
    # Only a DDP wrapper is unwrapped through `.module`; a plain model
    # (e.g. a single-device run) is used directly, so `.module` is never
    # accessed on it.
    if isinstance(model, DDP):
        return model.module.state_dict()
    return model.state_dict()
|
||||
|
||||
|
||||
def compute_total_steps(
|
||||
|
|
@ -253,7 +255,7 @@ def train(
|
|||
model = model.to(dtype=torch.bfloat16)
|
||||
|
||||
strategy_kwargs = {
|
||||
"dpo_beta": dpo_beta,
|
||||
"beta": dpo_beta,
|
||||
"label_smoothing": label_smoothing,
|
||||
"clip_eps": grpo_clip_eps,
|
||||
"kl_coef": grpo_kl_coef,
|
||||
|
|
|
|||
Loading…
Reference in New Issue