fix: 修复训练脚本两处参数传递问题
- prepare_checkpoint 增加 DDP 判断,单卡时不访问 .module
- dpo_beta 改为 beta,对齐 DPOStrategy 参数名
This commit is contained in:
parent
1d54491809
commit
8a11a7d444
|
|
@@ -180,7 +180,9 @@ def create_scheduler(
|
||||||
|
|
||||||
|
|
||||||
def prepare_checkpoint(model: nn.Module) -> dict:
|
def prepare_checkpoint(model: nn.Module) -> dict:
|
||||||
|
if isinstance(model, DDP):
|
||||||
return model.module.state_dict()
|
return model.module.state_dict()
|
||||||
|
return model.state_dict()
|
||||||
|
|
||||||
|
|
||||||
def compute_total_steps(
|
def compute_total_steps(
|
||||||
|
|
@@ -253,7 +255,7 @@ def train(
|
||||||
model = model.to(dtype=torch.bfloat16)
|
model = model.to(dtype=torch.bfloat16)
|
||||||
|
|
||||||
strategy_kwargs = {
|
strategy_kwargs = {
|
||||||
"dpo_beta": dpo_beta,
|
"beta": dpo_beta,
|
||||||
"label_smoothing": label_smoothing,
|
"label_smoothing": label_smoothing,
|
||||||
"clip_eps": grpo_clip_eps,
|
"clip_eps": grpo_clip_eps,
|
||||||
"kl_coef": grpo_kl_coef,
|
"kl_coef": grpo_kl_coef,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue