From e9def84ce7c1e9425a716b487ff496fe85712275 Mon Sep 17 00:00:00 2001 From: ViperEkura <3081035982@qq.com> Date: Tue, 26 May 2026 19:59:22 +0800 Subject: [PATCH] =?UTF-8?q?fix=20:=20perplexity.py=20left=20padding=20?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=20batch>1=20=E6=97=B6=20PPL=20=E8=AE=A1?= =?UTF-8?q?=E7=AE=97=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/tools/perplexity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/tools/perplexity.py b/scripts/tools/perplexity.py index a410320..84b2640 100644 --- a/scripts/tools/perplexity.py +++ b/scripts/tools/perplexity.py @@ -44,8 +44,8 @@ def process_file( for seq in batch_encoded: pad_len = max_len - len(seq) - padded_seq = [tokenizer.pad_id] * pad_len + seq - mask = [False] * pad_len + [True] * len(seq) + padded_seq = seq + [tokenizer.pad_id] * pad_len + mask = [True] * len(seq) + [False] * pad_len padded_ids.append(padded_seq) masks.append(mask)