From 376e9eba8037e873d383452eda27f4e6b8ae2d07 Mon Sep 17 00:00:00 2001 From: ViperEkura <3081035982@qq.com> Date: Thu, 18 Jun 2026 16:45:16 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20IFEval=20=E4=BD=BF=E7=94=A8=20chat=20te?= =?UTF-8?q?mplate=20=E6=A0=BC=E5=BC=8F=E5=8C=96=20prompt=EF=BC=8C=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=20model.eval()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - generate_one 用 tokenizer.apply_chat_template 包 user 消息 - 新增 model.eval() 关闭 dropout,确保确定性输出 --- scripts/eval/evaluate_ifeval.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/scripts/eval/evaluate_ifeval.py b/scripts/eval/evaluate_ifeval.py index b8b2358..70f1320 100644 --- a/scripts/eval/evaluate_ifeval.py +++ b/scripts/eval/evaluate_ifeval.py @@ -343,14 +343,20 @@ def verify_response(response: str, instruction_id: str, kwargs: dict) -> Optiona def generate_one( engine: InferenceEngine, + tokenizer: AutoTokenizer, prompt: str, max_tokens: int, temperature: float, top_p: float, top_k: int, ) -> str: + formatted = tokenizer.apply_chat_template( + [{"role": "user", "content": prompt}], + tokenize=False, + add_generation_prompt=True, + ) output = engine.generate( - prompt=prompt, + prompt=formatted, stream=False, max_tokens=max_tokens, temperature=temperature, @@ -364,6 +370,7 @@ def generate_one( def evaluate( engine: InferenceEngine, + tokenizer: AutoTokenizer, problems: List[dict], max_tokens: int, temperature: float, @@ -385,7 +392,7 @@ def evaluate( samples = [] for _ in range(num_samples): response = generate_one( - engine, prompt, max_tokens, temperature, top_p, top_k + engine, tokenizer, prompt, max_tokens, temperature, top_p, top_k ) samples.append(response) @@ -536,6 +543,7 @@ def main(): model = AutoModel.from_pretrained(args.param_path) tokenizer = AutoTokenizer.from_pretrained(args.param_path) model.to(device="cuda", dtype=torch.bfloat16) + model.eval() engine = InferenceEngine( model=model, @@ -545,6 +553,7 @@ def main(): results = evaluate( engine=engine, + tokenizer=tokenizer, problems=problems, max_tokens=args.max_tokens, temperature=args.temperature,