feat: IFEval 使用 chat template 格式化 prompt,并添加 model.eval()

- generate_one 用 tokenizer.apply_chat_template 包 user 消息
- 新增 model.eval() 将模型切换到推理模式(关闭 dropout),消除训练态带来的随机性;采样参数(temperature/top_p/top_k)引入的随机性不受影响
This commit is contained in:
ViperEkura 2026-06-18 16:45:16 +08:00
parent a62c2e11a2
commit 376e9eba80
1 changed file with 11 additions and 2 deletions

View File

@@ -343,14 +343,20 @@ def verify_response(response: str, instruction_id: str, kwargs: dict) -> Optiona
def generate_one( def generate_one(
engine: InferenceEngine, engine: InferenceEngine,
tokenizer: AutoTokenizer,
prompt: str, prompt: str,
max_tokens: int, max_tokens: int,
temperature: float, temperature: float,
top_p: float, top_p: float,
top_k: int, top_k: int,
) -> str: ) -> str:
formatted = tokenizer.apply_chat_template(
[{"role": "user", "content": prompt}],
tokenize=False,
add_generation_prompt=True,
)
output = engine.generate( output = engine.generate(
prompt=prompt, prompt=formatted,
stream=False, stream=False,
max_tokens=max_tokens, max_tokens=max_tokens,
temperature=temperature, temperature=temperature,
@@ -364,6 +370,7 @@ def generate_one(
def evaluate( def evaluate(
engine: InferenceEngine, engine: InferenceEngine,
tokenizer: AutoTokenizer,
problems: List[dict], problems: List[dict],
max_tokens: int, max_tokens: int,
temperature: float, temperature: float,
@@ -385,7 +392,7 @@ def evaluate(
samples = [] samples = []
for _ in range(num_samples): for _ in range(num_samples):
response = generate_one( response = generate_one(
engine, prompt, max_tokens, temperature, top_p, top_k engine, tokenizer, prompt, max_tokens, temperature, top_p, top_k
) )
samples.append(response) samples.append(response)
@@ -536,6 +543,7 @@ def main():
model = AutoModel.from_pretrained(args.param_path) model = AutoModel.from_pretrained(args.param_path)
tokenizer = AutoTokenizer.from_pretrained(args.param_path) tokenizer = AutoTokenizer.from_pretrained(args.param_path)
model.to(device="cuda", dtype=torch.bfloat16) model.to(device="cuda", dtype=torch.bfloat16)
model.eval()
engine = InferenceEngine( engine = InferenceEngine(
model=model, model=model,
@@ -545,6 +553,7 @@ def main():
results = evaluate( results = evaluate(
engine=engine, engine=engine,
tokenizer=tokenizer,
problems=problems, problems=problems,
max_tokens=args.max_tokens, max_tokens=args.max_tokens,
temperature=args.temperature, temperature=args.temperature,