feat: IFEval 使用 chat template 格式化 prompt,添加 model.eval()
- generate_one 使用 tokenizer.apply_chat_template 将 prompt 包装为 user 消息(add_generation_prompt=True),再将格式化结果传给 engine.generate
- 新增 model.eval() 关闭 dropout,避免 dropout 给推理输出引入额外随机性
This commit is contained in:
parent
a62c2e11a2
commit
376e9eba80
|
|
@ -343,14 +343,20 @@ def verify_response(response: str, instruction_id: str, kwargs: dict) -> Optiona
|
||||||
|
|
||||||
def generate_one(
|
def generate_one(
|
||||||
engine: InferenceEngine,
|
engine: InferenceEngine,
|
||||||
|
tokenizer: AutoTokenizer,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
max_tokens: int,
|
max_tokens: int,
|
||||||
temperature: float,
|
temperature: float,
|
||||||
top_p: float,
|
top_p: float,
|
||||||
top_k: int,
|
top_k: int,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
formatted = tokenizer.apply_chat_template(
|
||||||
|
[{"role": "user", "content": prompt}],
|
||||||
|
tokenize=False,
|
||||||
|
add_generation_prompt=True,
|
||||||
|
)
|
||||||
output = engine.generate(
|
output = engine.generate(
|
||||||
prompt=prompt,
|
prompt=formatted,
|
||||||
stream=False,
|
stream=False,
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
|
|
@ -364,6 +370,7 @@ def generate_one(
|
||||||
|
|
||||||
def evaluate(
|
def evaluate(
|
||||||
engine: InferenceEngine,
|
engine: InferenceEngine,
|
||||||
|
tokenizer: AutoTokenizer,
|
||||||
problems: List[dict],
|
problems: List[dict],
|
||||||
max_tokens: int,
|
max_tokens: int,
|
||||||
temperature: float,
|
temperature: float,
|
||||||
|
|
@ -385,7 +392,7 @@ def evaluate(
|
||||||
samples = []
|
samples = []
|
||||||
for _ in range(num_samples):
|
for _ in range(num_samples):
|
||||||
response = generate_one(
|
response = generate_one(
|
||||||
engine, prompt, max_tokens, temperature, top_p, top_k
|
engine, tokenizer, prompt, max_tokens, temperature, top_p, top_k
|
||||||
)
|
)
|
||||||
samples.append(response)
|
samples.append(response)
|
||||||
|
|
||||||
|
|
@ -536,6 +543,7 @@ def main():
|
||||||
model = AutoModel.from_pretrained(args.param_path)
|
model = AutoModel.from_pretrained(args.param_path)
|
||||||
tokenizer = AutoTokenizer.from_pretrained(args.param_path)
|
tokenizer = AutoTokenizer.from_pretrained(args.param_path)
|
||||||
model.to(device="cuda", dtype=torch.bfloat16)
|
model.to(device="cuda", dtype=torch.bfloat16)
|
||||||
|
model.eval()
|
||||||
|
|
||||||
engine = InferenceEngine(
|
engine = InferenceEngine(
|
||||||
model=model,
|
model=model,
|
||||||
|
|
@ -545,6 +553,7 @@ def main():
|
||||||
|
|
||||||
results = evaluate(
|
results = evaluate(
|
||||||
engine=engine,
|
engine=engine,
|
||||||
|
tokenizer=tokenizer,
|
||||||
problems=problems,
|
problems=problems,
|
||||||
max_tokens=args.max_tokens,
|
max_tokens=args.max_tokens,
|
||||||
temperature=args.temperature,
|
temperature=args.temperature,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue