fix: max_seq_len 检查改为仅 prompt 超限发 STOP,max_tokens 超出部分 clamp

This commit is contained in:
ViperEkura 2026-05-10 20:17:47 +08:00
parent a3c8296135
commit a58fab8d6e
1 changed file with 3 additions and 1 deletion

View File

@@ -147,11 +147,13 @@ class InferenceScheduler:
if len(prompt_ids) > self.max_prompt_len:
prompt_ids = prompt_ids[-self.max_prompt_len :]
if len(prompt_ids) + max_tokens > self.max_seq_len:
if len(prompt_ids) >= self.max_seq_len:
if stream_callback:
stream_callback(STOP)
return task_id
max_tokens = min(max_tokens, self.max_seq_len - len(prompt_ids))
task = Task(
task_id=task_id,
prompt_ids=prompt_ids,