fix: max_seq_len 检查改为仅 prompt 超限发 STOP,max_tokens 超出部分 clamp
This commit is contained in:
parent
a3c8296135
commit
a58fab8d6e
|
|
@@ -147,11 +147,13 @@ class InferenceScheduler:
|
||||||
if len(prompt_ids) > self.max_prompt_len:
|
if len(prompt_ids) > self.max_prompt_len:
|
||||||
prompt_ids = prompt_ids[-self.max_prompt_len :]
|
prompt_ids = prompt_ids[-self.max_prompt_len :]
|
||||||
|
|
||||||
if len(prompt_ids) + max_tokens > self.max_seq_len:
|
if len(prompt_ids) >= self.max_seq_len:
|
||||||
if stream_callback:
|
if stream_callback:
|
||||||
stream_callback(STOP)
|
stream_callback(STOP)
|
||||||
return task_id
|
return task_id
|
||||||
|
|
||||||
|
max_tokens = min(max_tokens, self.max_seq_len - len(prompt_ids))
|
||||||
|
|
||||||
task = Task(
|
task = Task(
|
||||||
task_id=task_id,
|
task_id=task_id,
|
||||||
prompt_ids=prompt_ids,
|
prompt_ids=prompt_ids,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue