From c4401512f2568ae9eda4af1956b2ac08da866878 Mon Sep 17 00:00:00 2001 From: ViperEkura <3081035982@qq.com> Date: Fri, 8 May 2026 15:52:27 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E9=95=BF=E5=AF=B9?= =?UTF-8?q?=E8=AF=9D=E6=88=AA=E6=96=AD=E6=96=B9=E5=90=91=E9=94=99=E8=AF=AF?= =?UTF-8?q?=EF=BC=8C=E4=BF=9D=E7=95=99=E6=9C=80=E6=96=B0=20token=20?= =?UTF-8?q?=E8=80=8C=E9=9D=9E=E6=9C=80=E6=97=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add_task 中 prompt 超长时改为保留末尾 token(prompt_ids[-max_prompt_len:]) 而非开头 token,确保多轮对话时模型能看到最近的提问上下文 - 同时将 InferenceEngine 的 max_prompt_len 默认值由 512 调整为 2048 --- astrai/inference/engine.py | 2 +- astrai/inference/scheduler.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/astrai/inference/engine.py b/astrai/inference/engine.py index e7a2d48..77cd1d5 100644 --- a/astrai/inference/engine.py +++ b/astrai/inference/engine.py @@ -148,7 +148,7 @@ class InferenceEngine: tokenizer: AutoTokenizer, max_batch_size: int = 1, max_seq_len: Optional[int] = None, - max_prompt_len: int = 512, + max_prompt_len: int = 2048, cache_capacity: int = 1000, ): """Initializes the engine and starts the scheduler background thread. diff --git a/astrai/inference/scheduler.py b/astrai/inference/scheduler.py index c9b690e..2c387a5 100644 --- a/astrai/inference/scheduler.py +++ b/astrai/inference/scheduler.py @@ -480,7 +480,7 @@ class InferenceScheduler: prompt_ids = self.tokenizer.encode(prompt) if len(prompt_ids) > self.max_prompt_len: - prompt_ids = prompt_ids[: self.max_prompt_len] + prompt_ids = prompt_ids[-self.max_prompt_len :] task = Task( task_id=task_id,