debug

2026-04-25 15:03:07 +08:00 · 2026-04-25 15:03:07 +08:00 · bb811506bf
parent 20e73c05e0
commit bb811506bf
15 changed files with 1838 additions and 892 deletions
--- a/assets/API.md
+++ b/assets/API.md
@ -18,7 +18,7 @@
 ```json
 {
  "success": true,
-  "message": "注册成功",
+  "message": "Registration successful",
  "data": {
    "id": 1,
    "username": "string"
@ -41,19 +41,31 @@
 ```json
 {
  "success": true,
-  "message": "登录成功",
+  "message": "Login successful",
  "data": {
    "access_token": "eyJ...",
    "token_type": "bearer",
    "user": {
      "id": 1,
      "username": "string",
-      "role": "user"
+      "email": "user@example.com",
      "role": "user",
      "permission_level": 1,
      "workspace_path": null,
      "is_active": true
    }
  }
 }
 ```
 **用户权限级别：**
 | 级别 | 名称 | 说明 |
 |------|------|------|
 | 1 | READ_ONLY | 只读权限 |
 | 2 | WRITE | 写入权限 |
 | 3 | EXECUTE | 执行权限 |
 | 4 | ADMIN | 管理员权限 |
 ### POST /api/auth/logout
 用户登出
@ -63,7 +75,7 @@
 ```json
 {
  "success": true,
-  "message": "登出成功"
+  "message": "Logout successful"
 }
 ```
@ -81,11 +93,39 @@
    "username": "string",
    "email": "user@example.com",
    "role": "user",
-    "is_active": true
+    "permission_level": 1,
    "workspace_path": null,
    "is_active": true,
    "created_at": "2024-01-01T00:00:00"
  }
 }
 ```
 ### GET /api/auth/users
 获取所有用户（管理员专用）
 **请求头：** `Authorization: Bearer <token>`
 **响应：**
 ```json
 {
  "success": true,
  "data": {
    "users": [...]
  }
 }
 ```
 ### PUT /api/auth/users/{user_id}
 更新用户权限（管理员专用）
 **请求体：**
 ```json
 {
  "permission_level": 2
 }
 ```
 ---
 ## 会话 `/api/conversations`
@ -123,10 +163,11 @@
 {
  "project_id": "string (可选)",
  "title": "新会话",
-  "model": "glm-5",
+  "model": "deepseek-chat",
-  "system_prompt": "string (可选)",
+  "provider_id": 1,
-  "temperature": 1.0,
+  "system_prompt": "You are a helpful assistant. (可选)",
-  "max_tokens": 65536,
+  "temperature": 0.7,
  "max_tokens": 2000,
  "thinking_enabled": false
 }
 ```
@ -139,9 +180,15 @@
  "data": {
    "id": "conv_xxx",
    "user_id": 1,
    "provider_id": 1,
    "title": "新会话",
-    "model": "glm-5",
+    "model": "deepseek-chat",
-    ...
+    "system_prompt": "You are a helpful assistant.",
    "temperature": 0.7,
    "max_tokens": 2000,
    "thinking_enabled": false,
    "created_at": "2024-01-01T00:00:00",
    "updated_at": "2024-01-01T00:00:00"
  }
 }
 ```
@ -149,32 +196,92 @@
 ### GET /api/conversations/{id}
 获取会话详情
-**路径参数：**
+**路径参数：** `id`: 会话ID
 - `id`: 会话ID
 **请求头：** `Authorization: Bearer <token>`
 ### PUT /api/conversations/{id}
 更新会话
 **路径参数：** `id`: 会话ID
 **请求头：** `Authorization: Bearer <token>`
 **请求体：**
 ```json
 {
  "title": "新标题",
  "model": "gpt-4",
  "provider_id": 1,
  "system_prompt": "You are...",
  "temperature": 0.8,
  "max_tokens": 4000,
  "thinking_enabled": true
 }
 ```
 ### DELETE /api/conversations/{id}
 删除会话
 **路径参数：** `id`: 会话ID
 **请求头：** `Authorization: Bearer <token>`
 ---
 ## 消息 `/api/messages`
-### GET /api/messages/{conversation_id}
+### GET /api/messages/
 获取消息列表
 **路径参数：**
 - `conversation_id`: 会话ID
 **查询参数：**
 - `conversation_id`: 会话ID（必需）
 - `limit` (可选): 返回数量，默认100
 **请求头：** `Authorization: Bearer <token>`
 **响应：**
 ```json
 {
  "success": true,
  "data": {
    "messages": [
      {
        "id": "msg_xxx",
        "conversation_id": "conv_xxx",
        "role": "user",
        "content": "用户消息",
        "text": "用户消息",
        "attachments": [],
        "process_steps": [],
        "token_count": 10,
        "usage": null,
        "created_at": "2024-01-01T00:00:00"
      },
      {
        "id": "msg_yyy",
        "conversation_id": "conv_xxx",
        "role": "assistant",
        "content": "AI 回复文本内容",
        "text": "AI 回复文本内容",
        "attachments": [],
        "process_steps": [
          {"id": "step-0", "index": 0, "type": "thinking", "content": "让我思考..."},
          {"id": "step-1", "index": 1, "type": "text", "content": "根据搜索结果..."},
          {"id": "step-2", "index": 2, "type": "tool_call", "id_ref": "call_xxx", "name": "web_search", "arguments": "..."},
          {"id": "step-3", "index": 3, "type": "tool_result", "id_ref": "call_xxx", "name": "web_search", "content": "...", "success": true}
        ],
        "token_count": 100,
        "usage": {"prompt_tokens": 50, "completion_tokens": 50, "total_tokens": 100},
        "created_at": "2024-01-01T00:00:01"
      }
    ],
    "title": "会话标题",
    "first_message": "用户的第一条消息..."
  }
 }
 ```
 ### POST /api/messages/
 发送消息（非流式）
@ -185,7 +292,7 @@
 {
  "conversation_id": "conv_xxx",
  "content": "用户消息",
-  "tools_enabled": true
+  "thinking_enabled": false
 }
 ```
@ -201,20 +308,182 @@
 ```
 ### POST /api/messages/stream
-发送消息（流式响应）
+发送消息（流式响应 - SSE）
 使用 Server-Sent Events (SSE) 返回流式响应。
-**事件类型：**
+**请求头：** `Authorization: Bearer <token>`
 - `text`: 文本增量
 - `tool_call`: 工具调用
 - `tool_result`: 工具结果
 - `done`: 完成
 - `error`: 错误
-### DELETE /api/messages/{id}
+**请求体：**
 ```json
 {
  "conversation_id": "conv_xxx",
  "content": "用户消息",
  "thinking_enabled": true,
  "enabled_tools": ["web_search", "file_read", "python_execute"]
 }
 ```
 **SSE 事件类型：**
 #### process_step
 结构化步骤事件（渲染顺序的唯一数据源）
 ```json
 event: process_step
 data: {"step": {"id": "step-0", "index": 0, "type": "thinking", "content": "让我思考一下..."}}
 event: process_step
 data: {"step": {"id": "step-1", "index": 1, "type": "text", "content": "以下是搜索结果："}}
 event: process_step
 data: {"step": {"id": "step-2", "index": 2, "type": "tool_call", "id_ref": "call_abc", "name": "web_search", "arguments": "{\"query\": \"...\"}"}}
 event: process_step
 data: {"step": {"id": "step-3", "index": 3, "type": "tool_result", "id_ref": "call_abc", "name": "web_search", "content": "{...}", "success": true}}
 ```
 **步骤类型说明：**
 | type | 说明 | 额外字段 |
 |------|------|---------|
 | `thinking` | 模型思考过程 | `content` |
 | `text` | 文本回复 | `content` |
 | `tool_call` | 工具调用 | `id_ref`, `name`, `arguments` |
 | `tool_result` | 工具执行结果 | `id_ref`, `name`, `content`, `success` |
 #### done
 响应完成
 ```json
 event: done
 data: {"message_id": "msg_xxx", "token_count": 150, "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}}
 ```
 #### error
 错误信息
 ```json
 event: error
 data: {"content": "错误信息描述"}
 ```
 ### DELETE /api/messages/{message_id}
 删除消息
 **路径参数：** `message_id`: 消息ID
 **请求头：** `Authorization: Bearer <token>`
 ---
 ## LLM 提供商 `/api/providers`
 ### GET /api/providers/
 获取用户的 LLM 提供商列表
 **请求头：** `Authorization: Bearer <token>`
 **响应：**
 ```json
 {
  "success": true,
  "data": {
    "providers": [
      {
        "id": 1,
        "user_id": 1,
        "name": "DeepSeek",
        "provider_type": "openai",
        "base_url": "https://api.deepseek.com/v1",
        "default_model": "deepseek-chat",
        "max_tokens": 8192,
        "is_default": true,
        "enabled": true,
        "created_at": "2024-01-01T00:00:00",
        "updated_at": "2024-01-01T00:00:00"
      }
    ],
    "total": 1
  }
 }
 ```
 ### POST /api/providers/
 创建 LLM 提供商
 **请求头：** `Authorization: Bearer <token>`
 **请求体：**
 ```json
 {
  "name": "DeepSeek",
  "provider_type": "openai",
  "base_url": "https://api.deepseek.com/v1",
  "api_key": "sk-xxxx",
  "default_model": "deepseek-chat",
  "is_default": true
 }
 ```
 **provider_type 可选值：**
 - `openai` - OpenAI/DeepSeek/GLM 兼容 API
 - `anthropic` - Anthropic Claude API
 ### GET /api/providers/{provider_id}
 获取提供商详情
 **路径参数：** `provider_id`: 提供商ID
 **请求头：** `Authorization: Bearer <token>`
 ### PUT /api/providers/{provider_id}
 更新提供商
 **路径参数：** `provider_id`: 提供商ID
 **请求头：** `Authorization: Bearer <token>`
 **请求体：**
 ```json
 {
  "name": "新名称",
  "base_url": "https://api.example.com/v1",
  "api_key": "sk-yyyy",
  "default_model": "gpt-4",
  "max_tokens": 16384,
  "is_default": false,
  "enabled": true
 }
 ```
 ### DELETE /api/providers/{provider_id}
 删除提供商
 **路径参数：** `provider_id`: 提供商ID
 **请求头：** `Authorization: Bearer <token>`
 ### POST /api/providers/{provider_id}/test
 测试提供商连接
 **路径参数：** `provider_id`: 提供商ID
 **请求头：** `Authorization: Bearer <token>`
 **响应：**
 ```json
 {
  "success": true,
  "message": "HTTP 200: ...",
  "data": {
    "status_code": 200,
    "success": true,
    "response_body": "..."
  }
 }
 ```
 ---
 ## 工具 `/api/tools`
@ -223,7 +492,7 @@
 获取可用工具列表
 **查询参数：**
- `category` (可选): 工具分类
+- `category` (可选): 工具分类（code/file/shell/crawler/data）
 **请求头：** `Authorization: Bearer <token>`
@ -232,12 +501,21 @@
 {
  "success": true,
  "data": {
-    "tools": [...],
+    "tools": [
      {
        "name": "python_execute",
        "description": "Execute Python code",
        "category": "code",
        "parameters": {...}
      },
      ...
    ],
    "categorized": {
      "crawler": [...],
      "code": [...],
-      "data": [...],
+      "file": [...],
-      "weather": [...]
+      "shell": [...],
      "crawler": [...],
      "data": [...]
    },
    "total": 11
  }
@ -247,9 +525,39 @@
 ### GET /api/tools/{name}
 获取工具详情
 **路径参数：** `name`: 工具名称
 **请求头：** `Authorization: Bearer <token>`
 **响应：**
 ```json
 {
  "success": true,
  "data": {
    "name": "web_search",
    "description": "Search the web using DuckDuckGo",
    "category": "crawler",
    "parameters": {
      "type": "object",
      "properties": {
        "query": {
          "type": "string",
          "description": "Search query"
        }
      },
      "required": ["query"]
    }
  }
 }
 ```
 ### POST /api/tools/{name}/execute
 手动执行工具
 **路径参数：** `name`: 工具名称
 **请求头：** `Authorization: Bearer <token>`
 **请求体：**
 ```json
 {
@ -257,3 +565,89 @@
  "arg2": "value2"
 }
 ```
 **响应：**
 ```json
 {
  "success": true,
  "data": {
    "result": "..."
  }
 }
 ```
 ---
 ## 公共端点
 ### GET /api/health
 健康检查
 **响应：**
 ```json
 {
  "status": "ok",
  "message": "Luxx API is running"
 }
 ```
 ### GET /api/
 服务信息
 **响应：**
 ```json
 {
  "name": "Luxx",
  "version": "1.0.0",
  "description": "AI Chat API"
 }
 ```
 ---
 ## 工具说明
 ### 内置工具
 #### 代码执行 (code)
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `python_execute` | 执行 Python 代码 | EXECUTE |
 | `python_eval` | 计算表达式 | EXECUTE |
 #### 文件操作 (file)
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `file_read` | 读取文件内容 | READ_ONLY |
 | `file_write` | 写入文件内容 | WRITE |
 | `file_list` | 列出目录内容 | READ_ONLY |
 | `file_exists` | 检查文件是否存在 | READ_ONLY |
 | `file_grep` | 正则搜索文件 | READ_ONLY |
 #### Shell 命令 (shell)
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `shell_execute` | 执行 Shell 命令 | EXECUTE |
 #### 网页爬虫 (crawler)
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `web_search` | DuckDuckGo 搜索 | READ_ONLY |
 | `web_fetch` | 网页抓取 | READ_ONLY |
 | `batch_fetch` | 批量并发抓取 | READ_ONLY |
 #### 数据处理 (data)
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `process_data` | JSON 转换、格式化 | READ_ONLY |
 ### 权限检查
 工具执行时自动检查用户权限：
 ```
 工具要求的权限 <= 用户拥有的权限 → 允许执行
 工具要求的权限 >  用户拥有的权限 → 返回错误
 ```
 用户通过 `/api/auth/users/{user_id}` 接口设置权限级别。
--- a/assets/ARCHITECTURE.md
+++ b/assets/ARCHITECTURE.md
@ -17,39 +17,50 @@
 ```
 luxx/
-├── __init__.py           # FastAPI 应用工厂
+├── __init__.py              # FastAPI 应用工厂
-├── config.py             # 配置管理（YAML）
+├── config.py                # 配置管理（YAML）
-├── database.py           # 数据库连接
+├── database.py              # 数据库连接
-├── models.py             # ORM 模型
+├── models.py                # ORM 模型
-├── routes/               # API 路由层
+├── routes/                  # API 路由层
-│   ├── __init__.py      # 路由聚合
+│   ├── __init__.py         # 路由聚合
-│   ├── auth.py          # 认证 (登录/注册)
+│   ├── auth.py             # 认证 (登录/注册)
-│   ├── conversations.py # 会话管理 (CRUD)
+│   ├── conversations.py    # 会话管理 (CRUD)
-│   ├── messages.py      # 消息处理 (流式/同步)
+│   ├── messages.py         # 消息处理 (流式/同步)
-│   ├── providers.py     # LLM 提供商管理
+│   ├── providers.py        # LLM 提供商管理
-│   └── tools.py         # 工具管理
+│   └── tools.py            # 工具管理
-├── services/            # 服务层
+├── services/               # 服务层
-│   ├── chat.py         # 聊天服务门面
+│   ├── __init__.py        # 服务导出
-│   ├── agentic_loop.py # Agentic Loop 执行器
+│   ├── chat.py            # 聊天服务门面
-│   ├── stream_context.py# 流式状态管理
+│   ├── agentic_loop.py    # Agentic Loop 执行器
-│   ├── llm_response.py  # LLM 响应解析器
+│   ├── stream_context.py  # 流式状态管理
-│   ├── process_result.py# 处理结果
+│   ├── llm_response.py     # LLM 响应数据类
-│   └── llm_client.py   # LLM 客户端
+│   ├── process_result.py   # [已移除]
-├── tools/              # 工具系统
+│   ├── task.py            # 任务系统 (Task/TaskGraph/TaskService)
-│   ├── core.py         # 核心类 (ToolRegistry, ToolDefinition, ToolResult)
+│   ├── llm_client.py      # LLM 客户端
-│   ├── factory.py      # @tool 装饰器
+│   └── llm_adapters/      # LLM API 适配器
-│   ├── executor.py     # 工具执行器 (缓存/并行)
+│       ├── __init__.py    # 适配器导出
-│   ├── services.py     # 工具服务层
+│       ├── base.py        # ProviderAdapter 基类
-│   └── builtin/        # 内置工具
+│       ├── openai_adapter.py  # OpenAI/DeepSeek/GLM 适配器
-│       ├── __init__.py # 工具注册入口
+│       └── anthropic_adapter.py  # Anthropic Claude 适配器
-│       ├── code.py     # 代码执行 (python_execute, python_eval)
+├── tools/                  # 工具系统
-│       ├── crawler.py  # 网页爬虫 (web_search, web_fetch, batch_fetch)
+│   ├── __init__.py        # 工具注册入口
-│       └── data.py     # 数据处理 (process_data)
+│   ├── core.py            # 核心类 (ToolRegistry, ToolDefinition, ToolResult, ToolContext)
-└── utils/              # 工具函数
+│   ├── factory.py         # @tool 装饰器
-    └── helpers.py     # 密码哈希、ID生成、响应封装
+│   ├── executor.py        # 工具执行器 (缓存/并行)
 │   ├── services.py        # 工具服务层
 │   └── builtin/           # 内置工具
 │       ├── __init__.py    # 工具注册入口
 │       ├── code.py        # 代码执行 (python_execute, python_eval)
 │       ├── crawler.py     # 网页爬虫 (web_search, web_fetch, batch_fetch)
 │       ├── data.py        # 数据处理 (process_data)
 │       ├── file.py        # 文件操作 (file_read, file_write, file_list, file_exists, file_grep)
 │       └── shell.py       # Shell 命令 (shell_execute)
 └── utils/                  # 工具函数
    ├── __init__.py
    └── helpers.py         # 密码哈希、ID生成、响应封装
-run.py                  # 应用入口文件
+run.py                    # 应用入口文件
-config.yaml             # 配置文件
+config.yaml                # 配置文件
 ```
 ## 核心组件
@ -77,15 +88,36 @@ password: admin123
 app:
  secret_key: ${APP_SECRET_KEY}
  debug: true
  host: 0.0.0.0
  port: 8000
 database:
  type: sqlite
  url: sqlite:///./chat.db
 workspace:
  root: ./workspaces          # 用户工作空间根目录
  auto_create: true           # 自动创建用户目录
 llm:
  provider: deepseek
  api_key: ${DEEPSEEK_API_KEY}
  api_url: https://api.deepseek.com/v1
 tools:
  enable_cache: true
  cache_ttl: 300
  max_workers: 4
  max_iterations: 10
 logging:
  level: INFO
 ```
 **工作空间隔离机制：**
 - 每个用户的工作空间路径基于 `user_id` 的 SHA256 哈希值
 - 格式：`{workspace_root}/{hash_of_user_id}`
 - 所有文件操作必须在用户工作空间内，防止路径穿越攻击
 ```
 ### 3. 数据库 (`database.py`)
@ -103,10 +135,12 @@ erDiagram
        string email UK
        string password_hash
        string role
        int permission_level "1=READ_ONLY, 2=WRITE, 3=EXECUTE, 4=ADMIN"
        string workspace_path "用户工作空间路径"
        boolean is_active
        datetime created_at
    }
-    
+
    PROJECT {
        string id PK
        int user_id FK
@ -115,7 +149,7 @@ erDiagram
        datetime created_at
        datetime updated_at
    }
-    
+
    CONVERSATION {
        string id PK
        int user_id FK
@ -130,7 +164,7 @@ erDiagram
        datetime created_at
        datetime updated_at
    }
-    
+
    MESSAGE {
        string id PK
        string conversation_id FK
@ -140,7 +174,7 @@ erDiagram
        text usage "JSON 格式"
        datetime created_at
    }
-    
+
    LLM_PROVIDER {
        int id PK
        int user_id FK
@ -155,7 +189,7 @@ erDiagram
        datetime created_at
        datetime updated_at
    }
-    
+
    USER ||--o{ PROJECT : "has"
    USER ||--o{ CONVERSATION : "has"
    USER ||--o{ LLM_PROVIDER : "configures"
@ -164,6 +198,14 @@ erDiagram
    CONVERSATION ||--o{ MESSAGE : "has"
 ```
 **用户权限级别 (permission_level)：**
 | 级别 | 名称 | 说明 |
 |------|------|------|
 | 1 | READ_ONLY | 只读权限 |
 | 2 | WRITE | 写入权限（文件写入） |
 | 3 | EXECUTE | 执行权限（代码执行、Shell命令） |
 | 4 | ADMIN | 管理员权限 |
 ### Message Content JSON 结构
 `content` 字段统一使用 JSON 格式存储：
@ -183,8 +225,6 @@ erDiagram
 ```json
 {
  "text": "AI 回复的文本内容",
  "tool_calls": [...],
  "steps": [
    {"id": "step-0", "index": 0, "type": "thinking", "content": "..."},
    {"id": "step-1", "index": 1, "type": "text", "content": "..."},
@ -194,7 +234,9 @@ erDiagram
 }
 ```
-`steps` 字段是**渲染顺序的唯一数据源**，按 `index` 顺序排列。thinking、text、tool_call、tool_result 可以在多轮迭代中穿插出现。
+`steps` 字段是**唯一数据源**，按 `index` 顺序排列。thinking、text、tool_call、tool_result 可以在多轮迭代中穿插出现。
 **注意**：`text` 和 `content` 字段通过解析 `steps` 中所有 `type: "text"` 的内容动态计算得出。
 ### 5. 工具系统
@ -206,9 +248,25 @@ classDiagram
        +dict parameters
        +Callable handler
        +str category
        +CommandPermission required_permission
        +to_openai_format() dict
    }
    class ToolContext {
        +int user_id
        +str username
        +str workspace
        +int user_permission_level
    }
    class CommandPermission {
        <<enumeration>>
        READ_ONLY = 1
        WRITE = 2
        EXECUTE = 3
        ADMIN = 4
    }
    class ToolResult {
        +bool success
        +Any data
@ -224,7 +282,7 @@ classDiagram
        +get(name) ToolDefinition?
        +list_all() List~dict~
        +list_by_category(category) List~dict~
-        +execute(name, arguments) dict
+        +execute(name, arguments, context) dict
        +remove(name) bool
    }
@ -243,14 +301,51 @@ classDiagram
 #### 内置工具
-| 工具 | 功能 | 说明 |
+**代码执行 (code.py)**
 | 工具 | 功能 | 权限 |
 |------|------|------|
-| `python_execute` | 执行 Python 代码 | 支持 print 输出、变量访问 |
+| `python_execute` | 执行 Python 代码 | EXECUTE |
-| `python_eval` | 计算表达式 | 快速求值 |
+| `python_eval` | 计算表达式 | EXECUTE |
-| `web_search` | DuckDuckGo HTML | DuckDuckGo HTML 搜索 |
+
-| `web_fetch` | 网页抓取 | httpx + BeautifulSoup，支持 text/links/structured |
+**文件操作 (file.py)**
-| `batch_fetch` | 批量抓取 | 并发获取多个页面 |
+
-| `process_data` | 数据处理 | JSON 转换、格式化等 |
+| 工具 | 功能 | 权限 |
 |------|------|------|
 | `file_read` | 读取文件内容 | READ_ONLY |
 | `file_write` | 写入文件内容 | WRITE |
 | `file_list` | 列出目录内容 | READ_ONLY |
 | `file_exists` | 检查文件是否存在 | READ_ONLY |
 | `file_grep` | 正则搜索文件内容 | READ_ONLY |
 **Shell 命令 (shell.py)**
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `shell_execute` | 执行 Shell 命令 | EXECUTE |
 **网页爬虫 (crawler.py)**
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `web_search` | DuckDuckGo HTML 搜索 | READ_ONLY |
 | `web_fetch` | 网页抓取 | READ_ONLY |
 | `batch_fetch` | 批量并发抓取 | READ_ONLY |
 **数据处理 (data.py)**
 | 工具 | 功能 | 权限 |
 |------|------|------|
 | `process_data` | JSON 转换、格式化 | READ_ONLY |
 #### 权限检查机制
 工具执行时自动检查用户权限：
 ```
 工具要求的权限 <= 用户拥有的权限 → 允许执行
 工具要求的权限 >  用户拥有的权限 → 拒绝执行
 ```
 #### 工具开发规范
@ -312,26 +407,82 @@ ToolExecutor 返回结果
 ### 6. 服务层
-#### LLMResponseParser (`services/llm_response.py`)
+#### LLM 适配器 (`services/llm_adapters/`)
-统一解析器，兼容多种 LLM API 格式：
+
- **OpenAI**: `delta.content`, `delta.tool_calls`
+适配器模式统一处理不同 LLM API 格式：
- **DeepSeek**: `delta.content`, `delta.reasoning_content`
+
- **Anthropic**: `content_block` 类型事件
+```mermaid
- **MiniMax**: `<|im_start|>thinking...<|im_end|>` 标签
+classDiagram
    class ProviderAdapter {
        <<abstract>>
        +str provider_type
        +build_request() tuple
        +parse_stream_chunk() AsyncGenerator
        +parse_response() Dict
        +supports_thinking() bool
        +supports_tools() bool
    }
    class OpenAIAdapter {
        +str provider_type = "openai"
        +build_request() tuple
        +parse_stream_chunk() AsyncGenerator
        +parse_response() Dict
        +supports_tools() bool
    }
    class AnthropicAdapter {
        +str provider_type = "anthropic"
        +build_request() tuple
        +parse_stream_chunk() AsyncGenerator
        +parse_response() Dict
        +supports_thinking() bool
        +supports_tools() bool
    }
    ProviderAdapter <|-- OpenAIAdapter
    ProviderAdapter <|-- AnthropicAdapter
 ```
 **支持的功能对比：**
 | 适配器 | 工具调用 | Thinking/Reasoning | 流式响应 |
 |--------|----------|-------------------|----------|
 | OpenAI | ✅ | ✅ (DeepSeek) | ✅ |
 | Anthropic | ✅ | ✅ | ✅ |
 #### LLM 响应数据类 (`services/llm_response.py`)
 ```python
-from luxx.services.llm_response import llm_parser
+class StepType:
    """步骤类型常量"""
    THINKING = "thinking"
    TEXT = "text"
    TOOL_CALL = "tool_call"
    TOOL_RESULT = "tool_result"
 # 解析 OpenAI 格式
 parsed = llm_parser.parse_openai(delta)
-# 解析 Anthropic 格式
+@dataclass
-parsed = llm_parser.parse_anthropic(chunk)
+class Step:
    """单个步骤 - 用于存储和传输"""
    id: str
    index: int
    type: str  # thinking, text, tool_call, tool_result
    content: str = ""
    name: str = ""  # tool_call/tool_result
    arguments: str = ""  # tool_call
    id_ref: str = ""  # tool_result
    success: bool = True
-# 返回 ParsedDelta
+
-parsed.thinking    # 思考内容
+@dataclass
-parsed.text        # 文本内容
+class ParsedDelta:
-parsed.tool_calls  # 工具调用
+    """LLM 流式响应增量"""
    thinking: str = ""           # 思考内容（增量）
    text: str = ""               # 文本内容（增量）
    tool_call: Optional[Dict] = None  # 单个工具调用
    usage: Dict[str, int] = {}   # Token 用量
    is_complete: bool = False
 ```
 #### ChatService (`services/chat.py`)
@ -340,30 +491,101 @@ parsed.tool_calls  # 工具调用
 - 流式 SSE 响应
 - 工具调用编排（并行执行）
 - 消息历史管理
 - 自动重试机制
 - Token 用量追踪
 - 工作空间上下文传递
 #### AgenticLoop (`services/agentic_loop.py`)
 执行 Agentic Loop 的核心循环：
- 调用 LLM 获取响应
+- 调用 LLM 获取响应（流式）
- 使用 LLMResponseParser 解析响应
+- 解析 ParsedDelta，更新步骤状态
 - 管理 thinking/text/tool_call/tool_result 步骤
 - 工具并行执行
 - 最大迭代次数：10
 ```python
 # 执行流程
 async for delta in llm.stream_call(...):
    events = self._process_delta(delta, context, total_usage)
    yield from events
 # 工具调用时
 tool_results = self.tool_executor.process_tool_calls_parallel(...)
 messages.append({"role": "assistant", ...})
 messages.extend(tool_results)
 ```
 #### StreamContext (`services/stream_context.py`)
 流式状态管理：
 - 追踪当前步骤类型和索引
 - 累积 thinking 和 text 内容
- 管理 tool_calls 列表
+- 管理 tool_calls 列表和 tool_results
 - 生成 SSE 事件
 - 构建完整消息内容
 #### LLMClient (`services/llm_client.py`)
 LLM API 客户端：
- 多提供商：DeepSeek、GLM、OpenAI
+- 多提供商：OpenAI、DeepSeek、Anthropic
 - 自动适配器选择
 - 流式/同步调用
 - 错误处理和重试
 - Token 计数
 ### 7. 任务系统 (`services/task.py`)
 用于自主任务执行和依赖管理：
 ```mermaid
 classDiagram
    class Task {
        +str id
        +str name
        +str goal
        +TaskStatus status
        +List~Step~ steps
        +List~Task~ subtasks
    }
    class Step {
        +str id
        +str name
        +List~str~ depends_on
        +StepStatus status
    }
    class TaskGraph {
        +topological_sort() List~Step~
        +get_ready_steps() List~Step~
        +detect_cycles() List~List~str~~
        +validate() tuple
    }
    class TaskService {
        +create_task() Task
        +get_task() Task
        +update_task_status() Task
        +add_steps() List~Step~
        +build_graph() TaskGraph
    }
    Task "1" o-- "*" Step
    Task "1" o-- "*" Task
    TaskService ..> TaskGraph
 ```
 **任务状态 (TaskStatus)：**
 - `PENDING` - 待处理
 - `READY` - 就绪
 - `RUNNING` - 运行中
 - `BLOCK` - 阻塞
 - `TERMINATED` - 已终止
 **步骤状态 (StepStatus)：**
 - `PENDING` - 待执行
 - `RUNNING` - 执行中
 - `COMPLETED` - 已完成
 - `FAILED` - 失败
 - `SKIPPED` - 跳过
 ### 7. 认证系统 (`routes/auth.py`)
 - JWT Bearer Token
 - Bcrypt 密码哈希
@ -481,6 +703,10 @@ database:
  type: sqlite
  url: sqlite:///./chat.db
 workspace:
  root: ./workspaces          # 用户工作空间根目录
  auto_create: true           # 自动创建用户工作空间
 llm:
  provider: deepseek
  api_key: ${DEEPSEEK_API_KEY}
@ -491,6 +717,9 @@ tools:
  cache_ttl: 300
  max_workers: 4
  max_iterations: 10
 logging:
  level: INFO
 ```
 ## 环境变量
@ -501,6 +730,26 @@ tools:
 | `DEEPSEEK_API_KEY` | DeepSeek API | `sk-xxxx` |
 | `DATABASE_URL` | 数据库连接 | `sqlite:///./chat.db` |
 ## LLM 适配器配置
 ### OpenAI 兼容 (DeepSeek/GLM 等)
 ```yaml
 llm:
  provider: openai
  api_key: ${API_KEY}
  api_url: https://api.deepseek.com/v1  # 或其他兼容端点
 ```
 ### Anthropic Claude
 ```yaml
 llm:
  provider: anthropic
  api_key: ${ANTHROPIC_API_KEY}
  api_url: https://api.anthropic.com/v1
 ```
 ## 项目结构说明
 ### 入口文件
@ -530,5 +779,21 @@ ToolExecutor 支持结果缓存：
 1. 实时返回 thinking_content（模型思考过程）
 2. 实时返回 text 增量更新
-3. 工具调用串行执行，结果批量返回
+3. 工具调用并行执行，结果批量返回
 4. 最终 `done` 事件包含完整 message_id 和 token 用量
 ### 工作空间隔离
 每个用户的工作空间完全隔离：
 - 用户目录基于 user_id 的 SHA256 哈希生成
 - 所有文件操作强制在用户工作空间内
 - 支持权限级别控制文件操作能力
 ### MessageBuilder
 用于构建发送给 LLM 的消息列表：
 - `add_system()` - 添加系统消息
 - `add_user()` - 添加用户消息（JSON 格式）
 - `add_assistant()` - 添加助手消息
 - `add_tool_result()` - 添加工具结果消息
 - `extract_text()` - 从 JSON 内容中提取文本
--- a/luxx/models.py
+++ b/luxx/models.py
@ -141,9 +141,9 @@ class Conversation(Base):
 class Message(Base):
    """Message model
-    
+
    content 字段统一使用 JSON 格式存储：
-    
+
    **User 消息：**
    {
      "text": "用户输入的文本内容",
@ -151,11 +151,9 @@ class Message(Base):
        {"name": "utils.py", "extension": "py", "content": "..."}
      ]
    }
-    
+
    **Assistant 消息：**
    {
      "text": "AI 回复的文本内容",
      "tool_calls": [...],        // 遗留的扁平结构
      "steps": [                  // 有序步骤，用于渲染（主要数据源）
        {"id": "step-0", "index": 0, "type": "thinking", "content": "..."},
        {"id": "step-1", "index": 1, "type": "text", "content": "..."},
@ -163,6 +161,8 @@ class Message(Base):
        {"id": "step-3", "index": 3, "type": "tool_result", "id_ref": "call_xxx", "name": "...", "content": "..."}
      ]
    }
    注意：to_dict() 返回时会从 steps 动态计算 text 和 content 字段。
    """
    __tablename__ = "messages"
@ -204,20 +204,22 @@ class Message(Base):
            result["content"] = self.content
            result["text"] = self.content
            result["attachments"] = []
            result["tool_calls"] = []
            result["process_steps"] = []
            return result
-        
+
        # Extract common fields
        result["text"] = content_obj.get("text", "")
        result["attachments"] = content_obj.get("attachments", [])
        result["tool_calls"] = content_obj.get("tool_calls", [])
        # Extract steps as process_steps for frontend rendering
-        result["process_steps"] = content_obj.get("steps", [])
+        steps = content_obj.get("steps", [])
-        
+        result["process_steps"] = steps
-        # For backward compatibility
+
-        if "content" not in result:
+        # Extract text from steps (concatenate all text type steps)
-            result["content"] = result["text"]
+        text_content = "".join(
-        
+            s.get("content", "") for s in steps
            if s.get("type") == "text"
        )
        result["text"] = text_content
        result["content"] = text_content  # Alias for convenience
        # Extract attachments
        result["attachments"] = content_obj.get("attachments", [])
        return result
--- a/luxx/repositories.py
+++ b/luxx/repositories.py
@ -0,0 +1,214 @@
 """Repository layer for data access - follows Repository Pattern
 This module separates data access logic from business logic, following
 the Dependency Inversion Principle (DIP) from SOLID principles.
 """
 import json
 import logging
 from typing import List, Optional, Dict, Any
 from contextlib import contextmanager
 from luxx.database import SessionLocal
 from luxx.models import Message, LLMProvider, Conversation, User
 logger = logging.getLogger(__name__)
 class RepositoryError(Exception):
    """Base exception for repository errors"""
    pass
 class UnitOfWork:
    """Unit of Work pattern for managing database sessions
    Usage:
        with UnitOfWork() as uow:
            messages = uow.messages.get_by_conversation(conv_id)
            uow.commit()
    """
    def __init__(self):
        self._session = None
    def __enter__(self):
        self._session = SessionLocal()
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            self._session.rollback()
            return False
        return True
    @property
    def session(self):
        if self._session is None:
            raise RepositoryError("UnitOfWork not started. Use 'with UnitOfWork()'")
        return self._session
    def commit(self):
        """Commit the current transaction"""
        try:
            self._session.commit()
        except Exception as e:
            self._session.rollback()
            raise RepositoryError(f"Commit failed: {e}") from e
    def rollback(self):
        """Rollback the current transaction"""
        self._session.rollback()
    @property
    def messages(self) -> "MessageRepository":
        return MessageRepository(self._session)
    @property
    def providers(self) -> "ProviderRepository":
        return ProviderRepository(self._session)
    @property
    def conversations(self) -> "ConversationRepository":
        return ConversationRepository(self._session)
 class BaseRepository:
    """Base repository with common operations"""
    def __init__(self, session):
        self._session = session
 class MessageRepository(BaseRepository):
    """Repository for Message data access"""
    def get_by_id(self, msg_id: str) -> Optional[Message]:
        """Get message by ID"""
        return self._session.query(Message).filter(Message.id == msg_id).first()
    def get_by_conversation(self, conversation_id: str) -> List[Message]:
        """Get all messages for a conversation, ordered by creation time"""
        return self._session.query(Message).filter(
            Message.conversation_id == conversation_id
        ).order_by(Message.created_at).all()
    def create(
        self,
        msg_id: str,
        conversation_id: str,
        role: str,
        content: Dict[str, Any],
        token_count: int = 0,
        usage: Dict[str, Any] = None
    ) -> Message:
        """Create a new message"""
        msg = Message(
            id=msg_id,
            conversation_id=conversation_id,
            role=role,
            content=json.dumps(content, ensure_ascii=False),
            token_count=token_count,
            usage=json.dumps(usage) if usage else None
        )
        self._session.add(msg)
        return msg
    def delete(self, msg_id: str) -> bool:
        """Delete a message by ID"""
        msg = self.get_by_id(msg_id)
        if msg:
            self._session.delete(msg)
            return True
        return False
    def delete_by_conversation(self, conversation_id: str) -> int:
        """Delete all messages for a conversation"""
        count = self._session.query(Message).filter(
            Message.conversation_id == conversation_id
        ).delete()
        return count
 class ProviderRepository(BaseRepository):
    """Repository for LLM Provider data access"""
    def get_by_id(self, provider_id: int) -> Optional[LLMProvider]:
        """Get provider by ID"""
        return self._session.query(LLMProvider).filter(LLMProvider.id == provider_id).first()
    def get_by_user(self, user_id: int) -> List[LLMProvider]:
        """Get all providers for a user"""
        return self._session.query(LLMProvider).filter(
            LLMProvider.user_id == user_id
        ).all()
    def get_default(self, user_id: int) -> Optional[LLMProvider]:
        """Get the default provider for a user"""
        return self._session.query(LLMProvider).filter(
            LLMProvider.user_id == user_id,
            LLMProvider.is_default == True
        ).first()
    def create(self, **kwargs) -> LLMProvider:
        """Create a new provider"""
        provider = LLMProvider(**kwargs)
        self._session.add(provider)
        return provider
    def update(self, provider: LLMProvider) -> LLMProvider:
        """Update an existing provider"""
        self._session.add(provider)
        return provider
    def delete(self, provider_id: int) -> bool:
        """Delete a provider by ID"""
        provider = self.get_by_id(provider_id)
        if provider:
            self._session.delete(provider)
            return True
        return False
 class ConversationRepository(BaseRepository):
    """Repository for Conversation data access"""
    def get_by_id(self, conversation_id: str) -> Optional[Conversation]:
        """Get conversation by ID"""
        return self._session.query(Conversation).filter(
            Conversation.id == conversation_id
        ).first()
    def get_by_user(self, user_id: int, limit: int = 50) -> List[Conversation]:
        """Get recent conversations for a user"""
        return self._session.query(Conversation).filter(
            Conversation.user_id == user_id
        ).order_by(Conversation.updated_at.desc()).limit(limit).all()
    def create(self, **kwargs) -> Conversation:
        """Create a new conversation"""
        conversation = Conversation(**kwargs)
        self._session.add(conversation)
        return conversation
    def update(self, conversation: Conversation) -> Conversation:
        """Update an existing conversation"""
        self._session.add(conversation)
        return conversation
    def delete(self, conversation_id: str) -> bool:
        """Delete a conversation and its messages (cascade)"""
        conversation = self.get_by_id(conversation_id)
        if conversation:
            self._session.delete(conversation)
            return True
        return False
 # Factory function for creating services
 def create_message_repository() -> MessageRepository:
    """Factory for MessageRepository with its own session"""
    return MessageRepository(SessionLocal())
 def create_provider_repository() -> ProviderRepository:
    """Factory for ProviderRepository with its own session"""
    return ProviderRepository(SessionLocal())
--- a/luxx/services/init.py
+++ b/luxx/services/init.py
@ -1,4 +1,4 @@
 """Services module"""
 from luxx.services.llm_client import LLMClient
-from luxx.services.llm_response import ParsedDelta, LLMResponse
+from luxx.services.llm_response import ParsedDelta, Step, StepType
 from luxx.services.chat import ChatService, create_chat_service
--- a/luxx/services/agentic_loop.py
+++ b/luxx/services/agentic_loop.py
@ -1,12 +1,6 @@
 """AgenticLoop - Executes the Agentic Loop: LLM + Tools iteration.
-The loop:
+This module follows the Single Responsibility Principle.
 1. Call LLM with messages and tools
 2. Check for tool calls in response
 3. Execute tools in parallel
 4. Add results to messages
 5. Repeat (max 10 iterations)
 6. Return final response
 """
 import uuid
 import logging
@ -14,25 +8,21 @@ from typing import List, Dict, AsyncGenerator
 from luxx.tools.executor import ToolExecutor
 from luxx.services.llm_client import LLMClient
-from luxx.services.stream_context import StreamContext, _sse_event
+from luxx.services.stream_context import StreamState, StreamRenderer, StepType
 from luxx.services.process_result import ProcessResult
 from luxx.services.llm_response import ParsedDelta
 from luxx.services.events import sse_event
 logger = logging.getLogger(__name__)
 # Maximum iterations to prevent infinite loops
 MAX_ITERATIONS = 10
 class AgenticLoop:
-    """Executes the Agentic Loop: LLM + Tools iteration.
+    """Executes the agentic loop (LLM + Tools iteration)"""
    Supports multiple LLM Providers, auto-adapts response format.
    """
    def __init__(self, tool_executor: ToolExecutor):
        self.tool_executor = tool_executor
-    
+
    async def execute(
        self,
        llm: LLMClient,
@ -42,20 +32,15 @@ class AgenticLoop:
        temperature: float,
        max_tokens: int,
        thinking_enabled: bool,
-        context: 'StreamContext',
+        context: StreamState,
        tool_context: dict = None
    ) -> AsyncGenerator[str, None]:
        """Execute the agentic loop.
        Yields SSE events for each step.
        """
        total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-        
+
        for iteration in range(MAX_ITERATIONS):
            context.reset()
            has_error = False
-            
+
            # Stream LLM response - now yields ParsedDelta directly
            async for delta in llm.stream_call(
                model=model,
                messages=messages,
@ -64,157 +49,123 @@ class AgenticLoop:
                max_tokens=max_tokens,
                thinking_enabled=thinking_enabled
            ):
-                # Process parsed delta
+                events = self._process_delta(delta, context, total_usage)
-                result = self._process_delta(delta, context, total_usage)
+                for event in events:
                # Yield events
                for event in result.events:
                    yield event
-                
+
-                # Check for errors
+                if not delta.has_content() and not delta.is_complete:
                if result.has_error:
                    has_error = True
                    break
-            
+
            # If error occurred, break the loop
            if has_error:
                break
-            
+
-            # Finalize current step
+            if delta.is_complete:
                for event in self._flush_remaining(context):
                    yield event
            context.finalize_step()
-            
+
            # Check for tool calls
            if context.tool_calls_list:
                # Execute tools and yield events
                for event in self._execute_tools(context, messages, tool_context):
                    yield event
                continue
-            
+
            # No tools - complete
            for event in self._complete(context, total_usage):
                yield event
            return
-        
+
        # Max iterations exceeded or error occurred
        if not has_error:
-            yield _sse_event("error", {"content": "Exceeded maximum tool call iterations"})
+            yield sse_event("error", {"content": "Exceeded maximum tool call iterations"})
-    
+
-    def _process_delta(
+    def _process_delta(self, delta: ParsedDelta, ctx: StreamState, total_usage: dict) -> List[str]:
-        self, 
+        """Process a single delta from the LLM stream"""
-        delta: ParsedDelta, 
+        events = []
-        ctx: 'StreamContext', 
+
        total_usage: dict
    ) -> ProcessResult:
        """Process ParsedDelta from adapter, return result with events and flags.
        Args:
            delta: ParsedDelta from LLM adapter
            ctx: StreamContext for state management
            total_usage: Accumulated token usage
        Returns:
            ProcessResult with events and flags
        """
        result = ProcessResult()
        # Check for error (empty delta with no content)
        if not delta.has_content() and not delta.is_complete:
            # Empty delta, possibly an error
            return result
        # Update usage
        if delta.usage:
            total_usage.update({
                "prompt_tokens": delta.usage.get("prompt_tokens", 0),
                "completion_tokens": delta.usage.get("completion_tokens", 0),
                "total_tokens": delta.usage.get("total_tokens", 0)
            })
-        
+
-        # Process thinking content (incremental)
+        if delta.content:
-        if delta.thinking:
+            result = ctx.process_content(delta.content)
-            logger.debug(f"Processing thinking: {delta.thinking[:50]}...")
+            if result["should_emit"]:
-            ctx.full_thinking += delta.thinking  # Accumulate incremental content
+                if result["thinking"]:
-            if not ctx.current_step_id or ctx.current_step_type != "thinking":
+                    ctx.full_thinking += result["thinking"]
-                ctx.start_step("thinking")
+                    ctx.start_step(StepType.THINKING)
-            result.add_event(_sse_event("process_step", {
+                    events.append(StreamRenderer.render_thinking(ctx))
-                "step": {
+
-                    "id": ctx.current_step_id,
+                if result["text"]:
-                    "index": ctx.current_step_idx,
+                    ctx.full_content += result["text"]
-                    "type": "thinking",
+                    ctx.start_step(StepType.TEXT)
-                    "content": ctx.full_thinking
+                    events.append(StreamRenderer.render_text(ctx))
-                }
+
-            }))
+                ctx._thinking_buf = ""
-            result.set_content()
+                ctx._text_buf = ""
-        
+
-        # Process text content (incremental)
+        if delta.has_tool_call():
-        if delta.text:
+            ctx.accumulate_tool_call(delta.tool_call)
-            ctx.full_content += delta.text  # Accumulate incremental content
+
-            if not ctx.current_step_id or ctx.current_step_type != "text":
+        return events
-                ctx.start_step("text")
+
-            result.add_event(_sse_event("process_step", {
+    def _execute_tools(self, ctx: StreamState, messages: list, tool_context: dict = None) -> List[str]:
-                "step": {
+        """Execute tools and add results to messages"""
                    "id": ctx.current_step_id,
                    "index": ctx.current_step_idx,
                    "type": "text",
                    "content": ctx.full_content
                }
            }))
            result.set_content()
        # Process tool calls
        if delta.tool_calls:
            for tc in delta.tool_calls:
                ctx.accumulate_tool_call(tc)
                result.set_tool_calls()
        return result
    def _execute_tools(self, ctx: 'StreamContext', messages: list, 
                       tool_context: dict = None) -> List[str]:
        """Execute tools and return list of events."""
        events = []
-        
+
-        # Emit tool call steps
+        for event in StreamRenderer.render_tool_calls(ctx):
        for event in ctx.emit_tool_calls():
            events.append(event)
-        
+
        # Execute in parallel
        tool_results = self.tool_executor.process_tool_calls_parallel(
            ctx.tool_calls_list, tool_context or {}
        )
-        
+
        # Get tool call IDs for result linking
        tool_ids = [tc.get("id") for tc in ctx.tool_calls_list]
        tool_step_ids = [
-            s["id"] for s in ctx.all_steps 
+            s.id for s in ctx.all_steps
-            if s["type"] == "tool_call" and s.get("id_ref") in tool_ids
+            if s.type == StepType.TOOL_CALL and s.id_ref in tool_ids
        ]
-        
+
        # Emit tool result steps
        for i, (tr, tc) in enumerate(zip(tool_results, ctx.tool_calls_list)):
            ref_id = tool_step_ids[i] if i < len(tool_step_ids) else f"step-{len(ctx.all_steps) - len(tool_results) + i}"
-            _, event = ctx.emit_tool_result(tr, ref_id)
+            _, event = StreamRenderer.render_tool_result(ctx, tr, ref_id)
            events.append(event)
-        
+
        # Prepare for next iteration
        messages.append({
            "role": "assistant",
            "content": ctx.full_content or "",
            "tool_calls": ctx.tool_calls_list
        })
        messages.extend(ctx.all_tool_results[-len(tool_results):])
-        
+
        return events
-    
+
-    def _complete(self, ctx: 'StreamContext', total_usage: dict) -> List[str]:
+    def _flush_remaining(self, ctx: StreamState) -> List[str]:
-        """Complete the loop and return list of events."""
+        """Flush remaining buffers on complete"""
        events = []
        thinking, text = ctx.flush()
        if thinking:
            ctx.full_thinking += thinking
            ctx.start_step(StepType.THINKING)
            events.append(StreamRenderer.render_thinking(ctx))
            ctx.finalize_step()
        if text:
            ctx.full_content += text
            ctx.start_step(StepType.TEXT)
            events.append(StreamRenderer.render_text(ctx))
            ctx.finalize_step()
        return events
    def _complete(self, ctx: StreamState, total_usage: dict) -> List[str]:
        """Signal completion of the agentic loop"""
        token_count = total_usage.get("completion_tokens") or len(ctx.full_content) // 4
        msg_id = str(uuid.uuid4())
        logger.info(f"[TOKEN] usage={total_usage}, count={token_count}")
-        
+
        ctx.set_completion(msg_id, token_count, total_usage)
-        
+
-        return [_sse_event("done", {
+        return [sse_event("done", {
            "message_id": msg_id,
            "token_count": token_count,
            "usage": total_usage
--- a/luxx/services/chat.py
+++ b/luxx/services/chat.py
@ -1,10 +1,13 @@
 """Chat service module with Agentic Loop pattern.
-This module provides the core chat service that orchestrates:
+This module follows SOLID principles:
- StreamContext: Manages streaming state transitions
+- Single Responsibility: Each class has one job
 - Dependency Inversion: Depend on abstractions (repositories) not concretions
 Components:
 - MessageBuilder: Constructs message lists
- AgenticLoop: Executes the agentic loop (LLM + tools iteration)
+- ChatService: Core chat service (orchestration only)
- ChatService: Core chat service facade
+- ProviderFactory: Creates LLM clients (Dependency Injection)
 """
 import json
 import logging
@ -15,9 +18,10 @@ from typing import List, Dict, Any, AsyncGenerator
 from luxx.tools.executor import ToolExecutor
 from luxx.tools.core import registry
 from luxx.services.llm_client import LLMClient
-from luxx.services.stream_context import StreamContext
+from luxx.services.stream_context import StreamState
 from luxx.services.agentic_loop import AgenticLoop
-from luxx.config import config
+from luxx.services.events import sse_event
 from luxx.repositories import UnitOfWork
 logger = logging.getLogger(__name__)
@ -79,65 +83,91 @@ class MessageBuilder:
        return content
-# ============== Factory Function ==============
+# ============== LLM Provider Factory ==============
-def get_llm_client(conversation=None) -> tuple:
+class LLMProviderFactory:
-    """Get LLM client based on conversation provider. Returns (client, max_tokens)"""
+    """Factory for creating LLM clients - follows Dependency Injection
    from luxx.models import LLMProvider
    from luxx.database import SessionLocal
-    max_tokens = None
+    This separates the creation of dependencies from their usage,
    following the Dependency Inversion Principle.
    """
-    if conversation and conversation.provider_id:
+    @staticmethod
-        db = SessionLocal()
+    def create_client(
-        try:
+        provider=None,
-            provider = db.query(LLMProvider).filter(
+        api_key: str = None,
-                LLMProvider.id == conversation.provider_id
+        api_url: str = None,
-            ).first()
+        model: str = None
-            if provider:
+    ) -> tuple:
-                max_tokens = provider.max_tokens
+        """Create LLM client from provider or direct parameters
-                client = LLMClient(
+        
-                    api_key=provider.api_key,
+        Args:
-                    api_url=provider.base_url,
+            provider: LLMProvider model instance (optional)
-                    model=provider.default_model
+            api_key: Direct API key (used if no provider)
-                )
+            api_url: Direct API URL (used if no provider)
-                return client, max_tokens
+            model: Direct model name (used if no provider)
-        finally:
+            
-            db.close()
+        Returns:
-    
+            tuple: (LLMClient, max_tokens)
-    return LLMClient(), max_tokens
+        """
        if provider is not None:
            client = LLMClient(
                api_key=provider.api_key,
                api_url=provider.base_url,
                model=provider.default_model,
                provider_type=provider.provider_type
            )
            return client, provider.max_tokens
        # Fallback to direct parameters
        client = LLMClient(
            api_key=api_key,
            api_url=api_url,
            model=model
        )
        return client, None
-# ============== ChatService ==============
+# ============== Chat Service ==============
 class ChatService:
-    """Core chat service with Agentic Loop support."""
+    """Core chat service with Agentic Loop support.
-    def __init__(self):
+    This class follows Single Responsibility - it orchestrates the chat flow
-        self.tool_executor = ToolExecutor()
+    but delegates data access to repositories and tool execution to executors.
-        self.agentic_loop = AgenticLoop(self.tool_executor)
+    
    Dependencies are injected via constructor for better testability.
    """
    def __init__(
        self,
        tool_executor: ToolExecutor = None,
        agentic_loop: AgenticLoop = None,
        provider_factory: LLMProviderFactory = None
    ):
        """Initialize ChatService with injected dependencies
        Args:
            tool_executor: Tool executor instance (creates default if None)
            agentic_loop: Agentic loop instance (creates default if None)
            provider_factory: LLM provider factory (uses default if None)
        """
        self._tool_executor = tool_executor or ToolExecutor()
        self._agentic_loop = agentic_loop or AgenticLoop(self._tool_executor)
        self._provider_factory = provider_factory or LLMProviderFactory()
    def build_messages(self, conversation, include_system: bool = True) -> List[Dict]:
-        """Build message list from conversation history."""
+        """Build message list from conversation history using Repository"""
        from luxx.database import SessionLocal
        from luxx.models import Message
        messages = []
        if include_system and conversation.system_prompt:
            messages.append({"role": "system", "content": conversation.system_prompt})
-        db = SessionLocal()
+        with UnitOfWork() as uow:
-        try:
+            db_messages = uow.messages.get_by_conversation(conversation.id)
            db_messages = db.query(Message).filter(
                Message.conversation_id == conversation.id
            ).order_by(Message.created_at).all()
            for msg in db_messages:
                content = MessageBuilder.extract_text(msg.content)
                messages.append({"role": msg.role, "content": content})
        finally:
            db.close()
        return messages
@ -168,9 +198,11 @@ class ChatService:
                "content": json.dumps({"text": user_message, "attachments": []})
            })
-            # Get tools and LLM client
+            # Get tools and LLM client via factory
            tools = self._get_tools(enabled_tools)
-            llm, provider_max_tokens = get_llm_client(conversation)
+            llm, provider_max_tokens = self._provider_factory.create_client(
                provider=conversation.provider
            )
            model = conversation.model or llm.default_model or "gpt-4"
            max_tokens = provider_max_tokens or 8192
@ -183,10 +215,10 @@ class ChatService:
            }
            # Stream context
-            ctx = StreamContext()
+            ctx = StreamState()
            # Execute agentic loop
-            async for event in self.agentic_loop.execute(
+            async for event in self._agentic_loop.execute(
                llm=llm,
                model=model,
                messages=messages,
@ -199,22 +231,19 @@ class ChatService:
            ):
                yield event
-            # Save message after successful completion (only if we have content)
+            # Save message after successful completion
-            if ctx._last_message_id and (ctx.full_content or ctx.all_tool_calls):
+            if ctx._last_message_id and ctx.all_steps:
                self._save_message(
                    conversation.id,
                    ctx._last_message_id,
-                    ctx.full_content,
+                    ctx.get_steps_for_save(),
                    ctx.all_tool_calls,
                    ctx.all_tool_results,
                    ctx.all_steps,
                    ctx._last_token_count,
                    ctx._last_usage
                )
        except Exception as e:
            logger.error(f"Stream error: {e}\n{traceback.format_exc()}")
-            yield _sse_event("error", {"content": str(e)})
+            yield sse_event("error", {"content": str(e)})
    async def non_stream_response(
        self,
@ -230,11 +259,13 @@ class ChatService:
                "role": "user",
                "content": json.dumps({"text": user_message, "attachments": []})
            })
-            
+
            tools = [] if not tools_enabled else None
-            llm, max_tokens = get_llm_client(conversation)
+            llm, max_tokens = self._provider_factory.create_client(
                provider=conversation.provider
            )
            model = conversation.model or llm.default_model or "gpt-4"
-            
+
            response = await llm.sync_call(
                model=model,
                messages=messages,
@ -243,14 +274,14 @@ class ChatService:
                max_tokens=max_tokens or 8192,
                thinking_enabled=thinking_enabled or conversation.thinking_enabled
            )
-            
+
            return {
                "success": True,
-                "content": response.content,
+                "content": response.get("content", ""),
-                "tool_calls": response.tool_calls,
+                "tool_calls": response.get("tool_calls", []),
-                "usage": response.usage
+                "usage": response.get("usage", {})
            }
-            
+
        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP {e.response.status_code}: {e.response.text[:200] if e.response else 'No response body'}"
            logger.error(f"Non-stream HTTP error: {error_msg}")
@ -262,41 +293,40 @@ class ChatService:
            logger.error(f"Non-stream error: {type(e).__name__}: {e}\n{traceback.format_exc()}")
            return {"success": False, "error": f"{type(e).__name__}: {str(e)}"}
-    def _save_message(self, conversation_id: str, msg_id: str, full_content: str,
+    def _save_message(
-                       all_tool_calls: list, all_tool_results: list, all_steps: list,
+        self,
-                       token_count: int = 0, usage: dict = None):
+        conversation_id: str,
-        """Save assistant message to database."""
+        msg_id: str,
-        from luxx.database import SessionLocal
+        all_steps: list,
-        from luxx.models import Message
+        token_count: int = 0,
        usage: dict = None
    ):
        """Save assistant message to database using Repository"""
        content_json = {"steps": all_steps}
-        content_json = {"text": full_content, "steps": all_steps}
+        with UnitOfWork() as uow:
-        if all_tool_calls:
+            uow.messages.create(
-            content_json["tool_calls"] = all_tool_calls
+                msg_id=msg_id,
        db = SessionLocal()
        try:
            msg = Message(
                id=msg_id,
                conversation_id=conversation_id,
                role="assistant",
-                content=json.dumps(content_json, ensure_ascii=False),
+                content=content_json,
                token_count=token_count,
-                usage=json.dumps(usage) if usage else None
+                usage=usage
            )
-            db.add(msg)
+            uow.commit()
            db.commit()
        except Exception:
            db.rollback()
            raise
        finally:
            db.close()
-def _sse_event(event: str, data: dict) -> str:
+# ============== Factory Function ==============
-    """Format a Server-Sent Event string."""
+
-    return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
+def create_chat_service(
    tool_executor: ToolExecutor = None,
    agentic_loop: AgenticLoop = None
 ) -> ChatService:
    """Factory function to create ChatService instances"""
    return ChatService(
        tool_executor=tool_executor,
        agentic_loop=agentic_loop
    )
 def create_chat_service() -> ChatService:
    """Factory function to create ChatService instances."""
    return ChatService()
--- a/luxx/services/events.py
+++ b/luxx/services/events.py
@ -0,0 +1,31 @@
 """SSE (Server-Sent Events) utilities
 This module provides SSE formatting functions to avoid circular imports
 between stream_context.py and agentic_loop.py.
 """
 import json
 def sse_event(event: str, data: dict) -> str:
    """Format a Server-Sent Event string
    Args:
        event: Event type name
        data: Event data dictionary
    Returns:
        Formatted SSE string
    """
    return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
 def sse_comment(message: str) -> str:
    """Format a SSE comment (for keep-alive or debugging)
    Args:
        message: Comment message
    Returns:
        Formatted SSE comment string
    """
    return f": {message}\n\n"
--- a/luxx/services/llm_adapters/anthropic_adapter.py
+++ b/luxx/services/llm_adapters/anthropic_adapter.py
@ -2,38 +2,39 @@
 Supports Anthropic Claude API streaming and non-streaming responses.
 """
 import json
 import logging
 from typing import Dict, List, Any, AsyncGenerator
 from .base import ProviderAdapter
-from ..llm_response import ParsedDelta, LLMResponse
+from ..llm_response import ParsedDelta
 logger = logging.getLogger(__name__)
 class AnthropicAdapter(ProviderAdapter):
    """Anthropic Claude API adapter
-    
+
    Pure parsing adapter - no internal state management.
    Each parse_stream_chunk call returns incremental content.
    Accumulation is handled by the consumer (AgenticLoop).
-    
+
    Anthropic API uses a completely different format from OpenAI:
    - Endpoint: POST /v1/messages
    - Streaming: SSE events (content_block_start, content_block_delta, etc.)
    - Thinking: Independent thinking type content block
    - Tools: tool_use type content block
-    
+
    Reference: https://docs.anthropic.com/claude/reference/messages
    """
-    
+
    # Anthropic API endpoint suffix
    MESSAGES_PATH = "/v1/messages"
-    
+
    # Anthropic API version
    ANTHROPIC_VERSION = "2023-06-01"
-    
+
    # Content block types
    BLOCK_MESSAGE_START = "message_start"
    BLOCK_CONTENT_BLOCK_START = "content_block_start"
@ -42,24 +43,24 @@ class AnthropicAdapter(ProviderAdapter):
    BLOCK_MESSAGE_DELTA = "message_delta"
    BLOCK_MESSAGE_STOP = "message_stop"
    BLOCK_ERROR = "error"
-    
+
    # Delta types
    DELTA_THINKING = "thinking_delta"
    DELTA_TEXT = "text_delta"
    DELTA_INPUT_JSON = "input_json_delta"
-    
+
    # Content block subtypes
    SUBTYPE_THINKING = "thinking"
    SUBTYPE_TEXT = "text"
    SUBTYPE_TOOL_USE = "tool_use"
-    
+
    def __init__(self):
        pass
-    
+
    @property
    def provider_type(self) -> str:
        return "anthropic"
-    
+
    def build_request(
        self,
        model: str,
@ -68,30 +69,30 @@ class AnthropicAdapter(ProviderAdapter):
        **kwargs
    ) -> tuple[Dict[str, Any], Dict[str, str]]:
        """Build Anthropic-format request
-        
+
        Anthropic request format differs from OpenAI:
        - Uses "messages" instead of "message"
        - Requires "max_tokens"
        - Different tool format
        """
        api_key = kwargs.get("api_key", "")
-        
+
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
            "anthropic-version": self.ANTHROPIC_VERSION
        }
-        
+
        # Convert messages to Anthropic format
        anthropic_messages = self._convert_messages(messages)
-        
+
        body = {
            "model": model,
            "messages": anthropic_messages,
            "stream": kwargs.get("stream", True),
            "max_tokens": kwargs.get("max_tokens", 4096)
        }
-        
+
        # System message
        if "system" in kwargs:
            body["system"] = kwargs["system"]
@ -101,51 +102,51 @@ class AnthropicAdapter(ProviderAdapter):
                if msg.get("role") == "system":
                    body["system"] = msg.get("content", "")
                    break
-        
+
        # Thinking capability (Claude 3.5+)
        if kwargs.get("thinking_enabled"):
            body["thinking"] = {
                "type": "enabled",
                "budget_tokens": kwargs.get("thinking_budget_tokens", 10000)
            }
-        
+
        # Tool definitions
        if tools:
            body["tools"] = self._convert_tools(tools)
-        
+
        # Optional parameters
        if "temperature" in kwargs:
            body["temperature"] = kwargs["temperature"]
-        
+
        if "top_p" in kwargs:
            body["top_p"] = kwargs["top_p"]
-        
+
        if "stop_sequences" in kwargs:
            body["stop_sequences"] = kwargs["stop_sequences"]
-        
+
        return body, headers
-    
+
    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert messages to Anthropic format
-        
+
        Anthropic message format:
        - role: user, assistant
        - content: str or List[Dict]
        """
        anthropic_messages = []
-        
+
        for msg in messages:
            role = msg.get("role", "user")
-            
+
            # Convert role
            if role == "system":
                continue  # System messages handled separately
-            
+
            anthropic_msg = {
                "role": "user" if role == "user" else "assistant",
                "content": []
            }
-            
+
            # Handle content
            content = msg.get("content", "")
            if isinstance(content, str):
@ -165,15 +166,15 @@ class AnthropicAdapter(ProviderAdapter):
                            anthropic_msg["content"].append(item)
                    else:
                        anthropic_msg["content"].append(str(item))
-            
+
            anthropic_messages.append(anthropic_msg)
-        
+
        return anthropic_messages
-    
+
    def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert tool definitions to Anthropic format"""
        anthropic_tools = []
-        
+
        for tool in tools:
            anthropic_tool = {
                "name": tool.get("name", ""),
@ -181,133 +182,131 @@ class AnthropicAdapter(ProviderAdapter):
                "input_schema": tool.get("parameters", {"type": "object", "properties": {}})
            }
            anthropic_tools.append(anthropic_tool)
-        
+
        return anthropic_tools
-    
+
    def reset(self):
        """No-op for pure parsing adapter"""
        pass
-    
+
    async def parse_stream_chunk(
-        self, 
+        self,
        raw_chunk: str
    ) -> AsyncGenerator[ParsedDelta, None]:
        """Parse Anthropic-format SSE stream
-        
+
        Returns incremental content - no accumulation.
        """
        if not raw_chunk or raw_chunk.strip() == "":
            return
-        
+
        try:
            chunk = json.loads(raw_chunk)
        except json.JSONDecodeError:
            return
-        
+
        chunk_type = chunk.get("type", "")
-        
+
        # Handle errors
        if chunk_type == self.BLOCK_ERROR:
            error_msg = chunk.get("error", {}).get("type", "unknown_error")
            logger.error(f"Anthropic API error: {error_msg}")
            yield ParsedDelta()
            return
-        
+
        result = ParsedDelta()
-        
+
        if chunk_type == self.BLOCK_MESSAGE_START:
            # Message start - no content yet
            pass
-            
+
        elif chunk_type == self.BLOCK_CONTENT_BLOCK_START:
            # Content block start
            block = chunk.get("content_block", {})
            block_type = block.get("type")
-            
+
            if block_type == self.SUBTYPE_THINKING:
                # Thinking block start
                thinking_text = block.get("thinking", {}).get("thinking", "")
                result.thinking = thinking_text
-                
+
            elif block_type == self.SUBTYPE_TOOL_USE:
                # Tool use block start
                tool_index = chunk.get("index", 0)
                tool_name = block.get("name", "")
-                result.tool_calls = [{
+                result.tool_call = {
                    "index": tool_index,
                    "id": "",
                    "type": "function",
                    "function": {"name": tool_name, "arguments": ""}
-                }]
+                }
-            
+
            elif block_type == self.SUBTYPE_TEXT:
                # Text block start - nothing to output yet
                pass
-                
+
        elif chunk_type == self.BLOCK_CONTENT_BLOCK_DELTA:
            # Content block delta
            delta = chunk.get("delta", {})
            delta_type = delta.get("type", "")
-            
+
            if delta_type == self.DELTA_THINKING:
                # Thinking delta (incremental)
                thinking = delta.get("thinking", "")
                result.thinking = thinking
-                
+
            elif delta_type == self.DELTA_TEXT:
                # Text delta (incremental)
                text = delta.get("text", "")
                result.text = text
-                
+
            elif delta_type == self.DELTA_INPUT_JSON:
                # Tool arguments delta (incremental)
                partial_json = delta.get("partial_json", "")
                # For tool calls, we need to update the arguments
                # This is handled by the consumer (AgenticLoop)
                if partial_json:
-                    result.tool_calls = [{
+                    result.tool_call = {
                        "index": 0,
                        "function": {"arguments": partial_json}
-                    }]
+                    }
-            
+
        elif chunk_type == self.BLOCK_CONTENT_BLOCK_STOP:
            # Content block stop
            pass
-            
+
        elif chunk_type == self.BLOCK_MESSAGE_DELTA:
            # Message delta (usually contains usage)
            delta = chunk.get("delta", {})
            usage = chunk.get("usage", {})
-            
+
            result.usage = {
                "prompt_tokens": usage.get("input_tokens", 0),
                "completion_tokens": usage.get("output_tokens", 0),
                "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
            }
-            
+
            # Check if complete by stop reason
            if delta.get("stop_reason"):
                result.is_complete = True
-                
+
        elif chunk_type == self.BLOCK_MESSAGE_STOP:
            # Message stop
            result.is_complete = True
-        
+
        # Yield result if there's any content
        if result.has_content() or result.is_complete:
            yield result
-    
+
-    def parse_response(self, data: Dict[str, Any]) -> LLMResponse:
+    def parse_response(self, data: Dict[str, Any]) -> Dict:
        """Parse non-streaming response"""
        content = data.get("content", [])
        thinking = ""
        text_content = ""
        tool_calls = []
-        
+
        for block in content:
            if isinstance(block, dict):
                block_type = block.get("type")
-                
+
                if block_type == "thinking":
                    thinking = block.get("thinking", "")
                elif block_type == "text":
@ -318,19 +317,22 @@ class AnthropicAdapter(ProviderAdapter):
                        "name": block.get("name", ""),
                        "input": block.get("input", {})
                    })
-        
+
        usage = data.get("usage", {})
-        
+
-        return LLMResponse(
+        return {
-            content=text_content,
+            "content": text_content,
-            thinking=thinking,
+            "thinking": thinking,
-            tool_calls=tool_calls,
+            "tool_calls": tool_calls,
-            usage={
+            "usage": {
                "prompt_tokens": usage.get("input_tokens", 0),
                "completion_tokens": usage.get("output_tokens", 0),
                "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
            }
-        )
+        }
-    
+
    def supports_thinking(self) -> bool:
        return True
    def supports_tools(self) -> bool:
        return True
--- a/luxx/services/llm_adapters/openai_adapter.py
+++ b/luxx/services/llm_adapters/openai_adapter.py
@ -1,200 +1,86 @@
-"""OpenAI Adapter - OpenAI-compatible API adapter
+"""OpenAI Adapter - OpenAI/DeepSeek/GLM/MiniMax compatible API adapter"""
 Supports OpenAI, DeepSeek, GLM and other OpenAI-compatible APIs.
 """
 import json
 import logging
-from typing import Dict, List, Any, AsyncGenerator, Optional
+from typing import Dict, List, Any, AsyncGenerator
 from .base import ProviderAdapter
-from ..llm_response import ParsedDelta, LLMResponse
+from ..llm_response import ParsedDelta
 logger = logging.getLogger(__name__)
 class OpenAIAdapter(ProviderAdapter):
-    """OpenAI-compatible API adapter
+    """OpenAI-compatible API adapter"""
-    
+
-    Pure parsing adapter - no internal state management.
+    def __init__(self):
-    Each parse_stream_chunk call returns incremental content.
+        pass
-    Accumulation is handled by the consumer (AgenticLoop).
+
    """
    @property
    def provider_type(self) -> str:
        return "openai"
-    
+
-    def __init__(self):
+    def build_request(self, model: str, messages: List[Dict], tools=None, **kwargs) -> tuple:
        pass
    def build_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]] = None,
        **kwargs
    ) -> tuple[Dict[str, Any], Dict[str, str]]:
        """Build OpenAI-format request"""
        api_key = kwargs.get("api_key", "")
-        
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
-        headers = {
+        body = {"model": model, "messages": messages, "stream": kwargs.get("stream", True)}
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        body = {
            "model": model,
            "messages": messages,
            "stream": kwargs.get("stream", True)
        }
        # Optional parameters
        if "temperature" in kwargs:
            body["temperature"] = kwargs["temperature"]
        if "max_tokens" in kwargs:
            body["max_tokens"] = kwargs["max_tokens"]
        if "top_p" in kwargs:
            body["top_p"] = kwargs["top_p"]
        if "frequency_penalty" in kwargs:
            body["frequency_penalty"] = kwargs["frequency_penalty"]
        if "presence_penalty" in kwargs:
            body["presence_penalty"] = kwargs["presence_penalty"]
        if "stop" in kwargs:
            body["stop"] = kwargs["stop"]
        if tools:
            body["tools"] = tools
-        if kwargs.get("thinking_enabled"):
+            body["tool_choice"] = "auto"
            body["thinking_enabled"] = True
        return body, headers
-    
+
    def reset(self):
        """No-op for pure parsing adapter"""
        pass
-    
+
-    async def parse_stream_chunk(
+    async def parse_stream_chunk(self, raw_chunk: str) -> AsyncGenerator[ParsedDelta, None]:
-        self, 
+        """Parse OpenAI/MiniMax format. Returns raw content for accumulation."""
-        raw_chunk: str
+        if not raw_chunk or not raw_chunk.strip():
    ) -> AsyncGenerator[ParsedDelta, None]:
        """Parse OpenAI-format SSE stream
        Returns incremental content - no accumulation.
        """
        # Parse SSE line
        event_type, data_str = self._parse_sse_line(raw_chunk)
        if not data_str or data_str == "[DONE]":
            if data_str == "[DONE]":
                yield ParsedDelta(is_complete=True)
            return
-        
+
        chunk_str = raw_chunk.strip()
        if chunk_str.startswith("data: "):
            chunk_str = chunk_str[6:]
        elif chunk_str.startswith("data:"):
            chunk_str = chunk_str[5:]
        if chunk_str.strip() == "[DONE]":
            yield ParsedDelta(is_complete=True)
            return
        try:
-            chunk = json.loads(data_str)
+            chunk = json.loads(chunk_str)
        except json.JSONDecodeError:
            return
-        
+
-        # Handle errors
+        choices = chunk.get("choices", [])
-        if event_type == "error" or "error" in chunk:
+        if not choices:
            yield ParsedDelta()
            return
-        
+
-        # Extract usage
+        delta = choices[0].get("delta", {})
-        usage = chunk.get("usage", {})
+        finish_reason = choices[0].get("finish_reason")
-        
+        content = delta.get("content", "")
-        # Parse choices
+
        for choice in chunk.get("choices", []):
            delta = choice.get("delta", {})
            content = delta.get("content") or ""
            # Extract thinking tags if present
            thinking, clean_text = self._extract_tags(content)
            # Tool calls
            tool_calls = delta.get("tool_calls", [])
            # Check if this is the final delta
            is_complete = bool(choice.get("finish_reason"))
            if thinking or clean_text or tool_calls or is_complete or usage:
                yield ParsedDelta(
                    thinking=thinking,
                    text=clean_text,
                    tool_calls=tool_calls if tool_calls else [],
                    is_complete=is_complete,
                    usage=usage if usage else {}
                )
    def parse_response(self, data: Dict[str, Any]) -> LLMResponse:
        """Parse non-streaming response"""
        choice = data.get("choices", [{}])[0]
        message = choice.get("message", {})
        content = message.get("content", "") or ""
        thinking, clean_content = self._extract_tags(content)
        if not thinking:
            thinking = message.get("reasoning_content") or ""
        tool_calls = message.get("tool_calls", [])
        usage = data.get("usage", {})
        return LLMResponse(
            content=clean_content,
            thinking=thinking,
            tool_calls=tool_calls,
            usage=usage
        )
    def _parse_sse_line(self, line: str) -> tuple:
        """Parse a single SSE line, return (event_type, data)"""
        if line.startswith("event:"):
            return line[6:].strip(), None
        elif line.startswith("data:"):
            return "", line[5:].strip()
        return "", None
    def _extract_tags(self, content: str) -> tuple:
        """Extract thinking tags and return (thinking, clean_text)
        Handles thinking tags that may be split across chunks:
        - First </think> in content closes any thinking block
        - Everything before first </think> is thinking
        - Everything after first </think> is clean text
        """
        if not content:
-            return "", ""
+            if finish_reason is not None:
-        
+                yield ParsedDelta(is_complete=True)
-        content_lower = content.lower()
+            return
-        
+
-        # Find first </think> (marks end of thinking block)
+        yield ParsedDelta(content=content)
-        end_idx = content_lower.find("</think>")
+
-        
+    def parse_response(self, data: Dict) -> Dict:
-        if end_idx != -1:
+        """Parse non-streaming response."""
-            # Found end tag - split at this point
+        choices = data.get("choices", [])
-            thinking_content = content[:end_idx].strip()
+        if not choices:
-            # Find if there's also a start tag before this
+            return {"content": "", "tool_calls": [], "usage": {}}
-            start_idx = content_lower.rfind("<think>", 0, end_idx)
+        message = choices[0].get("message", {})
-            
+        content = message.get("content", "")
-            if start_idx != -1:
+        tool_calls = message.get("tool_calls", [])
-                # There's a complete thinking block
+        usage = data.get("usage", {})
-                thinking = content[start_idx + 7:end_idx]
+        return {"content": content, "tool_calls": tool_calls, "usage": usage}
-                clean = content[end_idx + 9:]
+
-            else:
+    def supports_tools(self) -> bool:
-                # No start tag - this is the end of a split thinking block
+        return True
                # Everything before </think> was thinking
                thinking = content[:end_idx]
                clean = content[end_idx + 9:]
            return thinking, clean
        # No end tag found
        # Check if there's a start tag
        start_idx = content_lower.find("<think>")
        if start_idx != -1:
            # Has start tag but no end - all content after start is thinking
            thinking = content[start_idx + 7:]
            return thinking, ""
        else:
            # No tags at all - everything is clean
            return "", content
--- a/luxx/services/llm_client.py
+++ b/luxx/services/llm_client.py
@ -8,21 +8,23 @@ Supports various LLM API formats:
 Usage:
    from luxx.services.llm_client import LLMClient
-    
+
    # Auto-detect provider
    client = LLMClient(api_key="...", api_url="...")
-    
+
    # Specify provider
    client = LLMClient(api_key="...", api_url="...", provider_type="anthropic")
-    
+
    # Streaming call
    async for delta in client.stream_call(model, messages, tools=tools):
-        print(delta.text, delta.thinking, delta.tool_calls)
+        print(delta.text, delta.thinking, delta.tool_call)
 Extending Providers:
    LLMClient.register_adapter("my_provider", MyAdapter)
 """
 import json
 import logging
 import traceback
-from typing import Dict, List, Any, Optional, AsyncGenerator
+from typing import Dict, List, Any, Optional, AsyncGenerator, Type
 import httpx
@ -32,7 +34,7 @@ from luxx.services.llm_adapters import (
    OpenAIAdapter,
    AnthropicAdapter,
 )
-from luxx.services.llm_response import ParsedDelta, LLMResponse
+from luxx.services.llm_response import ParsedDelta
 logger = logging.getLogger(__name__)
@ -42,6 +44,9 @@ class LLMClient:
    Uses adapter pattern to support different API formats, auto-detects or manually specifies Provider type.
    Supports plugin registration for extending providers:
        LLMClient.register_adapter("my_provider", MyAdapter)
    Attributes:
        api_key: API key
        api_url: API base URL
@ -50,8 +55,8 @@ class LLMClient:
        adapter: Current adapter instance
    """
-    # Provider type to adapter class mapping
+    # Plugin registry for provider adapters (Open for Extension, Closed for Modification)
-    PROVIDER_ADAPTERS: Dict[str, type] = {
+    _adapter_registry: Dict[str, type] = {
        # OpenAI-compatible formats
        "openai": OpenAIAdapter,
        "deepseek": OpenAIAdapter,
@ -63,13 +68,40 @@ class LLMClient:
    }
    # URL keywords for provider detection
-    PROVIDER_URL_KEYWORDS: Dict[str, List[str]] = {
+    _url_keywords: Dict[str, List[str]] = {
        "anthropic": ["anthropic", "claude"],
        "deepseek": ["deepseek"],
        "glm": ["glm", "zhipu", "chatglm"],
        "openai": ["openai"],
    }
    @classmethod
    def register_adapter(cls, provider_type: str, adapter_class: Type[ProviderAdapter]) -> None:
        """Register a new adapter for a provider type
        This follows the Open-Closed Principle (OCP) - open for extension, closed for modification.
        Args:
            provider_type: Provider type identifier (e.g., "ollama", "groq")
            adapter_class: Adapter class (must inherit from ProviderAdapter)
        Example:
            class OllamaAdapter(ProviderAdapter):
                ...
            LLMClient.register_adapter("ollama", OllamaAdapter)
        """
        if not issubclass(adapter_class, ProviderAdapter):
            raise TypeError(f"{adapter_class.__name__} must inherit from ProviderAdapter")
        cls._adapter_registry[provider_type] = adapter_class
        logger.info(f"Registered adapter '{adapter_class.__name__}' for provider '{provider_type}'")
    @classmethod
    def list_providers(cls) -> List[str]:
        """List all registered provider types"""
        return list(cls._adapter_registry.keys())
    def __init__(
        self,
        api_key: str = None,
@ -110,7 +142,7 @@ class LLMClient:
        url = url or self.api_url
        url_lower = url.lower()
-        for provider, keywords in self.PROVIDER_URL_KEYWORDS.items():
+        for provider, keywords in self._url_keywords.items():
            for keyword in keywords:
                if keyword in url_lower:
                    logger.debug(f"Detected provider '{provider}' from URL: {url}")
@ -125,7 +157,7 @@ class LLMClient:
        Returns:
            ProviderAdapter subclass instance
        """
-        adapter_class = self.PROVIDER_ADAPTERS.get(
+        adapter_class = self._adapter_registry.get(
            self.provider_type,
            OpenAIAdapter
        )
@ -160,30 +192,30 @@ class LLMClient:
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]] = None,
        **kwargs
-    ) -> LLMResponse:
+    ) -> Dict:
        """Synchronous call to LLM (non-streaming)
-        
+
        Args:
            model: Model name
            messages: Message list
            tools: Tool definition list
            **kwargs: Other parameters (temperature, max_tokens, thinking_enabled, etc.)
-            
+
        Returns:
-            LLMResponse object
+            Dict with keys: content, thinking, tool_calls, usage
        """
        import asyncio
        return asyncio.get_event_loop().run_until_complete(
            self.async_sync_call(model, messages, tools, **kwargs)
        )
-    
+
    async def async_sync_call(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]] = None,
        **kwargs
-    ) -> LLMResponse:
+    ) -> Dict:
        """Internal async sync call"""
        model = model or self.default_model
        kwargs["api_key"] = self.api_key
@ -259,9 +291,14 @@ class LLMClient:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
-                        if line.strip():
+                        # MiniMax may send multiple SSE events concatenated on one line
-                            async for delta in self.adapter.parse_stream_chunk(line):
+                        # Format: data: {...}\ndata: {...}\n
-                                yield delta
+                        parts = line.split("data: ")
                        for part in parts:
                            part = part.strip()
                            if part and part != "[DONE]" and part.startswith("{"):
                                async for delta in self.adapter.parse_stream_chunk("data: " + part):
                                    yield delta
        except httpx.HTTPStatusError as e:
            status_code = e.response.status_code if e.response else "?"
--- a/luxx/services/llm_response.py
+++ b/luxx/services/llm_response.py
@ -1,65 +1,60 @@
-"""LLM Response - Unified message classes for LLM communication
+"""LLM Response - Unified message classes for LLM communication"""
 This module provides unified data classes for message passing throughout the LLM pipeline.
 """
 from typing import Dict, Any, List, Optional
 from dataclasses import dataclass, field
 from typing import Dict, Optional
 class StepType:
    """Step type constants"""
    THINKING = "thinking"
    TEXT = "text"
    TOOL_CALL = "tool_call"
    TOOL_RESULT = "tool_result"
@dataclass
 class Step:
    """Single step - used for storage and transport"""
    id: str
    index: int
    type: str
    content: str = ""
    name: str = ""
    arguments: str = ""
    id_ref: str = ""
    success: bool = True
    def to_dict(self) -> Dict:
        return {
            "id": self.id,
            "index": self.index,
            "type": self.type,
            "content": self.content,
            "name": self.name,
            "arguments": self.arguments,
            "id_ref": self.id_ref,
            "success": self.success
        }
@dataclass
 class ParsedDelta:
-    """Streaming response delta
+    """LLM streaming response delta"""
    Represents a single unit of streaming response data.
    Used for streaming responses where content is accumulated incrementally.
    Attributes:
        thinking: Accumulated thinking/reasoning content
        text: Accumulated text content  
        tool_calls: List of tool call requests
        is_complete: Whether this is the final delta
        usage: Token usage statistics
    """
    thinking: str = ""
    text: str = ""
    tool_calls: List[Dict] = field(default_factory=list)
    is_complete: bool = False
    usage: Dict[str, int] = field(default_factory=dict)
    def has_thinking(self) -> bool:
        """Check if there's thinking content"""
        return bool(self.thinking)
    def has_text(self) -> bool:
        """Check if there's text content"""
        return bool(self.text)
    def has_tool_calls(self) -> bool:
        """Check if there are tool calls"""
        return bool(self.tool_calls)
    def has_content(self) -> bool:
        """Check if there's any content"""
        return self.has_thinking() or self.has_text() or self.has_tool_calls()
@dataclass
 class LLMResponse:
    """Complete LLM response
    Represents a complete non-streaming response.
    Attributes:
        content: Final text content
        thinking: Final thinking content (if any)
        tool_calls: List of tool calls (if any)
        usage: Token usage statistics
    """
    content: str = ""
    thinking: str = ""
-    tool_calls: List[Dict] = field(default_factory=list)
+    text: str = ""
-    usage: Dict[str, int] = field(default=dict)
+    tool_call: Optional[Dict] = None
-    
+    usage: Dict[str, int] = field(default_factory=dict)
-    def has_tool_calls(self) -> bool:
+    is_complete: bool = False
-        """Check if there are tool calls"""
+
-        return bool(self.tool_calls)
+    def has_thinking(self) -> bool:
        return bool(self.thinking)
    def has_text(self) -> bool:
        return bool(self.text)
    def has_tool_call(self) -> bool:
        return self.tool_call is not None
    def has_content(self) -> bool:
        return bool(self.content) or self.has_thinking() or self.has_text() or self.has_tool_call()
--- a/luxx/services/process_result.py
+++ b/luxx/services/process_result.py
@ -1,37 +0,0 @@
 """ProcessResult - Result of processing an SSE line."""
 class ProcessResult:
    """Result of processing an SSE line.
    Attributes:
        events: List of SSE event strings to yield
        has_error: Whether an error occurred
        error_content: Error message if any
        has_content: Whether content was received
        has_tool_calls: Whether tool calls were received
    """
    def __init__(self):
        self.events: list = []
        self.has_error: bool = False
        self.error_content: str = ""
        self.has_content: bool = False
        self.has_tool_calls: bool = False
    def add_event(self, event: str):
        """Add an event to the result."""
        self.events.append(event)
    def set_error(self, content: str):
        """Set error state."""
        self.has_error = True
        self.error_content = content
    def set_content(self):
        """Mark that content was received."""
        self.has_content = True
    def set_tool_calls(self):
        """Mark that tool calls were received."""
        self.has_tool_calls = True
--- a/luxx/services/stream_context.py
+++ b/luxx/services/stream_context.py
@ -1,51 +1,143 @@
-"""StreamContext - Manages streaming state transitions during LLM response.
+"""Stream Context - Manages streaming state and content accumulation
-Tracks steps in order:
+This module follows the Composition over Inheritance principle initially,
- thinking: Model reasoning content
+but StreamContext inherits from StreamState for simplicity.
- text: Model response text
+The rendering logic is delegated to a separate StreamRenderer.
 - tool_call: Tool invocation request
 - tool_result: Tool execution result
 Each step has unique id and index for frontend rendering.
 """
-import json
+from dataclasses import dataclass
-from typing import List, Dict, Optional
+from typing import List, Dict, Any, Optional
 from enum import Enum
 from luxx.services.events import sse_event
-def _sse_event(event: str, data: dict) -> str:
+class StepType(str, Enum):
-    """Format a Server-Sent Event string."""
+    """Step type enumeration"""
-    return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
+    THINKING = "thinking"
    TEXT = "text"
    TOOL_CALL = "tool_call"
    TOOL_RESULT = "tool_result"
-class StreamContext:
+THINK_START = "<thinking>"
-    """Manages streaming state transitions during LLM response."""
+THINK_END = "</thinking>"
@dataclass
 class Step:
    """Represents a single step in the response process"""
    id: str
    index: int
    type: str
    content: str = ""
    name: str = ""
    arguments: str = ""
    id_ref: str = ""
    success: bool = True
    def to_dict(self) -> Dict[str, Any]:
        result = {
            "id": self.id,
            "index": self.index,
            "type": self.type,
        }
        if self.content:
            result["content"] = self.content
        if self.name:
            result["name"] = self.name
        if self.arguments:
            result["arguments"] = self.arguments
        if self.id_ref:
            result["id_ref"] = self.id_ref
        if self.type == StepType.TOOL_RESULT:
            result["success"] = self.success
        return result
 class StreamState:
    """Pure state management for streaming
    This class maintains all state but delegates rendering to StreamRenderer.
    """
    def __init__(self):
-        self.step_index = 0
+        self.reset()
        self.current_step_id = None
        self.current_step_idx = None
        self.current_step_type = None
        self.full_content = ""
        self.full_thinking = ""
        self.all_steps = []
        self.all_tool_calls = []
        self.all_tool_results = []
        self.tool_calls_list = []
        self._last_message_id = None
        self._last_token_count = 0
        self._last_usage = None
    def reset(self):
-        """Reset state for new iteration."""
+        """Reset all state for a new stream"""
-        self.current_step_id = None
+        self.step_index = 0
-        self.current_step_idx = None
+        self.current_step_id: Optional[str] = None
-        self.current_step_type = None
+        self.current_step_idx: Optional[int] = None
        self.current_step_type: Optional[str] = None
        self.full_content = ""
        self.full_thinking = ""
-        self.tool_calls_list = []
+        self.all_steps: List[Step] = []
        self.all_tool_results: List[Dict] = []
        self.tool_calls_list: List[Dict] = []
        self._last_message_id: Optional[str] = None
        self._last_token_count = 0
        self._last_usage: Optional[Dict] = None
        self._in_thinking = False
        self._thinking_buf = ""
        self._text_buf = ""
    def process_content(self, content: str) -> Dict:
        """Process raw content, handling thinking tags."""
        if not content:
            return {"thinking": "", "text": "", "should_emit": False, "thinking_only": False}
        thinking = ""
        text = ""
        should_emit = False
        thinking_only = False
        if THINK_START in content and not self._in_thinking:
            self._in_thinking = True
            idx = content.find(THINK_START) + len(THINK_START)
            content = content[idx:]
        if THINK_END in content:
            idx = content.find(THINK_END)
            thinking_content = content[:idx]
            self._thinking_buf += thinking_content
            content = content[idx + len(THINK_END):]
            if THINK_END in content:
                second_idx = content.find(THINK_END)
                text_content = content[:second_idx]
                self._text_buf += text_content
                content = content[second_idx + len(THINK_END):]
            self._in_thinking = False
            should_emit = True
            thinking_only = not bool(self._text_buf)
        if self._in_thinking:
            self._thinking_buf += content
        else:
            self._text_buf += content
        if should_emit:
            thinking = self._thinking_buf
            text = self._text_buf
        return {
            "thinking": thinking,
            "text": text,
            "should_emit": should_emit,
            "thinking_only": thinking_only
        }
    def flush(self) -> tuple:
        """Flush remaining buffers and return content"""
        thinking = self._thinking_buf
        text = self._text_buf
        self._thinking_buf = ""
        self._text_buf = ""
        return thinking, text
    def start_step(self, step_type: str) -> str:
-        """Start a new step with unique ID."""
+        """Start a new step and return its ID"""
        self.current_step_idx = self.step_index
        self.current_step_id = f"step-{self.step_index}"
        self.current_step_type = step_type
@ -53,20 +145,20 @@ class StreamContext:
        return self.current_step_id
    def finalize_step(self):
-        """Save current step to all_steps."""
+        """Finalize the current step and add to all_steps"""
        if self.current_step_id is None:
            return
-        
+        content = self.full_content if self.current_step_type == StepType.TEXT else self.full_thinking
-        content = self.full_content if self.current_step_type == "text" else self.full_thinking
+        step = Step(
-        self.all_steps.append({
+            id=self.current_step_id,
-            "id": self.current_step_id,
+            index=self.current_step_idx,
-            "index": self.current_step_idx,
+            type=self.current_step_type,
-            "type": self.current_step_type,
+            content=content
-            "content": content
+        )
-        })
+        self.all_steps.append(step)
    def accumulate_tool_call(self, tc_delta: Dict):
-        """Accumulate tool call delta."""
+        """Accumulate tool call delta"""
        idx = tc_delta.get("index", 0)
        if idx >= len(self.tool_calls_list):
            self.tool_calls_list.append({
@ -74,41 +166,63 @@ class StreamContext:
                "type": "function",
                "function": {"name": "", "arguments": ""}
            })
        func = tc_delta.get("function", {})
        if func.get("name"):
            self.tool_calls_list[idx]["function"]["name"] += func["name"]
        if func.get("arguments"):
            self.tool_calls_list[idx]["function"]["arguments"] += func["arguments"]
-    def emit_tool_calls(self) -> List[str]:
+    def add_tool_result(self, result: Dict):
-        """Emit tool call steps, return SSE events."""
+        """Add a tool result to history"""
        self.all_tool_results.append({
            "role": "tool",
            "tool_call_id": result.get("tool_call_id", ""),
            "content": result.get("content", "")
        })
    def set_completion(self, msg_id: str, token_count: int, usage: dict):
        """Set completion metadata"""
        self._last_message_id = msg_id
        self._last_token_count = token_count
        self._last_usage = usage
    def get_steps_for_save(self) -> List[Dict]:
        """Get all steps as dictionaries"""
        return [step.to_dict() for step in self.all_steps]
 class StreamRenderer:
    """Renders stream state to SSE events"""
    @staticmethod
    def render_tool_calls(state: StreamState) -> List[str]:
        """Render tool calls as SSE events"""
        events = []
-        for tc in self.tool_calls_list:
+        for tc in state.tool_calls_list:
-            step_id = f"step-{self.step_index}"
+            step_id = f"step-{state.step_index}"
-            self.step_index += 1
+            state.step_index += 1
-            
+            step = Step(
-            step = {
+                id=step_id,
-                "id": step_id,
+                index=state.step_index - 1,
-                "index": self.step_index - 1,
+                type=StepType.TOOL_CALL,
-                "type": "tool_call",
+                name=tc["function"]["name"],
-                "id_ref": tc.get("id", ""),
+                arguments=tc["function"]["arguments"],
-                "name": tc["function"]["name"],
+                id_ref=tc.get("id", "")
-                "arguments": tc["function"]["arguments"]
+            )
-            }
+            state.all_steps.append(step)
-            self.all_steps.append(step)
+            events.append(sse_event("process_step", {"step": step.to_dict()}))
            self.all_tool_calls.append(tc)
            events.append(_sse_event("process_step", {"step": step}))
        return events
-    def emit_tool_result(self, result: Dict, ref_step_id: str) -> tuple:
+    @staticmethod
-        """Emit tool result step, return (step, event)."""
+    def render_tool_result(state: StreamState, result: Dict, ref_step_id: str) -> tuple:
-        step_id = f"step-{self.step_index}"
+        """Render a tool result as SSE event"""
-        self.step_index += 1
+        import json
        step_id = f"step-{state.step_index}"
        state.step_index += 1
        content = result.get("content", "")
        success = True
        try:
            parsed = json.loads(content)
            if isinstance(parsed, dict):
@ -116,32 +230,49 @@ class StreamContext:
        except (json.JSONDecodeError, TypeError):
            pass
-        step = {
+        step = Step(
-            "id": step_id,
+            id=step_id,
-            "index": self.step_index - 1,
+            index=state.step_index - 1,
-            "type": "tool_result",
+            type=StepType.TOOL_RESULT,
-            "id_ref": ref_step_id,
+            name=result.get("name", ""),
-            "name": result.get("name", ""),
+            content=content,
-            "content": content,
+            id_ref=ref_step_id,
-            "success": success
+            success=success
-        }
+        )
-        self.all_steps.append(step)
+        state.all_steps.append(step)
-        self.all_tool_results.append({
+        state.add_tool_result(result)
            "role": "tool",
            "tool_call_id": result.get("tool_call_id", ""),
            "content": content
        })
-        return step, _sse_event("process_step", {"step": step})
+        return step, sse_event("process_step", {"step": step.to_dict()})
-    def set_completion(self, msg_id: str, token_count: int, usage: dict):
+    @staticmethod
-        """Set completion info for saving."""
+    def render_thinking(state: StreamState) -> str:
-        self._last_message_id = msg_id
+        """Render thinking content as SSE event"""
-        self._last_token_count = token_count
+        step = Step(
-        self._last_usage = usage
+            id=state.current_step_id,
            index=state.current_step_idx,
            type=StepType.THINKING,
            content=state.full_thinking
        )
        return sse_event("process_step", {"step": step.to_dict()})
-    def reset_completion(self):
+    @staticmethod
-        """Reset completion info."""
+    def render_text(state: StreamState) -> str:
-        self._last_message_id = None
+        """Render text content as SSE event"""
-        self._last_token_count = 0
+        step = Step(
-        self._last_usage = None
+            id=state.current_step_id,
            index=state.current_step_idx,
            type=StepType.TEXT,
            content=state.full_content
        )
        return sse_event("process_step", {"step": step.to_dict()})
    @staticmethod
    def render_error(error_msg: str) -> str:
        """Render error event"""
        return sse_event("error", {"content": error_msg})
 # Convenience function for backward compatibility
 def render_error(error_msg: str) -> str:
    """Render error event"""
    return sse_event("error", {"content": error_msg})
--- a/luxx/tools/executor.py
+++ b/luxx/tools/executor.py
@ -1,61 +1,121 @@
-"""Tool executor"""
+"""Tool executor with caching and parallel execution support
 This module follows the Single Responsibility Principle:
 - ToolExecutor: Tool execution logic
 - CallHistory: Call history management
 - CacheManager: Caching logic
 """
 import json
 import time
 from typing import List, Dict, Any, Optional
 from threading import Lock
-from luxx.tools.core import registry, ToolResult, ToolContext
+from luxx.tools.core import registry, ToolContext
-class ToolExecutor:
+class CacheManager:
-    """Tool executor with caching and parallel execution support"""
+    """Manages tool result caching"""
-    def __init__(
+    def __init__(self, enable_cache: bool = True, cache_ttl: int = 300):
        self,
        enable_cache: bool = True,
        cache_ttl: int = 300,  # 5 minutes
        max_workers: int = 4
    ):
        self.enable_cache = enable_cache
        self.cache_ttl = cache_ttl
        self.max_workers = max_workers
        self._cache: Dict[str, tuple] = {}  # key: (result, timestamp)
-        self._call_history: List[Dict[str, Any]] = []
+        self._lock = Lock()
-    def _make_cache_key(self, name: str, args: dict) -> str:
+    def make_key(self, name: str, args: dict, workspace: str = None) -> str:
        """Generate cache key"""
        args_str = json.dumps(args, sort_keys=True, ensure_ascii=False)
-        return f"{name}:{args_str}"
+        key = f"{name}:{args_str}"
        if workspace:
            key = f"{key}:{workspace}"
        return key
-    def _is_cache_valid(self, cache_key: str) -> bool:
+    def is_valid(self, cache_key: str) -> bool:
        """Check if cache is valid"""
        if cache_key not in self._cache:
            return False
        _, timestamp = self._cache[cache_key]
        return time.time() - timestamp < self.cache_ttl
-    def _get_cached(self, cache_key: str) -> Optional[Dict]:
+    def get(self, cache_key: str) -> Optional[Dict]:
        """Get cached result"""
-        if self.enable_cache and self._is_cache_valid(cache_key):
+        if not self.enable_cache:
            return None
        if self.is_valid(cache_key):
            return self._cache[cache_key][0]
        return None
-    def _set_cached(self, cache_key: str, result: Dict) -> None:
+    def set(self, cache_key: str, result: Dict) -> None:
        """Set cache"""
-        if self.enable_cache:
+        if not self.enable_cache:
            return
        with self._lock:
            self._cache[cache_key] = (result, time.time())
-    def _record_call(self, name: str, args: dict, result: Dict) -> None:
+    def clear(self) -> None:
-        """Record call history"""
+        """Clear all cache"""
-        self._call_history.append({
+        with self._lock:
            self._cache.clear()
    def size(self) -> int:
        """Get cache size"""
        return len(self._cache)
 class CallHistory:
    """Manages tool call history"""
    MAX_HISTORY_SIZE = 1000
    def __init__(self):
        self._history: List[Dict[str, Any]] = []
        self._lock = Lock()
    def record(self, name: str, args: dict, result: Dict) -> None:
        """Record a tool call"""
        entry = {
            "name": name,
            "args": args,
            "result": result,
            "timestamp": time.time()
-        })
+        }
-        
+        with self._lock:
-        # Limit history size
+            self._history.append(entry)
-        if len(self._call_history) > 1000:
+            # Limit history size
-            self._call_history = self._call_history[-1000:]
+            if len(self._history) > self.MAX_HISTORY_SIZE:
                self._history = self._history[-self.MAX_HISTORY_SIZE:]
    def get(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Get recent call history"""
        with self._lock:
            return self._history[-limit:].copy()
    def clear(self) -> None:
        """Clear all history"""
        with self._lock:
            self._history.clear()
    def size(self) -> int:
        """Get history size"""
        return len(self._history)
 class ToolExecutor:
    """Tool executor with caching and parallel execution support
    This class delegates caching and history to specialized classes,
    following the Single Responsibility Principle.
    """
    def __init__(
        self,
        enable_cache: bool = True,
        cache_ttl: int = 300,
        max_workers: int = 4
    ):
        self.cache = CacheManager(enable_cache=enable_cache, cache_ttl=cache_ttl)
        self.history = CallHistory()
        self.max_workers = max_workers
    def process_tool_calls(
        self,
@ -63,16 +123,8 @@ class ToolExecutor:
        context: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Process tool calls sequentially"""
-        # Build ToolContext from context dict (includes user_permission_level)
+        # Build ToolContext from context dict
-        tool_ctx = ToolContext(
+        tool_ctx = self._build_tool_context(context)
            workspace=context.get("workspace"),
            user_id=context.get("user_id"),
            username=context.get("username"),
            extra={
                "user_permission_level": context.get("user_permission_level", 1),
                **(context.get("extra", {}))
            }
        )
        results = []
@ -81,26 +133,21 @@ class ToolExecutor:
            name = call.get("function", {}).get("name", "")
            # Parse JSON arguments
-            try:
+            args = self._parse_arguments(call)
                args = json.loads(call.get("function", {}).get("arguments", "{}"))
            except json.JSONDecodeError:
                args = {}
-            # Check cache (include context in cache key for file operations)
+            # Check cache
-            cache_key = self._make_cache_key(name, args)
+            cache_key = self.cache.make_key(name, args, tool_ctx.workspace)
-            if tool_ctx.workspace:
+            cached = self.cache.get(cache_key)
                cache_key = f"{cache_key}:{tool_ctx.workspace}"
            cached = self._get_cached(cache_key)
            if cached is not None:
                result = cached
            else:
                # Execute tool with context
                result = registry.execute(name, args, context=tool_ctx)
-                self._set_cached(cache_key, result)
+                self.cache.set(cache_key, result)
            # Record call
-            self._record_call(name, args, result)
+            self.history.record(name, args, result)
            # Create result message
            results.append(self._create_tool_result(call_id, name, result))
@ -116,16 +163,7 @@ class ToolExecutor:
        if len(tool_calls) <= 1:
            return self.process_tool_calls(tool_calls, context)
-        # Build ToolContext from context dict (includes user_permission_level)
+        tool_ctx = self._build_tool_context(context)
        tool_ctx = ToolContext(
            workspace=context.get("workspace"),
            user_id=context.get("user_id"),
            username=context.get("username"),
            extra={
                "user_permission_level": context.get("user_permission_level", 1),
                **(context.get("extra", {}))
            }
        )
        try:
            from concurrent.futures import ThreadPoolExecutor, as_completed
@ -136,45 +174,61 @@ class ToolExecutor:
                for call in tool_calls:
                    call_id = call.get("id", "")
                    name = call.get("function", {}).get("name", "")
-                    
+                    args = self._parse_arguments(call)
                    # Parse all arguments
                    try:
                        args = json.loads(call.get("function", {}).get("arguments", "{}"))
                    except json.JSONDecodeError:
                        args = {}
                    # Check cache
-                    cache_key = self._make_cache_key(name, args)
+                    cache_key = self.cache.make_key(name, args, tool_ctx.workspace)
-                    if tool_ctx.workspace:
+                    cached = self.cache.get(cache_key)
                        cache_key = f"{cache_key}:{tool_ctx.workspace}"
                    cached = self._get_cached(cache_key)
                    if cached is not None:
                        futures[call_id] = (name, args, cached)
                    else:
-                        # Submit task with context
+                        # Submit task
-                        future = executor.submit(registry.execute, name, args, context=tool_ctx)
+                        future = executor.submit(
                            registry.execute, name, args, context=tool_ctx
                        )
                        futures[future] = (call_id, name, args, cache_key)
                results = []
                for future in as_completed(futures.keys()):
-                    if future in futures:
+                    item = futures[future]
-                        item = futures[future]
+                    if len(item) == 3:
-                        if len(item) == 3:
+                        call_id, name, args = item
-                            call_id, name, args = item
+                        cache_key = self.cache.make_key(name, args, tool_ctx.workspace)
-                            cache_key = self._make_cache_key(name, args)
+                        result = item[2]
-                        else:
+                    else:
-                            call_id, name, args, cache_key = item
+                        call_id, name, args, cache_key = item
                        result = future.result()
-                        self._set_cached(cache_key, result)
+                        self.cache.set(cache_key, result)
-                        self._record_call(name, args, result)
+                    
-                        results.append(self._create_tool_result(call_id, name, result))
+                    self.history.record(name, args, result)
                    results.append(self._create_tool_result(call_id, name, result))
                return results
        except ImportError:
            return self.process_tool_calls(tool_calls, context)
    def _build_tool_context(self, context: Dict[str, Any]) -> ToolContext:
        """Build ToolContext from context dict"""
        return ToolContext(
            workspace=context.get("workspace"),
            user_id=context.get("user_id"),
            username=context.get("username"),
            extra={
                "user_permission_level": context.get("user_permission_level", 1),
                **(context.get("extra", {}))
            }
        )
    def _parse_arguments(self, call: Dict[str, Any]) -> Dict:
        """Parse JSON arguments from tool call"""
        try:
            return json.loads(call.get("function", {}).get("arguments", "{}"))
        except json.JSONDecodeError:
            return {}
    def _create_tool_result(self, call_id: str, name: str, result: Dict) -> Dict[str, Any]:
        """Create tool result message"""
        return {
@ -184,19 +238,10 @@ class ToolExecutor:
            "content": json.dumps(result, ensure_ascii=False)
        }
    def _create_error_result(self, call_id: str, name: str, error: str) -> Dict[str, Any]:
        """Create error result message"""
        return {
            "tool_call_id": call_id,
            "role": "tool",
            "name": name,
            "content": json.dumps({"success": False, "error": error}, ensure_ascii=False)
        }
    def clear_cache(self) -> None:
        """Clear all cache"""
-        self._cache.clear()
+        self.cache.clear()
    def get_history(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Get call history"""
-        return self._call_history[-limit:]
+        return self.history.get(limit)