refactor: 增加消息适配的逻辑

This commit is contained in:
ViperEkura 2026-04-25 11:10:01 +08:00
parent 232a86e11f
commit a4e7f9d086
8 changed files with 1174 additions and 501 deletions

View File

@ -1,4 +1,4 @@
"""Services module""" """Services module"""
from luxx.services.llm_client import LLMClient, llm_client, LLMResponse from luxx.services.llm_client import LLMClient
from luxx.services.llm_response import ParsedDelta, LLMResponse, StreamAccumulator
from luxx.services.chat import ChatService, chat_service from luxx.services.chat import ChatService, chat_service
from luxx.services.llm_response import LLMResponseParser, llm_parser, ParsedDelta

View File

@ -8,17 +8,15 @@ The loop:
5. Repeat (max 10 iterations) 5. Repeat (max 10 iterations)
6. Return final response 6. Return final response
""" """
import json
import uuid import uuid
import logging import logging
import traceback from typing import List, Dict, AsyncGenerator
from typing import List, Dict, Any, AsyncGenerator
from luxx.tools.executor import ToolExecutor from luxx.tools.executor import ToolExecutor
from luxx.services.llm_client import LLMClient from luxx.services.llm_client import LLMClient
from luxx.services.stream_context import StreamContext, _sse_event from luxx.services.stream_context import StreamContext, _sse_event
from luxx.services.process_result import ProcessResult from luxx.services.process_result import ProcessResult
from luxx.services.llm_response import llm_parser from luxx.services.llm_response import ParsedDelta
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -26,20 +24,11 @@ logger = logging.getLogger(__name__)
MAX_ITERATIONS = 10 MAX_ITERATIONS = 10
def _parse_sse_line(line: str) -> tuple:
"""Parse SSE line into (event_type, data_str)."""
event_type = None
data_str = None
for part in line.strip().split('\n'):
if part.startswith('event: '):
event_type = part[7:].strip()
elif part.startswith('data: '):
data_str = part[6:].strip()
return event_type, data_str
class AgenticLoop: class AgenticLoop:
"""Executes the Agentic Loop: LLM + Tools iteration.""" """Executes the Agentic Loop: LLM + Tools iteration.
Supports multiple LLM Providers, auto-adapts response format.
"""
def __init__(self, tool_executor: ToolExecutor): def __init__(self, tool_executor: ToolExecutor):
self.tool_executor = tool_executor self.tool_executor = tool_executor
@ -66,8 +55,8 @@ class AgenticLoop:
context.reset() context.reset()
has_error = False has_error = False
# Stream LLM response # Stream LLM response - now yields ParsedDelta directly
async for sse_line in llm.stream_call( async for delta in llm.stream_call(
model=model, model=model,
messages=messages, messages=messages,
tools=tools, tools=tools,
@ -75,8 +64,8 @@ class AgenticLoop:
max_tokens=max_tokens, max_tokens=max_tokens,
thinking_enabled=thinking_enabled thinking_enabled=thinking_enabled
): ):
# Process stream line # Process parsed delta
result = self._process_stream_line(sse_line, context, total_usage) result = self._process_delta(delta, context, total_usage)
# Yield events # Yield events
for event in result.events: for event in result.events:
@ -110,85 +99,40 @@ class AgenticLoop:
if not has_error: if not has_error:
yield _sse_event("error", {"content": "Exceeded maximum tool call iterations"}) yield _sse_event("error", {"content": "Exceeded maximum tool call iterations"})
def _process_stream_line(self, sse_line: str, ctx: 'StreamContext', def _process_delta(
total_usage: dict) -> ProcessResult: self,
"""Process single SSE line from LLM, return result with events and flags.""" delta: ParsedDelta,
ctx: 'StreamContext',
total_usage: dict
) -> ProcessResult:
"""Process ParsedDelta from adapter, return result with events and flags.
Args:
delta: ParsedDelta from LLM adapter
ctx: StreamContext for state management
total_usage: Accumulated token usage
Returns:
ProcessResult with events and flags
"""
result = ProcessResult() result = ProcessResult()
event_type, data_str = _parse_sse_line(sse_line)
if not data_str: # Check for error (empty delta with no content)
if not delta.has_content() and not delta.is_complete:
# Empty delta, possibly an error
return result return result
# Handle upstream errors # Update usage
if event_type == 'error': if delta.usage:
try:
error_data = json.loads(data_str)
error_content = error_data.get("content", "Unknown error")
except json.JSONDecodeError:
error_content = data_str
result.set_error(error_content)
result.add_event(_sse_event("error", {"content": error_content}))
return result
try:
chunk = json.loads(data_str)
except json.JSONDecodeError:
error_msg = f"Parse error: {data_str[:50]}"
result.set_error(error_msg)
result.add_event(_sse_event("error", {"content": error_msg}))
return result
# Extract usage
if "usage" in chunk and chunk["usage"]:
usage = chunk["usage"]
total_usage.update({ total_usage.update({
"prompt_tokens": usage.get("prompt_tokens", 0), "prompt_tokens": delta.usage.get("prompt_tokens", 0),
"completion_tokens": usage.get("completion_tokens", 0), "completion_tokens": delta.usage.get("completion_tokens", 0),
"total_tokens": usage.get("total_tokens", 0) "total_tokens": delta.usage.get("total_tokens", 0)
}) })
# Handle API errors
if "error" in chunk:
error_msg = chunk["error"].get("message", str(chunk["error"]))
result.set_error(error_msg)
result.add_event(_sse_event("error", {"content": f"API Error: {error_msg}"}))
return result
# Get delta
choices = chunk.get("choices", [])
if not choices:
# Non-standard format: check for content directly
content = chunk.get("content") or ""
if content:
# Check for thinking tags in content
thinking_part, clean_text = llm_parser._extract_thinking_tags(content)
if thinking_part:
ctx.full_thinking = (ctx.full_thinking or "") + thinking_part
if not ctx.current_step_id or ctx.current_step_type != "thinking":
ctx.start_step("thinking")
result.add_event(_sse_event("process_step", {
"step": {"id": ctx.current_step_id, "index": ctx.current_step_idx, "type": "thinking", "content": ctx.full_thinking}
}))
result.set_content()
if clean_text:
ctx.full_content = (ctx.full_content or "") + clean_text
if not ctx.current_step_id or ctx.current_step_type != "text":
ctx.start_step("text")
result.add_event(_sse_event("process_step", {
"step": {"id": ctx.current_step_id, "index": ctx.current_step_idx, "type": "text", "content": ctx.full_content}
}))
result.set_content()
return result
delta = choices[0].get("delta", {})
# Parse delta using unified parser
parsed = llm_parser.parse_openai(delta)
# Process thinking content # Process thinking content
if parsed.thinking: if delta.thinking:
ctx.full_thinking = parsed.thinking ctx.full_thinking = delta.thinking
if not ctx.current_step_id or ctx.current_step_type != "thinking": if not ctx.current_step_id or ctx.current_step_type != "thinking":
ctx.start_step("thinking") ctx.start_step("thinking")
result.add_event(_sse_event("process_step", { result.add_event(_sse_event("process_step", {
@ -202,8 +146,8 @@ class AgenticLoop:
result.set_content() result.set_content()
# Process text content # Process text content
if parsed.text: if delta.text:
ctx.full_content = parsed.text ctx.full_content = delta.text
if not ctx.current_step_id or ctx.current_step_type != "text": if not ctx.current_step_id or ctx.current_step_type != "text":
ctx.start_step("text") ctx.start_step("text")
result.add_event(_sse_event("process_step", { result.add_event(_sse_event("process_step", {
@ -216,8 +160,9 @@ class AgenticLoop:
})) }))
result.set_content() result.set_content()
# Accumulate tool calls # Process tool calls
for tc in parsed.tool_calls or delta.get("tool_calls", []): if delta.tool_calls:
for tc in delta.tool_calls:
ctx.accumulate_tool_call(tc) ctx.accumulate_tool_call(tc)
result.set_tool_calls() result.set_tool_calls()

View File

@ -0,0 +1,24 @@
"""LLM Provider Adapters
Adapter module for various LLM API formats.
Adapter types:
- OpenAIAdapter: OpenAI/DeepSeek/GLM compatible APIs
- AnthropicAdapter: Anthropic Claude API
Usage:
from luxx.services.llm_adapters import OpenAIAdapter, AnthropicAdapter
adapter = OpenAIAdapter()
# Or use LLMClient for automatic selection
"""
from .base import ProviderAdapter
from .openai_adapter import OpenAIAdapter
from .anthropic_adapter import AnthropicAdapter
__all__ = [
"ProviderAdapter",
"OpenAIAdapter",
"AnthropicAdapter",
]

View File

@ -0,0 +1,398 @@
"""Anthropic Adapter - Anthropic Claude API adapter
Supports Anthropic Claude API streaming and non-streaming responses.
"""
import json
import logging
from typing import Dict, List, Any, AsyncGenerator
from .base import ProviderAdapter
from ..llm_response import ParsedDelta, LLMResponse
logger = logging.getLogger(__name__)
class AnthropicAdapter(ProviderAdapter):
"""Anthropic Claude API adapter
Anthropic API uses a completely different format from OpenAI:
- Endpoint: POST /v1/messages
- Streaming: SSE events (content_block_start, content_block_delta, etc.)
- Thinking: Independent thinking type content block
- Tools: tool_use type content block
Reference: https://docs.anthropic.com/claude/reference/messages
"""
# Anthropic API endpoint suffix
MESSAGES_PATH = "/v1/messages"
# Anthropic API version
ANTHROPIC_VERSION = "2023-06-01"
# Content block types
BLOCK_MESSAGE_START = "message_start"
BLOCK_CONTENT_BLOCK_START = "content_block_start"
BLOCK_CONTENT_BLOCK_DELTA = "content_block_delta"
BLOCK_CONTENT_BLOCK_STOP = "content_block_stop"
BLOCK_MESSAGE_DELTA = "message_delta"
BLOCK_MESSAGE_STOP = "message_stop"
BLOCK_ERROR = "error"
# Delta types
DELTA_THINKING = "thinking_delta"
DELTA_TEXT = "text_delta"
DELTA_INPUT_JSON = "input_json_delta"
# Content block subtypes
SUBTYPE_THINKING = "thinking"
SUBTYPE_TEXT = "text"
SUBTYPE_TOOL_USE = "tool_use"
def __init__(self):
# Buffers for accumulating streaming content
self._thinking_buffer = ""
self._text_buffer = ""
# Buffers for accumulating deltas
self._tool_args_buffer = ""
self._current_tool_index = -1
self._current_tool_name = ""
self._usage = {}
@property
def provider_type(self) -> str:
return "anthropic"
def build_request(
self,
model: str,
messages: List[Dict[str, Any]],
tools: List[Dict[str, Any]] = None,
**kwargs
) -> tuple[Dict[str, Any], Dict[str, str]]:
"""Build Anthropic-format request
Anthropic request format differs from OpenAI:
- Uses "messages" instead of "message"
- Requires "max_tokens"
- Different tool format
Args:
model: Model name (e.g., claude-3-5-sonnet-20241022)
messages: Message list
tools: Tool definition list
**kwargs: Other parameters
Returns:
tuple: (body, headers)
"""
api_key = kwargs.get("api_key", "")
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}",
"anthropic-version": self.ANTHROPIC_VERSION
}
# Convert messages to Anthropic format
anthropic_messages = self._convert_messages(messages)
body = {
"model": model,
"messages": anthropic_messages,
"stream": kwargs.get("stream", True),
"max_tokens": kwargs.get("max_tokens", 4096)
}
# System message
if "system" in kwargs:
body["system"] = kwargs["system"]
else:
# Extract from first message
for msg in messages:
if msg.get("role") == "system":
body["system"] = msg.get("content", "")
break
# Thinking capability (Claude 3.5+)
if kwargs.get("thinking_enabled"):
body["thinking"] = {
"type": "enabled",
"budget_tokens": kwargs.get("thinking_budget_tokens", 10000)
}
# Tool definitions
if tools:
body["tools"] = self._convert_tools(tools)
# Optional parameters
if "temperature" in kwargs:
body["temperature"] = kwargs["temperature"]
if "top_p" in kwargs:
body["top_p"] = kwargs["top_p"]
if "stop_sequences" in kwargs:
body["stop_sequences"] = kwargs["stop_sequences"]
return body, headers
def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert messages to Anthropic format
Anthropic message format:
- role: user, assistant
- content: str or List[Dict]
Args:
messages: OpenAI-format message list
Returns:
Anthropic-format message list
"""
result = []
for msg in messages:
role = msg.get("role")
content = msg.get("content", "")
# Skip system messages (handled separately)
if role == "system":
continue
# Process content
if isinstance(content, str):
anthropic_content = content
elif isinstance(content, dict):
anthropic_content = content.get("text", "")
else:
anthropic_content = str(content)
# Anthropic doesn't support tool role as message
if role == "tool":
# Tool results passed via tool_use block
continue
result.append({
"role": role,
"content": anthropic_content
})
return result
def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert tool definitions to Anthropic format
Anthropic tool format:
{
"name": "function_name",
"description": "...",
"input_schema": {...} # JSON Schema
}
Args:
tools: OpenAI-format tool list
Returns:
Anthropic-format tool list
"""
result = []
for tool in tools:
func = tool.get("function", {})
result.append({
"name": func.get("name"),
"description": func.get("description", ""),
"input_schema": func.get("parameters", {"type": "object", "properties": {}})
})
return result
async def parse_stream_chunk(
self,
raw_chunk: str
) -> AsyncGenerator[ParsedDelta, None]:
"""Parse Anthropic-format SSE stream
Anthropic streaming events:
- message_start: Message start
- content_block_start: Content block start (thinking/text/tool_use)
- content_block_delta: Content block delta
- content_block_stop: Content block stop
- message_delta: Message delta (usage)
- message_stop: Message completely stopped
- error: Error
Args:
raw_chunk: Raw SSE line
Yields:
ParsedDelta objects
"""
# Reset buffers
self._reset_buffers()
try:
chunk = json.loads(raw_chunk.strip())
except json.JSONDecodeError:
return
chunk_type = chunk.get("type", "")
result = ParsedDelta()
if chunk_type == self.BLOCK_MESSAGE_START:
# Message start
pass
elif chunk_type == self.BLOCK_CONTENT_BLOCK_START:
# Content block start
block = chunk.get("content_block", {})
block_type = block.get("type")
index = chunk.get("index", 0)
if block_type == self.SUBTYPE_THINKING:
# Thinking block start
thinking_text = block.get("thinking", {}).get("thinking", "")
self._thinking_buffer = thinking_text
result.thinking = self._thinking_buffer
elif block_type == self.SUBTYPE_TOOL_USE:
# Tool use block start
self._current_tool_index = index
self._current_tool_name = block.get("name", "")
self._tool_args_buffer = ""
elif chunk_type == self.BLOCK_CONTENT_BLOCK_DELTA:
# Content block delta
delta = chunk.get("delta", {})
delta_type = delta.get("type", "")
if delta_type == self.DELTA_THINKING:
# Thinking delta
thinking = delta.get("thinking", "")
self._thinking_buffer += thinking
result.thinking = self._thinking_buffer
elif delta_type == self.DELTA_TEXT:
# Text delta
text = delta.get("text", "")
self._text_buffer += text
result.text = self._text_buffer
elif delta_type == self.DELTA_INPUT_JSON:
# Tool arguments delta - accumulate but don't return in result
partial_json = delta.get("partial_json", "")
self._tool_args_buffer += partial_json
elif chunk_type == self.BLOCK_CONTENT_BLOCK_STOP:
# Content block stop
pass
elif chunk_type == self.BLOCK_MESSAGE_DELTA:
# Message delta (usually contains usage)
delta = chunk.get("delta", {})
usage = chunk.get("usage", {})
self._usage = {
"prompt_tokens": usage.get("input_tokens", 0),
"completion_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
}
result.usage = self._usage
# Check if complete by stop reason
if delta.get("stop_reason"):
result.is_complete = True
elif chunk_type == self.BLOCK_MESSAGE_STOP:
# Message completely stopped
result.is_complete = True
elif chunk_type == self.BLOCK_ERROR:
# Error
error = chunk.get("error", {})
error_msg = error.get("type", "") + ": " + error.get("message", "")
logger.error(f"Anthropic API error: {error_msg}")
yield ParsedDelta()
return
yield result
def parse_response(
self,
data: Dict[str, Any]
) -> LLMResponse:
"""Parse Anthropic-format non-streaming response
Anthropic response format:
{
"id": "...",
"type": "message",
"role": "assistant",
"content": [
{"type": "text", "text": "..."},
{"type": "thinking", "thinking": "..."},
{"type": "tool_use", "id": "...", "name": "...", "input": {...}}
],
"model": "...",
"usage": {"input_tokens": ..., "output_tokens": ...}
}
Args:
data: API response data
Returns:
LLMResponse object
"""
contents = data.get("content", [])
text_parts = []
thinking = ""
tool_calls = []
for block in contents:
block_type = block.get("type")
if block_type == self.SUBTYPE_TEXT:
text_parts.append(block.get("text", ""))
elif block_type == self.SUBTYPE_THINKING:
thinking = block.get("thinking", "")
elif block_type == self.SUBTYPE_TOOL_USE:
tool_calls.append({
"id": block.get("id"),
"type": "function",
"function": {
"name": block.get("name"),
"arguments": json.dumps(block.get("input", {}))
}
})
usage = data.get("usage", {})
return LLMResponse(
content="\n".join(text_parts),
thinking=thinking,
tool_calls=tool_calls if tool_calls else None,
usage={
"prompt_tokens": usage.get("input_tokens", 0),
"completion_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
}
)
def supports_thinking(self) -> bool:
return True
def supports_tools(self) -> bool:
return True
def _reset_buffers(self):
"""Reset buffers (call when starting new message)"""
self._thinking_buffer = ""
self._text_buffer = ""
self._tool_args_buffer = ""
self._current_tool_index = -1
self._current_tool_name = ""
self._usage = {}

View File

@ -0,0 +1,97 @@
"""ProviderAdapter - LLM Provider adapter base class
Defines unified adapter interface that all Provider adapters must implement.
"""
from abc import ABC, abstractmethod
from typing import Dict, List, Any, AsyncGenerator
class ProviderAdapter(ABC):
"""LLM Provider adapter base class
All LLM API adapters must inherit from this class and implement its methods.
Adapters are responsible for:
1. Building requests in provider-specific format
2. Parsing streaming and non-streaming responses
3. Providing provider capability information
Attributes:
provider_type: Provider type identifier
"""
@property
@abstractmethod
def provider_type(self) -> str:
"""Return provider type identifier
Returns:
str: Provider type, e.g., "openai", "anthropic"
"""
pass
@abstractmethod
def build_request(
self,
model: str,
messages: List[Dict[str, Any]],
tools: List[Dict[str, Any]] = None,
**kwargs
) -> tuple[Dict[str, Any], Dict[str, str]]:
"""Build request body and headers
Args:
model: Model name
messages: Message list
tools: Tool definition list
**kwargs: Other parameters (temperature, max_tokens, thinking_enabled, etc.)
Returns:
tuple: (body, headers) tuple
"""
pass
@abstractmethod
async def parse_stream_chunk(
self,
raw_chunk: str
) -> AsyncGenerator[Any, None]:
"""Parse streaming response chunk
Args:
raw_chunk: Raw SSE line or chunk data
Yields:
ParsedDelta object or other parsed results
"""
pass
@abstractmethod
def parse_response(
self,
data: Dict[str, Any]
) -> Any:
"""Parse non-streaming response
Args:
data: API response data
Returns:
LLMResponse object
"""
pass
def supports_thinking(self) -> bool:
"""Whether provider supports thinking/reasoning
Returns:
bool: True if provider supports thinking/reasoning content
"""
return False
def supports_tools(self) -> bool:
"""Whether provider supports tool/function calls
Returns:
bool: True if provider supports function calling
"""
return False

View File

@ -0,0 +1,238 @@
"""OpenAI Adapter - OpenAI-compatible API adapter
Supports OpenAI, DeepSeek, GLM and other OpenAI-compatible APIs.
"""
import json
import logging
from typing import Dict, List, Any, AsyncGenerator
from .base import ProviderAdapter
from ..llm_response import ParsedDelta, LLMResponse, StreamAccumulator, llm_parser_factory
logger = logging.getLogger(__name__)
class OpenAIAdapter(ProviderAdapter):
"""OpenAI-compatible API adapter
Supported Providers:
- OpenAI (api.openai.com)
- DeepSeek (api.deepseek.com)
- GLM/Zhipu AI
- Any service compatible with OpenAI Chat Completions API
Features:
- Thinking content (reasoning_content, reasoning)
- Tool calls (tool_calls)
- Streaming responses (SSE)
"""
@property
def provider_type(self) -> str:
return "openai"
def __init__(self):
self._accumulator = llm_parser_factory()
def build_request(
self,
model: str,
messages: List[Dict[str, Any]],
tools: List[Dict[str, Any]] = None,
**kwargs
) -> tuple[Dict[str, Any], Dict[str, str]]:
"""Build OpenAI-format request"""
api_key = kwargs.get("api_key", "")
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
body = {
"model": model,
"messages": messages,
"stream": kwargs.get("stream", True)
}
# Optional parameters
if "temperature" in kwargs:
body["temperature"] = kwargs["temperature"]
if "max_tokens" in kwargs:
body["max_tokens"] = kwargs["max_tokens"]
if "top_p" in kwargs:
body["top_p"] = kwargs["top_p"]
if "frequency_penalty" in kwargs:
body["frequency_penalty"] = kwargs["frequency_penalty"]
if "presence_penalty" in kwargs:
body["presence_penalty"] = kwargs["presence_penalty"]
if "stop" in kwargs:
body["stop"] = kwargs["stop"]
# Tool definitions
if tools:
body["tools"] = tools
# Thinking capability (DeepSeek, etc.)
if kwargs.get("thinking_enabled"):
body["thinking_enabled"] = True
body["thoughts"] = [{"type": "thought", "text": ""}]
return body, headers
def reset(self):
"""Reset accumulator for new stream"""
self._accumulator.reset()
async def parse_stream_chunk(
self,
raw_chunk: str
) -> AsyncGenerator[ParsedDelta, None]:
"""Parse OpenAI-format SSE stream"""
# Parse SSE line
event_type, data_str = self._parse_sse_line(raw_chunk)
# Skip empty data
if not data_str:
return
# Handle [DONE] marker
if data_str == "[DONE]":
self._accumulator.set_complete()
yield self._accumulator._create_delta()
return
try:
chunk = json.loads(data_str)
except json.JSONDecodeError:
logger.warning(f"Failed to parse chunk: {data_str[:100]}")
return
# Handle errors
if event_type == "error" or "error" in chunk:
error_content = chunk.get("error", {}).get("message", str(chunk))
logger.error(f"Stream error: {error_content}")
yield ParsedDelta()
return
# Extract usage (usually in the last chunk)
usage = chunk.get("usage")
if usage:
self._accumulator.set_usage(usage)
# Parse choices
for choice in chunk.get("choices", []):
delta = choice.get("delta", {})
# Handle thinking content (DeepSeek, etc.)
thinking = delta.get("reasoning_content") or delta.get("reasoning") or ""
if thinking:
self._accumulator.thinking += thinking
self._accumulator.thinking = self._accumulator.thinking # trigger setter
# Handle text content
content = delta.get("content") or ""
if content:
# Check for embedded thinking tags
thinking_part, clean_text = self._extract_thinking_tags(content)
if thinking_part:
self._accumulator.thinking += thinking_part
if clean_text:
self._accumulator.text += clean_text
# Tool calls
tool_calls = delta.get("tool_calls")
if tool_calls:
self._accumulator.tool_calls = tool_calls
# Check if complete
finish_reason = choice.get("finish_reason")
if finish_reason:
self._accumulator.is_complete = True
# Only yield if there's meaningful content
if self._accumulator.has_content() or self._accumulator.is_complete:
yield self._accumulator._create_delta()
def parse_response(
self,
data: Dict[str, Any]
) -> LLMResponse:
"""Parse OpenAI-format non-streaming response"""
choice = data.get("choices", [{}])[0]
message = choice.get("message", {})
content = message.get("content", "") or ""
tool_calls = message.get("tool_calls")
usage = data.get("usage")
# Extract thinking content
thinking = ""
if content:
thinking, clean_content = self._extract_thinking_tags(content)
content = clean_content
# DeepSeek may put thinking content in separate field
if not thinking:
thinking = message.get("reasoning_content") or ""
return LLMResponse(
content=content,
thinking=thinking,
tool_calls=tool_calls,
usage=usage
)
def supports_thinking(self) -> bool:
return True
def supports_tools(self) -> bool:
return True
def _parse_sse_line(self, line: str) -> tuple:
"""Parse SSE line"""
event_type = None
data_str = None
for part in line.strip().split('\n'):
if part.startswith('event: '):
event_type = part[7:].strip()
elif part.startswith('data: '):
data_str = part[6:].strip()
return event_type, data_str
def _extract_thinking_tags(self, content: str) -> tuple:
"""Extract thinking tags from content
Supported formats:
- Standard: <think>...</think>
"""
thinking_parts = []
clean_parts = []
i = 0
while i < len(content):
remaining = content[i:].lower()
# Standard format
if remaining.startswith("<think>"):
end_tag = "</think>"
start = i + 7 # len("<think>")
end = content.find(end_tag, start)
if end != -1:
thinking_parts.append(content[start:end])
i = end + len(end_tag)
continue
# Regular character
clean_parts.append(content[i])
i += 1
return "".join(thinking_parts), "".join(clean_parts)

View File

@ -1,165 +1,258 @@
"""LLM API client""" """LLM API Client - Unified client with multi-Provider support
Supports various LLM API formats:
- OpenAI (api.openai.com)
- DeepSeek (api.deepseek.com)
- Anthropic (api.anthropic.com)
- GLM/Zhipu AI
Usage:
from luxx.services.llm_client import LLMClient
# Auto-detect provider
client = LLMClient(api_key="...", api_url="...")
# Specify provider
client = LLMClient(api_key="...", api_url="...", provider_type="anthropic")
# Streaming call
async for delta in client.stream_call(model, messages, tools=tools):
print(delta.text, delta.thinking, delta.tool_calls)
"""
import json import json
import httpx
import logging import logging
import traceback import traceback
from typing import Dict, Any, Optional, List, AsyncGenerator from typing import Dict, List, Any, Optional, AsyncGenerator
import httpx
from luxx.config import config from luxx.config import config
from luxx.services.llm_adapters import (
ProviderAdapter,
OpenAIAdapter,
AnthropicAdapter,
)
from luxx.services.llm_response import ParsedDelta, LLMResponse
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class LLMResponse: class LLMClient:
"""LLM response""" """LLM API Client with multi-Provider support
content: str
tool_calls: Optional[List[Dict]] = None Uses adapter pattern to support different API formats, auto-detects or manually specifies Provider type.
usage: Optional[Dict] = None
Attributes:
api_key: API key
api_url: API base URL
default_model: Default model
provider_type: Provider type
adapter: Current adapter instance
"""
# Provider type to adapter class mapping
PROVIDER_ADAPTERS: Dict[str, type] = {
# OpenAI-compatible formats
"openai": OpenAIAdapter,
"deepseek": OpenAIAdapter,
"glm": OpenAIAdapter,
"zhipu": OpenAIAdapter,
# Anthropic formats
"anthropic": AnthropicAdapter,
"claude": AnthropicAdapter,
}
# URL keywords for provider detection
PROVIDER_URL_KEYWORDS: Dict[str, List[str]] = {
"anthropic": ["anthropic", "claude"],
"deepseek": ["deepseek"],
"glm": ["glm", "zhipu", "chatglm"],
"openai": ["openai"],
}
def __init__( def __init__(
self, self,
content: str = "", api_key: str = None,
tool_calls: Optional[List[Dict]] = None, api_url: str = None,
usage: Optional[Dict] = None model: str = None,
provider_type: str = None
): ):
self.content = content """Initialize LLM client
self.tool_calls = tool_calls
self.usage = usage
Args:
class LLMClient: api_key: API key, defaults to config value
"""LLM API client with multi-provider support""" api_url: API base URL, defaults to config value
model: Default model name
def __init__(self, api_key: str = None, api_url: str = None, model: str = None): provider_type: Specify Provider type, defaults to auto-detect
"""
self.api_key = api_key or config.llm_api_key self.api_key = api_key or config.llm_api_key
self.api_url = api_url or config.llm_api_url self.api_url = api_url or config.llm_api_url
self.default_model = model self.default_model = model
self.provider = self._detect_provider()
# Detect or use specified provider
if provider_type:
self.provider_type = provider_type
else:
self.provider_type = self._detect_provider_type(api_url)
self.adapter = self._create_adapter()
self._client: Optional[httpx.AsyncClient] = None self._client: Optional[httpx.AsyncClient] = None
def _detect_provider(self) -> str: def _detect_provider_type(self, url: str = None) -> str:
"""Detect provider from URL""" """Detect Provider type from URL
url = self.api_url.lower()
if "deepseek" in url: Args:
return "deepseek" url: API URL, uses self.api_url if None
elif "glm" in url or "zhipu" in url:
return "glm" Returns:
elif "openai" in url: Provider type string
return "openai" """
url = url or self.api_url
url_lower = url.lower()
for provider, keywords in self.PROVIDER_URL_KEYWORDS.items():
for keyword in keywords:
if keyword in url_lower:
logger.debug(f"Detected provider '{provider}' from URL: {url}")
return provider
logger.debug(f"Defaulting to 'openai' for URL: {url}")
return "openai" return "openai"
async def close(self): def _create_adapter(self) -> ProviderAdapter:
"""Close client""" """Create adapter instance
if self._client:
await self._client.aclose()
self._client = None
def _build_headers(self) -> Dict[str, str]: Returns:
"""Build request headers""" ProviderAdapter subclass instance
return { """
"Content-Type": "application/json", adapter_class = self.PROVIDER_ADAPTERS.get(
"Authorization": f"Bearer {self.api_key}" self.provider_type,
} OpenAIAdapter
def _build_body(
self,
model: str,
messages: List[Dict],
tools: Optional[List[Dict]] = None,
stream: bool = False,
**kwargs
) -> Dict[str, Any]:
"""Build request body"""
body = {
"model": model,
"messages": messages,
"stream": stream
}
if "temperature" in kwargs:
body["temperature"] = kwargs["temperature"]
if "max_tokens" in kwargs:
body["max_tokens"] = kwargs["max_tokens"]
if "thinking_enabled" in kwargs and kwargs["thinking_enabled"]:
body["thinking_enabled"] = True
if tools:
body["tools"] = tools
return body
def _parse_response(self, data: Dict) -> LLMResponse:
"""Parse response"""
content = ""
tool_calls = None
usage = None
if "choices" in data:
choice = data["choices"][0]
content = choice.get("message", {}).get("content", "")
tool_calls = choice.get("message", {}).get("tool_calls")
if "usage" in data:
usage = data["usage"]
return LLMResponse(
content=content,
tool_calls=tool_calls,
usage=usage
) )
logger.info(f"Created {adapter_class.__name__} for provider: {self.provider_type}")
return adapter_class()
@property
def supports_thinking(self) -> bool:
"""Whether current Provider supports thinking content"""
return self.adapter.supports_thinking()
@property
def supports_tools(self) -> bool:
"""Whether current Provider supports tool calls"""
return self.adapter.supports_tools()
async def client(self) -> httpx.AsyncClient: async def client(self) -> httpx.AsyncClient:
"""Get HTTP client""" """Get HTTP client (lazy load)"""
if self._client is None: if self._client is None or self._client.is_closed:
self._client = httpx.AsyncClient(timeout=120.0) self._client = httpx.AsyncClient(timeout=120.0)
return self._client return self._client
async def sync_call( async def close(self):
"""Close HTTP client"""
if self._client and not self._client.is_closed:
await self._client.aclose()
self._client = None
def sync_call(
self, self,
model: str, model: str,
messages: List[Dict], messages: List[Dict[str, Any]],
tools: Optional[List[Dict]] = None, tools: List[Dict[str, Any]] = None,
**kwargs **kwargs
) -> LLMResponse: ) -> LLMResponse:
"""Call LLM API (non-streaming)""" """Synchronous call to LLM (non-streaming)
body = self._build_body(model, messages, tools, stream=False, **kwargs)
Args:
model: Model name
messages: Message list
tools: Tool definition list
**kwargs: Other parameters (temperature, max_tokens, thinking_enabled, etc.)
Returns:
LLMResponse object
"""
import asyncio
return asyncio.get_event_loop().run_until_complete(
self.async_sync_call(model, messages, tools, **kwargs)
)
async def async_sync_call(
self,
model: str,
messages: List[Dict[str, Any]],
tools: List[Dict[str, Any]] = None,
**kwargs
) -> LLMResponse:
"""Internal async sync call"""
model = model or self.default_model
kwargs["api_key"] = self.api_key
body, headers = self.adapter.build_request(
model, messages, tools, stream=False, **kwargs
)
endpoint = self.api_url
logger.info(f"Sync call to {endpoint} with model {model}")
try:
async with httpx.AsyncClient(timeout=120.0) as client: async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post( response = await client.post(
self.api_url, endpoint,
headers=self._build_headers(), headers=headers,
json=body json=body
) )
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return self._parse_response(data) return self.adapter.parse_response(data)
except httpx.HTTPStatusError as e:
logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
raise
except Exception as e:
logger.error(f"Sync call error: {e}\n{traceback.format_exc()}")
raise
async def stream_call( async def stream_call(
self, self,
model: str, model: str,
messages: List[Dict], messages: List[Dict[str, Any]],
tools: Optional[List[Dict]] = None, tools: List[Dict[str, Any]] = None,
**kwargs **kwargs
) -> AsyncGenerator[str, None]: ) -> AsyncGenerator[ParsedDelta, None]:
"""Stream call LLM API - yields raw SSE event lines """Streaming call to LLM
Args:
model: Model name
messages: Message list
tools: Tool definition list
**kwargs: Other parameters
Yields: Yields:
str: Raw SSE event lines for direct forwarding ParsedDelta objects with accumulated content
""" """
body = self._build_body(model, messages, tools, stream=True, **kwargs) # Reset adapter buffers for new stream
if hasattr(self.adapter, 'reset'):
self.adapter.reset()
logger.info(f"Starting stream_call for model: {model}, messages count: {len(messages)}") model = model or self.default_model
kwargs["api_key"] = self.api_key
kwargs["stream"] = True
body, headers = self.adapter.build_request(
model, messages, tools, **kwargs
)
endpoint = self.api_url
logger.info(f"Stream call to {endpoint} with model {model}")
try: try:
async with httpx.AsyncClient(timeout=120.0) as client: async with httpx.AsyncClient(timeout=120.0) as client:
logger.info(f"Sending request to {self.api_url}")
async with client.stream( async with client.stream(
"POST", "POST",
self.api_url, endpoint,
headers=self._build_headers(), headers=headers,
json=body json=body
) as response: ) as response:
logger.info(f"Response status: {response.status_code}") logger.info(f"Response status: {response.status_code}")
@ -167,15 +260,40 @@ class LLMClient:
async for line in response.aiter_lines(): async for line in response.aiter_lines():
if line.strip(): if line.strip():
yield line + "\n" async for delta in self.adapter.parse_stream_chunk(line):
yield delta
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
status_code = e.response.status_code if e.response else "?" status_code = e.response.status_code if e.response else "?"
logger.error(f"HTTP error: {status_code}") error_body = e.response.text if e.response else ""
yield f"event: error\ndata: {json.dumps({'content': f'HTTP {status_code}: Request failed'})}\n\n" logger.error(f"HTTP error: {status_code} - {error_body}")
yield ParsedDelta()
except Exception as e: except Exception as e:
logger.error(f"Exception: {type(e).__name__}: {str(e)}\n{traceback.format_exc()}") logger.error(f"Stream error: {type(e).__name__}: {e}\n{traceback.format_exc()}")
yield f"event: error\ndata: {json.dumps({'content': str(e)})}\n\n" yield ParsedDelta()
# Global LLM client # Convenience function
llm_client = LLMClient() def create_client(
api_key: str = None,
api_url: str = None,
model: str = None,
provider_type: str = None
) -> LLMClient:
"""Convenience function to create LLM client
Args:
api_key: API key
api_url: API URL
model: Model
provider_type: Provider type
Returns:
LLMClient instance
"""
return LLMClient(
api_key=api_key,
api_url=api_url,
model=model,
provider_type=provider_type
)

View File

@ -1,309 +1,162 @@
"""LLM Response Parser - Unified parser for multiple LLM API formats. """LLM Response - Unified LLM response data structures
Supported Providers: Provides unified response format for data transfer between adapter layer and business layer.
- OpenAI: delta.content, delta.tool_calls
- DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
- Anthropic: content_block with thinking/text types
- MiniMax: <|im_start|>thinking...<|im_end|> tags in content
Data Flow: Data format:
``` 1. ParsedDelta: Single delta in streaming response (with accumulated content)
LLM API Response (SSE) 2. LLMResponse: Complete LLM response
3. StreamAccumulator: Helper class for accumulating streaming content
LLMResponseParser.parse_chunk()
ParsedDelta { thinking, text, tool_calls }
AgenticLoop._process_stream_line()
SSE Events (process_step)
type: "thinking"
type: "text"
type: "tool_call"
```
API Response Formats:
1. OpenAI Standard (DeepSeek, OpenAI):
```json
{
"choices": [{
"delta": {
"content": "Hello",
"reasoning_content": "Let me think...",
"tool_calls": [{"id": "call_1", "function": {...}}]
}
}]
}
```
2. Anthropic Streaming:
```json
{"type": "content_block_start", "content_block": {"type": "thinking", "thinking": "..."}}
{"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "..."}}
{"type": "content_block_stop"}
```
3. MiniMax (with thinking tags in content):
```json
{
"choices": [{
"delta": {
"content": "<|im_start|>thinking分析中...<|im_end|>回复内容"
}
}]
}
```
4. Standard thinking tags:
```json
{
"choices": [{
"delta": {
"content": "<think>思考内容</think>回复内容"
}
}]
}
```
""" """
from typing import Dict, Any, Optional, List from typing import Dict, Any, Optional, List
from dataclasses import dataclass from dataclasses import dataclass, field
@dataclass @dataclass
class ParsedDelta: class ParsedDelta:
"""Parsed response delta from LLM. """Streaming response delta
Contains accumulated content from streaming response.
Attributes: Attributes:
thinking: Thinking/reasoning content thinking: Accumulated thinking/reasoning content
text: Regular text content text: Accumulated text content
tool_calls: Tool call requests tool_calls: List of tool calls
is_complete: Whether this delta completes a content block is_complete: Whether the response is complete
usage: Token usage statistics
""" """
thinking: str = "" thinking: str = ""
text: str = "" text: str = ""
tool_calls: List[Dict] = None tool_calls: List[Dict] = field(default_factory=list)
is_complete: bool = False is_complete: bool = False
usage: Dict[str, int] = None
def __post_init__(self): def __post_init__(self):
if self.tool_calls is None: if self.tool_calls is None:
self.tool_calls = [] self.tool_calls = []
if self.usage is None:
self.usage = {}
def has_content(self) -> bool:
"""Check if there's any meaningful content"""
return bool(self.thinking or self.text or self.tool_calls)
def merge(self, other: 'ParsedDelta') -> 'ParsedDelta':
"""Merge another delta"""
return ParsedDelta(
thinking=self.thinking + other.thinking,
text=self.text + other.text,
tool_calls=self.tool_calls or other.tool_calls,
is_complete=other.is_complete,
usage=other.usage or self.usage
)
class LLMResponseParser: @dataclass
"""Unified parser for LLM API response formats. class LLMResponse:
"""Complete LLM response
Usage: Attributes:
from luxx.services.llm_response import llm_parser content: Final text content
thinking: Final thinking content (if any)
# Parse OpenAI format tool_calls: List of tool calls (if any)
delta = {"content": "Hello", "reasoning_content": "Thinking..."} usage: Token usage
parsed = llm_parser.parse_openai(delta)
# Parse Anthropic format
chunk = {"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
parsed = llm_parser.parse_anthropic(chunk)
# Auto-detect format
parsed = llm_parser.parse_chunk(chunk, provider="anthropic")
""" """
content: str = ""
thinking: str = ""
tool_calls: List[Dict] = field(default_factory=list)
usage: Dict[str, int] = None
# Content block types def __post_init__(self):
BLOCK_THINKING = "thinking" if self.tool_calls is None:
BLOCK_TEXT = "text" self.tool_calls = []
BLOCK_TOOL_USE = "tool_use" if self.usage is None:
BLOCK_TOOL_RESULT = "tool_result" self.usage = {}
@property
def has_tool_calls(self) -> bool:
"""Check if there are tool calls"""
return bool(self.tool_calls)
def to_message_content(self) -> str:
"""Convert to message content format"""
return self.content
class StreamAccumulator:
"""Accumulates streaming response content
Helper class for adapters to accumulate streaming content
and create ParsedDelta with accumulated results.
"""
def __init__(self): def __init__(self):
self._buffer = "" self.reset()
self._thinking_buffer = ""
self._text_buffer = ""
def reset(self): def reset(self):
"""Reset parser state for new message.""" """Reset all buffers for new stream"""
self._buffer = "" self.thinking = ""
self._thinking_buffer = "" self.text = ""
self._text_buffer = "" self.tool_calls: List[Dict] = []
self.is_complete = False
self.usage: Dict[str, int] = {}
def parse_openai(self, delta: Dict) -> ParsedDelta: def has_content(self) -> bool:
"""Parse OpenAI format delta. """Check if there's any meaningful content"""
return bool(self.thinking or self.text or self.tool_calls)
Handles: def add_thinking(self, content: str) -> 'ParsedDelta':
- OpenAI: delta.content, delta.tool_calls """Add thinking/reasoning content"""
- DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls if content:
- MiniMax: <|im_start|>thinking...<|im_end|> in content self.thinking = content
- Standard: <think>...</think> in content return self._create_delta()
Args: def add_text(self, content: str) -> 'ParsedDelta':
delta: Delta object from LLM API response """Add text content"""
if content:
self.text = content
return self._create_delta()
Returns: def add_tool_calls(self, tool_calls: List[Dict]) -> 'ParsedDelta':
ParsedDelta with extracted thinking, text, and tool_calls """Add tool calls"""
""" if tool_calls:
result = ParsedDelta() self.tool_calls = tool_calls
return self._create_delta()
# Get thinking content (DeepSeek uses reasoning_content) def set_complete(self, is_complete: bool = True) -> 'ParsedDelta':
thinking = delta.get("reasoning_content") or delta.get("reasoning") or "" """Mark response as complete"""
if thinking: self.is_complete = is_complete
self._thinking_buffer += thinking return self._create_delta()
result.thinking = self._thinking_buffer
# Get text content def set_usage(self, usage: Dict[str, int]) -> 'ParsedDelta':
text = delta.get("content") or "" """Set token usage"""
if text: if usage:
# Check for embedded thinking tags (MiniMax format) self.usage = usage
thinking_part, clean_text = self._extract_thinking_tags(text) return self._create_delta()
if thinking_part:
self._thinking_buffer += thinking_part
result.thinking = self._thinking_buffer
if clean_text:
self._text_buffer += clean_text
result.text = self._text_buffer
elif thinking_part := delta.get("thinking"):
# Some providers use "thinking" field directly
self._thinking_buffer += thinking_part
result.thinking = self._thinking_buffer
# Tool calls def _create_delta(self) -> ParsedDelta:
result.tool_calls = delta.get("tool_calls") or [] """Create ParsedDelta from current accumulated state"""
return ParsedDelta(
thinking=self.thinking,
text=self.text,
tool_calls=self.tool_calls,
is_complete=self.is_complete,
usage=self.usage
)
return result def to_response(self) -> LLMResponse:
"""Convert to complete LLMResponse"""
def parse_anthropic(self, chunk: Dict) -> ParsedDelta: return LLMResponse(
"""Parse Anthropic streaming format. content=self.text,
thinking=self.thinking,
Anthropic uses a different event structure: tool_calls=self.tool_calls,
- content_block_start: Begin a content block usage=self.usage
- content_block_delta: Incremental content )
- content_block_stop: End of content blocks
Content block types:
- thinking: Model reasoning
- text: Regular text
- tool_use: Tool invocation
- tool_result: Tool output
Args:
chunk: Anthropic SSE event chunk
Returns:
ParsedDelta with extracted content
"""
result = ParsedDelta()
chunk_type = chunk.get("type", "")
if chunk_type == "content_block_start":
block = chunk.get("content_block", {})
if block.get("type") == self.BLOCK_THINKING:
thinking = block.get("thinking", "")
if thinking:
self._thinking_buffer = thinking
result.thinking = self._thinking_buffer
elif chunk_type == "content_block_delta":
delta = chunk.get("delta", {})
delta_type = delta.get("type", "")
if delta_type == "thinking_delta":
thinking = delta.get("thinking", "")
self._thinking_buffer += thinking
result.thinking = self._thinking_buffer
elif delta_type == "text_delta":
text = delta.get("text", "")
self._text_buffer += text
result.text = self._text_buffer
elif delta_type == "partial_json":
# Partial JSON for tool calls
pass
elif chunk_type == "content_block_stop":
result.is_complete = True
return result
def parse_chunk(self, chunk: Dict, provider: str = "openai") -> ParsedDelta:
"""Parse chunk based on provider.
Args:
chunk: Response chunk from LLM
provider: Provider name ("openai", "anthropic", "deepseek", "minimax")
Returns:
ParsedDelta with extracted content
"""
if provider == "anthropic":
return self.parse_anthropic(chunk)
# Default to OpenAI format
return self.parse_openai(chunk)
def _extract_thinking_tags(self, content: str) -> tuple:
"""Extract thinking content from tags.
Handles multiple tag formats:
- MiniMax: <|im_start|>thinking...<|im_end|>
- Standard: <think>...</think>
Args:
content: Raw content string from LLM
Returns:
Tuple of (thinking_content, clean_text)
"""
thinking_parts = []
clean_parts = []
i = 0
while i < len(content):
remaining = content[i:].lower()
# Check for MiniMax format
if remaining.startswith("<|im_start|>thinking"):
end_tag = "<|im_end|>"
start = i + 21 # len("<|im_start|>thinking")
end = content.find(end_tag, start)
if end != -1:
thinking_parts.append(content[start:end])
i = end + len(end_tag)
continue
# Check for standard format
if remaining.startswith("<think>"):
end_tag = "</think>"
start = i + 7 # len("<think>")
end = content.find(end_tag, start)
if end != -1:
thinking_parts.append(content[start:end])
i = end + len(end_tag)
continue
# Regular character
clean_parts.append(content[i])
i += 1
return "".join(thinking_parts), "".join(clean_parts)
def has_thinking_tags(self, content: str) -> bool:
"""Check if content contains thinking tags.
Args:
content: Raw content string
Returns:
True if content contains thinking tags
"""
if not content:
return False
lower = content.lower()
return "<|im_start|>thinking" in lower or "<think>" in lower
# Global parser instance # Backward compatibility alias
llm_parser = LLMResponseParser() LLMResponseParser = StreamAccumulator
def llm_parser_factory() -> StreamAccumulator:
"""Factory function to create a new StreamAccumulator"""
return StreamAccumulator()