Luxx/luxx/services/llm_response.py

310 lines
9.2 KiB
Python

"""LLM Response Parser - Unified parser for multiple LLM API formats.
Supported Providers:
- OpenAI: delta.content, delta.tool_calls
- DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
- Anthropic: content_block with thinking/text types
- MiniMax: <|im_start|>thinking...<|im_end|> tags in content
Data Flow:
```
LLM API Response (SSE)
        │
        ▼
LLMResponseParser.parse_chunk()
        │
        ├──► ParsedDelta { thinking, text, tool_calls }
        ▼
AgenticLoop._process_stream_line()
        │
        ▼
SSE Events (process_step)
        ├──► type: "thinking"
        ├──► type: "text"
        └──► type: "tool_call"
```
API Response Formats:
1. OpenAI Standard (DeepSeek, OpenAI):
```json
{
"choices": [{
"delta": {
"content": "Hello",
"reasoning_content": "Let me think...",
"tool_calls": [{"id": "call_1", "function": {...}}]
}
}]
}
```
2. Anthropic Streaming:
```json
{"type": "content_block_start", "content_block": {"type": "thinking", "thinking": "..."}}
{"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "..."}}
{"type": "content_block_stop"}
```
3. MiniMax (with thinking tags in content):
```json
{
"choices": [{
"delta": {
"content": "<|im_start|>thinking分析中...<|im_end|>回复内容"
}
}]
}
```
4. Standard thinking tags:
```json
{
"choices": [{
"delta": {
"content": "<think>思考内容</think>回复内容"
}
}]
}
```
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ParsedDelta:
    """Parsed response delta from an LLM.

    Attributes:
        thinking: Thinking/reasoning content.
        text: Regular text content.
        tool_calls: Tool call requests. Every instance gets its own list.
        is_complete: Whether this delta completes a content block.
    """

    thinking: str = ""
    text: str = ""
    # default_factory gives each instance a fresh list and keeps the default
    # consistent with the declared type (a ``None`` default was not).
    tool_calls: List[Dict] = field(default_factory=list)
    is_complete: bool = False

    def __post_init__(self):
        # Backward compatibility: callers may still pass tool_calls=None
        # explicitly; normalise it to an empty list as before.
        if self.tool_calls is None:
            self.tool_calls = []
class LLMResponseParser:
    """Unified parser for LLM API response formats.

    The parser is stateful: thinking and text fragments are appended to
    internal buffers. When a chunk contributes thinking or text, the
    corresponding ``ParsedDelta`` field holds the whole buffer accumulated so
    far (not only the newest fragment); fields the chunk did not touch stay
    empty. Call :meth:`reset` before parsing a new message.

    Usage:
        from luxx.services.llm_response import llm_parser

        # Parse OpenAI format
        delta = {"content": "Hello", "reasoning_content": "Thinking..."}
        parsed = llm_parser.parse_openai(delta)

        # Parse Anthropic format
        chunk = {"type": "content_block_delta",
                 "delta": {"type": "thinking_delta", "thinking": "..."}}
        parsed = llm_parser.parse_anthropic(chunk)

        # Select the parser by provider name
        parsed = llm_parser.parse_chunk(chunk, provider="anthropic")
    """

    # Content block types
    BLOCK_THINKING = "thinking"
    BLOCK_TEXT = "text"
    BLOCK_TOOL_USE = "tool_use"
    BLOCK_TOOL_RESULT = "tool_result"

    # (opening tag, closing tag) pairs for thinking embedded in content.
    # Stored lower-case; both tags are matched case-insensitively.
    _THINKING_TAGS = (
        ("<|im_start|>thinking", "<|im_end|>"),  # MiniMax
        ("<think>", "</think>"),                 # standard
    )

    # ASCII-only lower-casing table. Unlike str.lower(), translating with it
    # never changes the string length, so offsets found in the lowered copy
    # are valid offsets into the original string.
    _ASCII_LOWER = str.maketrans(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"
    )

    def __init__(self):
        # Not read by any method of this class; kept so existing code that
        # touches the attribute keeps working.
        self._buffer = ""
        self._thinking_buffer = ""
        self._text_buffer = ""

    def reset(self):
        """Reset parser state for a new message."""
        self._buffer = ""
        self._thinking_buffer = ""
        self._text_buffer = ""

    def parse_openai(self, delta: Dict) -> ParsedDelta:
        """Parse an OpenAI-format delta.

        Handles:
            - OpenAI: delta.content, delta.tool_calls
            - DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
            - MiniMax: <|im_start|>thinking...<|im_end|> in content
            - Standard: <think>...</think> in content

        Args:
            delta: The ``delta`` object from an LLM API response chunk.
                ``None`` is treated as an empty delta.

        Returns:
            ParsedDelta with the accumulated thinking/text (for the fields
            this delta contributed to) and this delta's tool_calls.
        """
        result = ParsedDelta()
        delta = delta or {}

        # Dedicated reasoning fields (DeepSeek uses reasoning_content).
        thinking = delta.get("reasoning_content") or delta.get("reasoning") or ""
        if thinking:
            self._thinking_buffer += thinking
            result.thinking = self._thinking_buffer

        text = delta.get("content") or ""
        if text:
            # Content may embed thinking inside tags (MiniMax / <think>).
            # Each content string is scanned on its own, so a tag pair that
            # is split across two streamed deltas is not recognised.
            thinking_part, clean_text = self._extract_thinking_tags(text)
            if thinking_part:
                self._thinking_buffer += thinking_part
                result.thinking = self._thinking_buffer
            if clean_text:
                self._text_buffer += clean_text
                result.text = self._text_buffer
        elif thinking_part := delta.get("thinking"):
            # Some providers use a "thinking" field directly; it is only
            # consulted when the delta carries no content.
            self._thinking_buffer += thinking_part
            result.thinking = self._thinking_buffer

        # Tool calls are passed through as-is, not accumulated.
        result.tool_calls = delta.get("tool_calls") or []
        return result

    def parse_anthropic(self, chunk: Dict) -> ParsedDelta:
        """Parse an Anthropic streaming event.

        Event types handled:
            - content_block_start: a "thinking" block with non-empty initial
              thinking replaces the thinking buffer
            - content_block_delta: "thinking_delta" / "text_delta" fragments
              are appended to the matching buffer
            - content_block_stop: marks the returned delta as complete

        Any other event or delta type yields an empty ParsedDelta.

        Args:
            chunk: Anthropic SSE event, already decoded into a dict.

        Returns:
            ParsedDelta with the extracted content.
        """
        result = ParsedDelta()
        chunk_type = chunk.get("type", "")

        if chunk_type == "content_block_start":
            block = chunk.get("content_block") or {}
            if block.get("type") == self.BLOCK_THINKING:
                thinking = block.get("thinking") or ""
                if thinking:
                    self._thinking_buffer = thinking
                    result.thinking = self._thinking_buffer
        elif chunk_type == "content_block_delta":
            delta = chunk.get("delta") or {}
            delta_type = delta.get("type", "")
            # `or ""` guards against an explicit null, which would otherwise
            # raise TypeError on the string concatenation.
            if delta_type == "thinking_delta":
                self._thinking_buffer += delta.get("thinking") or ""
                result.thinking = self._thinking_buffer
            elif delta_type == "text_delta":
                self._text_buffer += delta.get("text") or ""
                result.text = self._text_buffer
            # Tool-input JSON fragments are intentionally not accumulated
            # here. Anthropic's streaming docs name that delta type
            # "input_json_delta" (with a "partial_json" field).
        elif chunk_type == "content_block_stop":
            result.is_complete = True

        return result

    def parse_chunk(self, chunk: Dict, provider: str = "openai") -> ParsedDelta:
        """Parse a chunk using the parser that matches the provider.

        Args:
            chunk: Response chunk from the LLM. For OpenAI-style providers
                this may be either the bare delta or a full chunk of the
                form ``{"choices": [{"delta": {...}}]}``; the latter is
                unwrapped to its first choice's delta.
            provider: Provider name ("openai", "anthropic", "deepseek",
                "minimax"). Anything other than "anthropic" is parsed as
                OpenAI format.

        Returns:
            ParsedDelta with the extracted content.
        """
        if provider == "anthropic":
            return self.parse_anthropic(chunk)
        # Default to OpenAI format
        return self.parse_openai(self._unwrap_openai_delta(chunk))

    @staticmethod
    def _unwrap_openai_delta(chunk):
        """Return the first choice's delta of a full OpenAI chunk.

        A dict without a "choices" key is assumed to already be a delta and
        is returned unchanged. A full chunk with no choices or no delta
        yields an empty dict.
        """
        if isinstance(chunk, dict) and "choices" in chunk:
            choices = chunk.get("choices") or []
            first_choice = choices[0] if choices else {}
            return (first_choice or {}).get("delta") or {}
        return chunk

    def _extract_thinking_tags(self, content: str) -> tuple:
        """Split content into tagged thinking text and the remaining text.

        Handles multiple tag formats (case-insensitively):
            - MiniMax: <|im_start|>thinking...<|im_end|>
            - Standard: <think>...</think>

        An opening tag with no closing tag after it is left in the clean
        text untouched.

        Args:
            content: Raw content string from the LLM.

        Returns:
            Tuple of (thinking_content, clean_text). Multiple thinking
            sections are concatenated in order.
        """
        if not content:
            return "", ""

        lowered = content.translate(self._ASCII_LOWER)
        thinking_parts = []
        clean_parts = []
        pos = 0
        plain_start = 0  # start of the pending run of non-thinking text
        length = len(content)

        while pos < length:
            span = self._match_thinking_span(lowered, pos)
            if span is None:
                pos += 1
                continue
            body_start, body_end, resume = span
            clean_parts.append(content[plain_start:pos])
            thinking_parts.append(content[body_start:body_end])
            pos = plain_start = resume

        clean_parts.append(content[plain_start:])
        return "".join(thinking_parts), "".join(clean_parts)

    def _match_thinking_span(self, lowered: str, pos: int):
        """Match a complete thinking section starting exactly at ``pos``.

        Args:
            lowered: ASCII-lower-cased copy of the content.
            pos: Offset at which an opening tag must start.

        Returns:
            ``(body_start, body_end, resume)`` offsets, or ``None`` when no
            opening tag starts at ``pos`` or its closing tag is missing.
        """
        for open_tag, close_tag in self._THINKING_TAGS:
            if not lowered.startswith(open_tag, pos):
                continue
            # Derive the offset from the tag itself; a hard-coded length is
            # how the first thinking character used to get dropped.
            body_start = pos + len(open_tag)
            body_end = lowered.find(close_tag, body_start)
            if body_end != -1:
                return body_start, body_end, body_end + len(close_tag)
        return None

    def has_thinking_tags(self, content: str) -> bool:
        """Check whether content contains an opening thinking tag.

        Args:
            content: Raw content string.

        Returns:
            True if content contains an opening thinking tag in either
            supported format (case-insensitive); False for empty content.
        """
        if not content:
            return False
        lower = content.lower()
        return any(open_tag in lower for open_tag, _ in self._THINKING_TAGS)
# Global parser instance shared by importers of this module.
# The parser accumulates thinking/text in instance buffers, so call
# llm_parser.reset() before parsing a new message.
# NOTE(review): because this single instance is shared, chunks from two
# messages parsed in an interleaved fashion would land in the same buffers —
# confirm callers never interleave streams on it.
llm_parser = LLMResponseParser()