310 lines
9.2 KiB
Python
310 lines
9.2 KiB
Python
"""LLM Response Parser - Unified parser for multiple LLM API formats.
|
|
|
|
Supported Providers:
|
|
- OpenAI: delta.content, delta.tool_calls
|
|
- DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
|
|
- Anthropic: content_block with thinking/text types
|
|
- MiniMax: <|im_start|>thinking...<|im_end|> tags in content
|
|
|
|
Data Flow:
|
|
```
|
|
LLM API Response (SSE)
|
|
│
|
|
▼
|
|
LLMResponseParser.parse_chunk()
|
|
│
|
|
├──► ParsedDelta { thinking, text, tool_calls }
|
|
│
|
|
▼
|
|
AgenticLoop._process_stream_line()
|
|
│
|
|
▼
|
|
SSE Events (process_step)
|
|
│
|
|
├──► type: "thinking"
|
|
├──► type: "text"
|
|
└──► type: "tool_call"
|
|
```
|
|
|
|
API Response Formats:
|
|
|
|
1. OpenAI Standard (DeepSeek, OpenAI):
|
|
```json
|
|
{
|
|
"choices": [{
|
|
"delta": {
|
|
"content": "Hello",
|
|
"reasoning_content": "Let me think...",
|
|
"tool_calls": [{"id": "call_1", "function": {...}}]
|
|
}
|
|
}]
|
|
}
|
|
```
|
|
|
|
2. Anthropic Streaming:
|
|
```json
|
|
{"type": "content_block_start", "content_block": {"type": "thinking", "thinking": "..."}}
|
|
{"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
|
|
{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "..."}}
|
|
{"type": "content_block_stop"}
|
|
```
|
|
|
|
3. MiniMax (with thinking tags in content):
|
|
```json
|
|
{
|
|
"choices": [{
|
|
"delta": {
|
|
"content": "<|im_start|>thinking分析中...<|im_end|>回复内容"
|
|
}
|
|
}]
|
|
}
|
|
```
|
|
|
|
4. Standard thinking tags:
|
|
```json
|
|
{
|
|
"choices": [{
|
|
"delta": {
|
|
"content": "<think>思考内容</think>回复内容"
|
|
}
|
|
}]
|
|
}
|
|
```
|
|
"""
|
|
from typing import Dict, Any, Optional, List
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class ParsedDelta:
    """Result of parsing one streamed LLM response chunk.

    As populated by LLMResponseParser, ``thinking`` and ``text`` hold the
    content accumulated in the parser's buffers so far (not only the newest
    fragment), and are left empty when the chunk contributed nothing of
    that kind.

    Attributes:
        thinking: Thinking/reasoning content.
        text: Regular text content.
        tool_calls: Tool call requests; always a list after construction.
        is_complete: Whether this delta completes a content block.
    """

    thinking: str = ""
    text: str = ""
    # None is only a construction-time placeholder: a mutable list must not
    # be used directly as a dataclass default, so __post_init__ swaps in a
    # fresh list per instance.
    tool_calls: Optional[List[Dict]] = None
    is_complete: bool = False

    def __post_init__(self):
        """Replace a missing ``tool_calls`` value with a new empty list."""
        if self.tool_calls is None:
            self.tool_calls = []
|
|
|
|
|
|
class LLMResponseParser:
    """Unified parser for LLM API response formats.

    The parser is stateful: thinking and text fragments are appended to
    internal buffers, and each ParsedDelta it returns carries the buffer
    contents accumulated so far. Call reset() before a new message.

    Usage:
        from luxx.services.llm_response import llm_parser

        # Parse OpenAI format
        delta = {"content": "Hello", "reasoning_content": "Thinking..."}
        parsed = llm_parser.parse_openai(delta)

        # Parse Anthropic format
        chunk = {"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
        parsed = llm_parser.parse_anthropic(chunk)

        # Dispatch on provider name
        parsed = llm_parser.parse_chunk(chunk, provider="anthropic")
    """

    # Content block types
    BLOCK_THINKING = "thinking"
    BLOCK_TEXT = "text"
    BLOCK_TOOL_USE = "tool_use"
    BLOCK_TOOL_RESULT = "tool_result"

    # (opening tag, closing tag) pairs recognised inside text content.
    # Opening tags are written in lowercase because they are compared
    # case-insensitively; closing tags are matched exactly as written.
    _THINKING_TAGS = (
        ("<|im_start|>thinking", "<|im_end|>"),
        ("<think>", "</think>"),
    )

    def __init__(self):
        self._buffer = ""
        self._thinking_buffer = ""
        self._text_buffer = ""

    def reset(self):
        """Reset parser state for new message."""
        self._buffer = ""
        self._thinking_buffer = ""
        self._text_buffer = ""

    def parse_openai(self, delta: Dict) -> "ParsedDelta":
        """Parse an OpenAI-style delta object.

        Handles:
            - OpenAI: delta.content, delta.tool_calls
            - DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
            - MiniMax: <|im_start|>thinking...<|im_end|> in content
            - Standard: <think>...</think> in content

        Args:
            delta: Delta object from the LLM API response. ``None`` is
                treated like an empty delta.

        Returns:
            ParsedDelta whose ``thinking``/``text`` hold the accumulated
            buffers when this delta added to them (empty otherwise), and
            whose ``tool_calls`` is the delta's list passed through as-is.
        """
        result = ParsedDelta()
        if not delta:
            return result

        # Dedicated reasoning field (DeepSeek uses reasoning_content).
        reasoning = delta.get("reasoning_content") or delta.get("reasoning") or ""
        if reasoning:
            self._thinking_buffer += reasoning
            result.thinking = self._thinking_buffer

        content = delta.get("content") or ""
        if content:
            # Content may embed thinking inside tags (MiniMax / <think>).
            tagged_thinking, plain_text = self._extract_thinking_tags(content)
            if tagged_thinking:
                self._thinking_buffer += tagged_thinking
                result.thinking = self._thinking_buffer
            if plain_text:
                self._text_buffer += plain_text
                result.text = self._text_buffer
        elif direct_thinking := delta.get("thinking"):
            # Some providers use a "thinking" field directly; it is only
            # consulted when the delta carries no content.
            self._thinking_buffer += direct_thinking
            result.thinking = self._thinking_buffer

        result.tool_calls = delta.get("tool_calls") or []
        return result

    def parse_anthropic(self, chunk: Dict) -> "ParsedDelta":
        """Parse one Anthropic streaming event.

        Recognised events:
            - content_block_start: a thinking block's initial content
              replaces the thinking buffer.
            - content_block_delta: thinking_delta / text_delta fragments
              are appended to the matching buffer.
            - content_block_stop: marks the returned delta as complete.

        Any other event or delta type (including tool-call argument
        fragments) yields an empty ParsedDelta.

        Args:
            chunk: Anthropic SSE event chunk.

        Returns:
            ParsedDelta with the extracted content.
        """
        result = ParsedDelta()
        event_type = chunk.get("type", "")

        if event_type == "content_block_start":
            block = chunk.get("content_block") or {}
            if block.get("type") == self.BLOCK_THINKING:
                initial = block.get("thinking") or ""
                if initial:
                    self._thinking_buffer = initial
                    result.thinking = self._thinking_buffer

        elif event_type == "content_block_delta":
            delta = chunk.get("delta") or {}
            delta_type = delta.get("type", "")

            if delta_type == "thinking_delta":
                self._thinking_buffer += delta.get("thinking") or ""
                result.thinking = self._thinking_buffer
            elif delta_type == "text_delta":
                self._text_buffer += delta.get("text") or ""
                result.text = self._text_buffer

        elif event_type == "content_block_stop":
            result.is_complete = True

        return result

    def parse_chunk(self, chunk: Dict, provider: str = "openai") -> "ParsedDelta":
        """Parse a chunk using the parser for the given provider.

        Args:
            chunk: Response chunk from the LLM. For non-Anthropic providers
                this may be either a bare delta object or a full chunk of
                the form ``{"choices": [{"delta": {...}}]}``.
            provider: Provider name ("openai", "anthropic", "deepseek",
                "minimax"). Everything except "anthropic" is parsed as
                OpenAI format.

        Returns:
            ParsedDelta with the extracted content.
        """
        if provider == "anthropic":
            return self.parse_anthropic(chunk)

        # Default to OpenAI format
        return self.parse_openai(self._unwrap_openai_delta(chunk))

    @staticmethod
    def _unwrap_openai_delta(chunk):
        """Return ``choices[0].delta`` of a full chunk, or the input if it is already a delta."""
        if not isinstance(chunk, dict) or "choices" not in chunk:
            return chunk
        choices = chunk.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            return choices[0].get("delta") or {}
        # A chunk with no usable choice carries nothing to parse.
        return {}

    def _extract_thinking_tags(self, content: str) -> tuple:
        """Split content into tagged thinking and the remaining text.

        Handles multiple tag formats:
            - MiniMax: <|im_start|>thinking...<|im_end|>
            - Standard: <think>...</think>

        An opening tag without a closing tag later in ``content`` is kept
        as ordinary text. Only the string passed in is scanned, so a tag
        pair split across separate calls is not recognised.

        Args:
            content: Raw content string from the LLM.

        Returns:
            Tuple of (thinking_content, clean_text).
        """
        thinking_parts = []
        clean_parts = []
        pos = 0
        total = len(content)

        while pos < total:
            # Every opening tag begins with "<", so jump straight to the
            # next candidate instead of testing each character.
            candidate = content.find("<", pos)
            if candidate == -1:
                clean_parts.append(content[pos:])
                break
            if candidate > pos:
                clean_parts.append(content[pos:candidate])

            for open_tag, close_tag in self._THINKING_TAGS:
                # Derive the offset from the tag itself rather than a
                # hand-counted constant.
                body_start = candidate + len(open_tag)
                if content[candidate:body_start].lower() != open_tag:
                    continue
                body_end = content.find(close_tag, body_start)
                if body_end == -1:
                    continue
                thinking_parts.append(content[body_start:body_end])
                pos = body_end + len(close_tag)
                break
            else:
                # Not a complete thinking block: keep the "<" as text.
                clean_parts.append("<")
                pos = candidate + 1

        return "".join(thinking_parts), "".join(clean_parts)

    def has_thinking_tags(self, content: str) -> bool:
        """Check if content contains an opening thinking tag.

        Args:
            content: Raw content string.

        Returns:
            True if a MiniMax or <think> opening tag occurs in the
            content (case-insensitive); False for empty content.
        """
        if not content:
            return False
        lowered = content.lower()
        return "<|im_start|>thinking" in lowered or "<think>" in lowered
|
|
|
|
|
|
# Global parser instance
|
|
# Shared instance: it keeps buffered thinking/text between calls, so call
# llm_parser.reset() before parsing a new message.
llm_parser = LLMResponseParser()
|