"""LLM Response Parser - Unified parser for multiple LLM API formats. Supported Providers: - OpenAI: delta.content, delta.tool_calls - DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls - Anthropic: content_block with thinking/text types - MiniMax: <|im_start|>thinking...<|im_end|> tags in content Data Flow: ``` LLM API Response (SSE) │ ▼ LLMResponseParser.parse_chunk() │ ├──► ParsedDelta { thinking, text, tool_calls } │ ▼ AgenticLoop._process_stream_line() │ ▼ SSE Events (process_step) │ ├──► type: "thinking" ├──► type: "text" └──► type: "tool_call" ``` API Response Formats: 1. OpenAI Standard (DeepSeek, OpenAI): ```json { "choices": [{ "delta": { "content": "Hello", "reasoning_content": "Let me think...", "tool_calls": [{"id": "call_1", "function": {...}}] } }] } ``` 2. Anthropic Streaming: ```json {"type": "content_block_start", "content_block": {"type": "thinking", "thinking": "..."}} {"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}} {"type": "content_block_delta", "delta": {"type": "text_delta", "text": "..."}} {"type": "content_block_stop"} ``` 3. MiniMax (with thinking tags in content): ```json { "choices": [{ "delta": { "content": "<|im_start|>thinking分析中...<|im_end|>回复内容" } }] } ``` 4. Standard thinking tags: ```json { "choices": [{ "delta": { "content": "思考内容回复内容" } }] } ``` """ from typing import Dict, Any, Optional, List from dataclasses import dataclass @dataclass class ParsedDelta: """Parsed response delta from LLM. Attributes: thinking: Thinking/reasoning content text: Regular text content tool_calls: Tool call requests is_complete: Whether this delta completes a content block """ thinking: str = "" text: str = "" tool_calls: List[Dict] = None is_complete: bool = False def __post_init__(self): if self.tool_calls is None: self.tool_calls = [] class LLMResponseParser: """Unified parser for LLM API response formats. Usage: from luxx.services.llm_response import llm_parser # Parse OpenAI format delta = {"content": "Hello", "reasoning_content": "Thinking..."} parsed = llm_parser.parse_openai(delta) # Parse Anthropic format chunk = {"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}} parsed = llm_parser.parse_anthropic(chunk) # Auto-detect format parsed = llm_parser.parse_chunk(chunk, provider="anthropic") """ # Content block types BLOCK_THINKING = "thinking" BLOCK_TEXT = "text" BLOCK_TOOL_USE = "tool_use" BLOCK_TOOL_RESULT = "tool_result" def __init__(self): self._buffer = "" self._thinking_buffer = "" self._text_buffer = "" def reset(self): """Reset parser state for new message.""" self._buffer = "" self._thinking_buffer = "" self._text_buffer = "" def parse_openai(self, delta: Dict) -> ParsedDelta: """Parse OpenAI format delta. Handles: - OpenAI: delta.content, delta.tool_calls - DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls - MiniMax: <|im_start|>thinking...<|im_end|> in content - Standard: ... in content Args: delta: Delta object from LLM API response Returns: ParsedDelta with extracted thinking, text, and tool_calls """ result = ParsedDelta() # Get thinking content (DeepSeek uses reasoning_content) thinking = delta.get("reasoning_content") or delta.get("reasoning") or "" if thinking: self._thinking_buffer += thinking result.thinking = self._thinking_buffer # Get text content text = delta.get("content") or "" if text: # Check for embedded thinking tags (MiniMax format) thinking_part, clean_text = self._extract_thinking_tags(text) if thinking_part: self._thinking_buffer += thinking_part result.thinking = self._thinking_buffer if clean_text: self._text_buffer += clean_text result.text = self._text_buffer elif thinking_part := delta.get("thinking"): # Some providers use "thinking" field directly self._thinking_buffer += thinking_part result.thinking = self._thinking_buffer # Tool calls result.tool_calls = delta.get("tool_calls") or [] return result def parse_anthropic(self, chunk: Dict) -> ParsedDelta: """Parse Anthropic streaming format. Anthropic uses a different event structure: - content_block_start: Begin a content block - content_block_delta: Incremental content - content_block_stop: End of content blocks Content block types: - thinking: Model reasoning - text: Regular text - tool_use: Tool invocation - tool_result: Tool output Args: chunk: Anthropic SSE event chunk Returns: ParsedDelta with extracted content """ result = ParsedDelta() chunk_type = chunk.get("type", "") if chunk_type == "content_block_start": block = chunk.get("content_block", {}) if block.get("type") == self.BLOCK_THINKING: thinking = block.get("thinking", "") if thinking: self._thinking_buffer = thinking result.thinking = self._thinking_buffer elif chunk_type == "content_block_delta": delta = chunk.get("delta", {}) delta_type = delta.get("type", "") if delta_type == "thinking_delta": thinking = delta.get("thinking", "") self._thinking_buffer += thinking result.thinking = self._thinking_buffer elif delta_type == "text_delta": text = delta.get("text", "") self._text_buffer += text result.text = self._text_buffer elif delta_type == "partial_json": # Partial JSON for tool calls pass elif chunk_type == "content_block_stop": result.is_complete = True return result def parse_chunk(self, chunk: Dict, provider: str = "openai") -> ParsedDelta: """Parse chunk based on provider. Args: chunk: Response chunk from LLM provider: Provider name ("openai", "anthropic", "deepseek", "minimax") Returns: ParsedDelta with extracted content """ if provider == "anthropic": return self.parse_anthropic(chunk) # Default to OpenAI format return self.parse_openai(chunk) def _extract_thinking_tags(self, content: str) -> tuple: """Extract thinking content from tags. Handles multiple tag formats: - MiniMax: <|im_start|>thinking...<|im_end|> - Standard: ... Args: content: Raw content string from LLM Returns: Tuple of (thinking_content, clean_text) """ thinking_parts = [] clean_parts = [] i = 0 while i < len(content): remaining = content[i:].lower() # Check for MiniMax format if remaining.startswith("<|im_start|>thinking"): end_tag = "<|im_end|>" start = i + 21 # len("<|im_start|>thinking") end = content.find(end_tag, start) if end != -1: thinking_parts.append(content[start:end]) i = end + len(end_tag) continue # Check for standard format if remaining.startswith(""): end_tag = "" start = i + 7 # len("") end = content.find(end_tag, start) if end != -1: thinking_parts.append(content[start:end]) i = end + len(end_tag) continue # Regular character clean_parts.append(content[i]) i += 1 return "".join(thinking_parts), "".join(clean_parts) def has_thinking_tags(self, content: str) -> bool: """Check if content contains thinking tags. Args: content: Raw content string Returns: True if content contains thinking tags """ if not content: return False lower = content.lower() return "<|im_start|>thinking" in lower or "" in lower # Global parser instance llm_parser = LLMResponseParser()