# Luxx/luxx/services/llm_response.py

"""LLM Response Parser - Unified parser for multiple LLM API formats.

Supported Providers:
- OpenAI: delta.content, delta.tool_calls
- DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
- Anthropic: content_block with thinking/text types
- MiniMax: <|im_start|>thinking...<|im_end|> tags in content

Data Flow:
```
LLM API Response (SSE)
    │
    ▼
LLMResponseParser.parse_chunk()
    │
    ├──► ParsedDelta { thinking, text, tool_calls }
    │
    ▼
AgenticLoop._process_stream_line()
    │
    ▼
SSE Events (process_step)
    │
    ├──► type: "thinking"
    ├──► type: "text"
    └──► type: "tool_call"
```

API Response Formats:

1. OpenAI Standard (DeepSeek, OpenAI):
```json
{
  "choices": [{
    "delta": {
      "content": "Hello",
      "reasoning_content": "Let me think...",
      "tool_calls": [{"id": "call_1", "function": {...}}]
    }
  }]
}
```

2. Anthropic Streaming:
```json
{"type": "content_block_start", "content_block": {"type": "thinking", "thinking": "..."}}
{"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "..."}}
{"type": "content_block_stop"}
```

3. MiniMax (with thinking tags in content):
```json
{
  "choices": [{
    "delta": {
      "content": "<|im_start|>thinking分析中...<|im_end|>回复内容"
    }
  }]
}
```

4. Standard thinking tags:
```json
{
  "choices": [{
    "delta": {
      "content": "<think>思考内容</think>回复内容"
    }
  }]
}
```
"""
from typing import Dict, Any, Optional, List
from dataclasses import dataclass


@dataclass
class ParsedDelta:
    """One parsed piece of an LLM streaming response.

    Attributes:
        thinking: Reasoning ("thinking") content.
        text: Ordinary answer text.
        tool_calls: Tool call requests; always a list once the instance
            has been constructed.
        is_complete: True when this delta closes a content block.
    """
    thinking: str = ""
    text: str = ""
    # ``None`` is only a construction-time placeholder (a list literal is
    # not allowed as a dataclass default); __post_init__ replaces it.
    tool_calls: List[Dict] = None
    is_complete: bool = False

    def __post_init__(self):
        """Swap a missing ``tool_calls`` for a fresh, per-instance list."""
        self.tool_calls = [] if self.tool_calls is None else self.tool_calls


class LLMResponseParser:
    """Unified parser for LLM API response formats.

    The parser is stateful: thinking and text fragments are appended to
    internal buffers, and a returned ``ParsedDelta`` carries the content
    accumulated so far (not only the newest fragment). Call ``reset()``
    before parsing a new message.

    Usage:
        from luxx.services.llm_response import llm_parser

        # Parse OpenAI format
        delta = {"content": "Hello", "reasoning_content": "Thinking..."}
        parsed = llm_parser.parse_openai(delta)

        # Parse Anthropic format
        chunk = {"type": "content_block_delta", "delta": {"type": "thinking_delta", "thinking": "..."}}
        parsed = llm_parser.parse_anthropic(chunk)

        # Dispatch on provider (full chunks or bare OpenAI deltas)
        parsed = llm_parser.parse_chunk(chunk, provider="anthropic")
        parsed = llm_parser.parse_chunk({"choices": [{"delta": {"content": "Hi"}}]})
    """

    # Content block types
    BLOCK_THINKING = "thinking"
    BLOCK_TEXT = "text"
    BLOCK_TOOL_USE = "tool_use"
    BLOCK_TOOL_RESULT = "tool_result"

    # (opening tag, closing tag) pairs for thinking content embedded in text.
    # Opening tags are written in lower case and matched case-insensitively;
    # closing tags are matched exactly.
    _THINKING_TAGS = (
        ("<|im_start|>thinking", "<|im_end|>"),  # MiniMax
        ("<think>", "</think>"),                 # Standard
    )

    def __init__(self):
        self._buffer = ""
        self._thinking_buffer = ""
        self._text_buffer = ""

    def reset(self):
        """Reset parser state for new message."""
        self._buffer = ""
        self._thinking_buffer = ""
        self._text_buffer = ""

    def parse_openai(self, delta: Dict) -> "ParsedDelta":
        """Parse OpenAI format delta.

        Handles:
        - OpenAI: delta.content, delta.tool_calls
        - DeepSeek: delta.content, delta.reasoning_content, delta.tool_calls
        - MiniMax: <|im_start|>thinking...<|im_end|> in content
        - Standard: <think>...</think> in content

        Inline tags are only recognised when the opening and closing tag
        arrive in the same ``content`` string; a tag pair split across two
        deltas is passed through as ordinary text.

        Args:
            delta: Delta object from LLM API response (``None`` is treated
                as an empty delta).

        Returns:
            ParsedDelta whose ``thinking`` / ``text`` hold the accumulated
            buffers when this delta contributed to them (empty string
            otherwise), plus this delta's ``tool_calls``.
        """
        result = ParsedDelta()
        delta = delta or {}

        # Get thinking content (DeepSeek uses reasoning_content)
        thinking = delta.get("reasoning_content") or delta.get("reasoning") or ""
        if thinking:
            self._thinking_buffer += thinking
            result.thinking = self._thinking_buffer

        # Get text content
        text = delta.get("content") or ""
        if text:
            # Check for embedded thinking tags (MiniMax / <think> format)
            thinking_part, clean_text = self._extract_thinking_tags(text)
            if thinking_part:
                self._thinking_buffer += thinking_part
                result.thinking = self._thinking_buffer
            if clean_text:
                self._text_buffer += clean_text
                result.text = self._text_buffer
        elif thinking_part := delta.get("thinking"):
            # Some providers use "thinking" field directly.
            # NOTE: this field is only consulted when "content" is empty.
            self._thinking_buffer += thinking_part
            result.thinking = self._thinking_buffer

        # Tool calls are passed through untouched (not accumulated)
        result.tool_calls = delta.get("tool_calls") or []

        return result

    def parse_anthropic(self, chunk: Dict) -> "ParsedDelta":
        """Parse Anthropic streaming format.

        Anthropic uses a different event structure:
        - content_block_start: Begin a content block
        - content_block_delta: Incremental content
        - content_block_stop: End of a content block

        Only thinking and text content is extracted. Tool-use blocks and
        their JSON input deltas produce an empty result.

        Args:
            chunk: Anthropic SSE event chunk

        Returns:
            ParsedDelta with the accumulated thinking or text, or with
            ``is_complete`` set for a ``content_block_stop`` event.
        """
        result = ParsedDelta()
        chunk_type = chunk.get("type", "")

        if chunk_type == "content_block_start":
            block = chunk.get("content_block") or {}
            if block.get("type") == self.BLOCK_THINKING:
                thinking = block.get("thinking") or ""
                if thinking:
                    # A thinking block that opens with content replaces
                    # (rather than extends) the thinking buffer.
                    self._thinking_buffer = thinking
                    result.thinking = self._thinking_buffer

        elif chunk_type == "content_block_delta":
            delta = chunk.get("delta") or {}
            delta_type = delta.get("type", "")

            if delta_type == "thinking_delta":
                self._thinking_buffer += delta.get("thinking") or ""
                result.thinking = self._thinking_buffer

            elif delta_type == "text_delta":
                self._text_buffer += delta.get("text") or ""
                result.text = self._text_buffer

            # Other delta types are ignored. In particular, tool input is
            # streamed as "input_json_delta" deltas carrying a
            # "partial_json" field, which this parser does not assemble.

        elif chunk_type == "content_block_stop":
            result.is_complete = True

        return result

    def parse_chunk(self, chunk: Dict, provider: str = "openai") -> "ParsedDelta":
        """Parse chunk based on provider.

        For OpenAI-compatible providers the chunk may be either a full
        streaming chunk (``{"choices": [{"delta": {...}}]}``), in which
        case the first choice's delta is parsed, or a bare delta dict.

        Args:
            chunk: Response chunk from LLM
            provider: Provider name ("openai", "anthropic", "deepseek", "minimax")

        Returns:
            ParsedDelta with extracted content
        """
        if provider == "anthropic":
            return self.parse_anthropic(chunk)

        # Default to OpenAI format. Unwrap the chunk envelope when present so
        # parse_openai always receives the delta itself.
        if isinstance(chunk, dict) and "choices" in chunk:
            choices = chunk.get("choices") or []
            first = choices[0] if choices else None
            delta = first.get("delta") if isinstance(first, dict) else None
            return self.parse_openai(delta or {})

        return self.parse_openai(chunk)

    def _extract_thinking_tags(self, content: str) -> tuple:
        """Extract thinking content from tags.

        Handles multiple tag formats:
        - MiniMax: <|im_start|>thinking...<|im_end|>
        - Standard: <think>...</think>

        An opening tag without a matching closing tag later in ``content``
        is left in the clean text unchanged.

        Args:
            content: Raw content string from LLM

        Returns:
            Tuple of (thinking_content, clean_text); multiple thinking
            spans are concatenated in order of appearance.
        """
        thinking_parts = []
        clean_parts = []
        pos = 0
        length = len(content)

        while pos < length:
            # Every tag starts with "<", so jump straight to the next one.
            lt = content.find("<", pos)
            if lt == -1:
                clean_parts.append(content[pos:])
                break
            if lt > pos:
                clean_parts.append(content[pos:lt])

            for open_tag, close_tag in self._THINKING_TAGS:
                # Offsets are derived from the tag itself; a hand-counted
                # length previously skipped the first thinking character.
                body_start = lt + len(open_tag)
                if content[lt:body_start].lower() != open_tag:
                    continue
                body_end = content.find(close_tag, body_start)
                if body_end == -1:
                    continue
                thinking_parts.append(content[body_start:body_end])
                pos = body_end + len(close_tag)
                break
            else:
                # Not a (closed) thinking tag: keep "<" as regular text.
                clean_parts.append("<")
                pos = lt + 1

        return "".join(thinking_parts), "".join(clean_parts)

    def has_thinking_tags(self, content: str) -> bool:
        """Check if content contains thinking tags.

        Only opening tags are looked for (case-insensitively); a closing
        tag is not required.

        Args:
            content: Raw content string

        Returns:
            True if content contains an opening thinking tag
        """
        if not content:
            return False
        lowered = content.lower()
        return any(open_tag in lowered for open_tag, _ in self._THINKING_TAGS)


# Global parser instance, shared by everything that imports `llm_parser`.
# The instance keeps accumulated thinking/text buffers between calls, so
# reset() must be called before each new message.
# NOTE(review): sharing one instance across concurrently streamed messages
# would mix their buffers — confirm callers parse one message at a time.
llm_parser = LLMResponseParser()