# Luxx/luxx/services/llm_adapters/anthropic_adapter.py

"""Anthropic Adapter - Anthropic Claude API adapter

Supports Anthropic Claude API streaming and non-streaming responses.
"""
import json
import logging
from typing import Any, AsyncGenerator, Dict, List, Optional

from .base import ProviderAdapter
from ..llm_response import ParsedDelta, LLMResponse

logger = logging.getLogger(__name__)


class AnthropicAdapter(ProviderAdapter):
    """Anthropic Claude API adapter.

    Translates OpenAI-style messages/tools into Anthropic Messages API
    requests and parses Anthropic responses back:
    - Endpoint: POST /v1/messages
    - Streaming: SSE events (content_block_start, content_block_delta, etc.)
    - Thinking: independent "thinking" content block
    - Tools: "tool_use" / "tool_result" content blocks

    Streaming state (current tool call, usage) is stored on the instance and
    is reset when a ``message_start`` event arrives, so a single instance
    must not be used to parse two interleaved streams.

    Reference: https://docs.anthropic.com/claude/reference/messages
    """

    # Anthropic API endpoint suffix
    MESSAGES_PATH = "/v1/messages"

    # Anthropic API version
    ANTHROPIC_VERSION = "2023-06-01"

    # Defaults applied by build_request when the caller gives no value
    DEFAULT_MAX_TOKENS = 4096
    DEFAULT_THINKING_BUDGET = 10000

    # Streaming event types
    BLOCK_MESSAGE_START = "message_start"
    BLOCK_CONTENT_BLOCK_START = "content_block_start"
    BLOCK_CONTENT_BLOCK_DELTA = "content_block_delta"
    BLOCK_CONTENT_BLOCK_STOP = "content_block_stop"
    BLOCK_MESSAGE_DELTA = "message_delta"
    BLOCK_MESSAGE_STOP = "message_stop"
    BLOCK_ERROR = "error"

    # Delta types
    DELTA_THINKING = "thinking_delta"
    DELTA_TEXT = "text_delta"
    DELTA_INPUT_JSON = "input_json_delta"

    # Content block subtypes
    SUBTYPE_THINKING = "thinking"
    SUBTYPE_TEXT = "text"
    SUBTYPE_TOOL_USE = "tool_use"

    def __init__(self):
        # Single source of truth for the streaming state, so __init__ and the
        # per-message reset cannot drift apart.
        self._reset_buffers()

    @property
    def provider_type(self) -> str:
        """Identifier of this provider ("anthropic")."""
        return "anthropic"

    def build_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> tuple[Dict[str, Any], Dict[str, str]]:
        """Build an Anthropic-format request.

        Differences from an OpenAI request handled here:
        - the system prompt is a top-level "system" field, not a message
        - "max_tokens" is always sent
        - tools and tool results use Anthropic's block format

        Args:
            model: Model name (e.g., claude-3-5-sonnet-20241022)
            messages: OpenAI-format message list
            tools: OpenAI-format tool definition list
            **kwargs: api_key, stream, max_tokens, system, thinking_enabled,
                thinking_budget_tokens, temperature, top_p, stop_sequences

        Returns:
            tuple: (body, headers)
        """
        api_key = kwargs.get("api_key") or ""

        headers = {
            "Content-Type": "application/json",
            # The Messages API authenticates API keys via "x-api-key".
            "x-api-key": api_key,
            # NOTE(review): kept from the previous implementation for
            # gateways that read the key from a Bearer header; drop it if
            # only the first-party API is targeted.
            "Authorization": f"Bearer {api_key}",
            "anthropic-version": self.ANTHROPIC_VERSION,
        }

        body: Dict[str, Any] = {
            "model": model,
            "messages": self._convert_messages(messages),
            "stream": kwargs.get("stream", True),
            "max_tokens": kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS),
        }

        # System prompt: explicit kwarg wins, otherwise the first system message
        if "system" in kwargs:
            body["system"] = kwargs["system"]
        else:
            for msg in messages:
                if msg.get("role") == "system":
                    body["system"] = msg.get("content", "")
                    break

        # Extended thinking (only accepted by thinking-capable models)
        if kwargs.get("thinking_enabled"):
            budget = kwargs.get(
                "thinking_budget_tokens", self.DEFAULT_THINKING_BUDGET
            )
            body["thinking"] = {"type": "enabled", "budget_tokens": budget}

            # The thinking budget counts against max_tokens and must be
            # smaller than it; with the defaults (4096 vs 10000) the request
            # would be rejected. Grow max_tokens so the original allowance
            # for the visible answer is kept on top of the budget.
            max_tokens = body["max_tokens"]
            if (
                isinstance(budget, int)
                and isinstance(max_tokens, int)
                and max_tokens <= budget
            ):
                body["max_tokens"] = budget + max_tokens

        # Tool definitions
        if tools:
            body["tools"] = self._convert_tools(tools)

        # Optional sampling parameters are forwarded only when supplied
        for option in ("temperature", "top_p", "stop_sequences"):
            if option in kwargs:
                body[option] = kwargs[option]

        return body, headers

    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert OpenAI-format messages to Anthropic format.

        - "system" messages are skipped (build_request sends them separately).
        - "tool" messages become ``tool_result`` blocks inside a user message;
          consecutive tool messages are merged into one user message.
        - Assistant ``tool_calls`` become ``tool_use`` blocks appended after
          the assistant's text.

        Args:
            messages: OpenAI-format message list

        Returns:
            Anthropic-format message list
        """
        result: List[Dict[str, Any]] = []
        # True while result[-1] is a user message synthesized from tool
        # messages, so the next tool result can be appended to it.
        merging_tool_results = False

        for msg in messages:
            role = msg.get("role")

            if role == "system":
                continue

            if role == "tool":
                block = {
                    "type": "tool_result",
                    "tool_use_id": msg.get("tool_call_id"),
                    "content": self._normalize_content(msg.get("content")),
                }
                if merging_tool_results:
                    result[-1]["content"].append(block)
                else:
                    result.append({"role": "user", "content": [block]})
                    merging_tool_results = True
                continue

            merging_tool_results = False
            content = self._normalize_content(msg.get("content", ""))

            tool_calls = msg.get("tool_calls") if role == "assistant" else None
            if tool_calls:
                blocks = self._as_blocks(content)
                blocks.extend(self._convert_tool_call(call) for call in tool_calls)
                content = blocks

            result.append({"role": role, "content": content})

        return result

    @staticmethod
    def _normalize_content(content: Any) -> Any:
        """Return message content as a string or a list of content blocks.

        ``None`` becomes an empty string (instead of the text "None"), a dict
        contributes its "text" value, and a list is kept as a block list with
        bare strings wrapped as text blocks. Dict items of a list are
        forwarded unchanged; OpenAI ``image_url`` parts are not translated.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, dict):
            return content.get("text", "")
        if isinstance(content, list):
            blocks = []
            for part in content:
                if isinstance(part, dict):
                    blocks.append(part)
                else:
                    blocks.append({"type": "text", "text": str(part)})
            return blocks
        return str(content)

    @staticmethod
    def _as_blocks(content: Any) -> List[Dict[str, Any]]:
        """Return normalized content as a new list of content blocks."""
        if isinstance(content, list):
            return list(content)
        # Empty text is omitted rather than sent as an empty text block
        return [{"type": "text", "text": content}] if content else []

    @staticmethod
    def _convert_tool_call(call: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one OpenAI-format tool call into a ``tool_use`` block."""
        func = call.get("function") or {}
        raw_args = func.get("arguments")

        if isinstance(raw_args, str):
            # OpenAI carries arguments as a JSON string; "input" is an object
            try:
                parsed = json.loads(raw_args) if raw_args.strip() else {}
            except json.JSONDecodeError:
                logger.warning(
                    "Discarding unparsable arguments for tool call %s",
                    call.get("id"),
                )
                parsed = {}
        else:
            parsed = raw_args

        if not isinstance(parsed, dict):
            parsed = {}

        return {
            "type": "tool_use",
            "id": call.get("id"),
            "name": func.get("name"),
            "input": parsed,
        }

    def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert tool definitions to Anthropic format.

        Anthropic tool format:
        {
            "name": "function_name",
            "description": "...",
            "input_schema": {...}  # JSON Schema
        }

        Entries without a "function" key that already carry a "name" are
        treated as Anthropic-format definitions and forwarded unchanged.

        Args:
            tools: OpenAI-format tool list

        Returns:
            Anthropic-format tool list
        """
        result = []

        for tool in tools:
            func = tool.get("function")
            if func is None and "name" in tool:
                result.append(tool)
                continue

            func = func or {}
            result.append({
                "name": func.get("name"),
                "description": func.get("description") or "",
                # "or" also covers an explicit parameters=None
                "input_schema": func.get("parameters")
                or {"type": "object", "properties": {}},
            })

        return result

    async def parse_stream_chunk(
        self,
        raw_chunk: str
    ) -> AsyncGenerator[ParsedDelta, None]:
        """Parse one Anthropic streaming event.

        Handled events:
        - message_start: resets per-message state, records initial usage
        - content_block_start: content block start (thinking/text/tool_use)
        - content_block_delta: thinking/text/tool-argument increments
        - content_block_stop: finishes the tool call being streamed, if any
        - message_delta: usage and stop reason
        - message_stop: message completely stopped
        - error: logged

        ``text`` and ``thinking`` on the yielded object carry only the
        increment contained in this event (same as before this change);
        the running totals are kept in the instance buffers.

        Args:
            raw_chunk: Event payload as JSON, optionally still carrying the
                SSE "data:" prefix

        Yields:
            ParsedDelta objects (nothing for undecodable payloads)
        """
        chunk = self._decode_chunk(raw_chunk)
        if chunk is None:
            return

        chunk_type = chunk.get("type", "")
        result = ParsedDelta()

        if chunk_type == self.BLOCK_MESSAGE_START:
            # State is reset here rather than on every chunk; a per-chunk
            # reset would wipe the tool call and usage gathered so far.
            self._reset_buffers()
            initial_usage = (chunk.get("message") or {}).get("usage") or {}
            self._usage = self._build_usage(
                initial_usage.get("input_tokens"),
                initial_usage.get("output_tokens"),
            )

        elif chunk_type == self.BLOCK_CONTENT_BLOCK_START:
            block = chunk.get("content_block") or {}
            block_type = block.get("type")

            if block_type == self.SUBTYPE_THINKING:
                initial = block.get("thinking", "")
                if isinstance(initial, dict):
                    # Nested shape the previous implementation expected
                    initial = initial.get("thinking", "")
                if not isinstance(initial, str):
                    initial = ""
                self._thinking_buffer += initial
                result.thinking = initial

            elif block_type == self.SUBTYPE_TOOL_USE:
                self._current_tool_index = chunk.get("index", 0)
                self._current_tool_id = block.get("id", "")
                self._current_tool_name = block.get("name", "")
                self._tool_args_buffer = ""

        elif chunk_type == self.BLOCK_CONTENT_BLOCK_DELTA:
            delta = chunk.get("delta") or {}
            delta_type = delta.get("type", "")

            if delta_type == self.DELTA_THINKING:
                thinking = delta.get("thinking", "")
                self._thinking_buffer += thinking
                result.thinking = thinking

            elif delta_type == self.DELTA_TEXT:
                text = delta.get("text", "")
                self._text_buffer += text
                result.text = text

            elif delta_type == self.DELTA_INPUT_JSON:
                # Partial JSON is only meaningful once the block is complete,
                # so it is buffered until content_block_stop.
                self._tool_args_buffer += delta.get("partial_json", "")

        elif chunk_type == self.BLOCK_CONTENT_BLOCK_STOP:
            tool_call = self._finish_tool_call(chunk)
            # NOTE(review): ParsedDelta is defined outside this file; the
            # finished call is attached only if it exposes a "tool_calls"
            # attribute. Confirm the field name and expected shape.
            if tool_call is not None and hasattr(result, "tool_calls"):
                result.tool_calls = [tool_call]

        elif chunk_type == self.BLOCK_MESSAGE_DELTA:
            delta = chunk.get("delta") or {}
            usage = chunk.get("usage") or {}

            # Fall back to the counts recorded at message_start when this
            # event does not repeat them.
            prompt = usage.get("input_tokens")
            if prompt is None:
                prompt = self._usage.get("prompt_tokens", 0)
            completion = usage.get("output_tokens")
            if completion is None:
                completion = self._usage.get("completion_tokens", 0)

            self._usage = self._build_usage(prompt, completion)
            result.usage = self._usage

            if delta.get("stop_reason"):
                result.is_complete = True

        elif chunk_type == self.BLOCK_MESSAGE_STOP:
            result.is_complete = True

        elif chunk_type == self.BLOCK_ERROR:
            error = chunk.get("error") or {}
            error_msg = f"{error.get('type', '')}: {error.get('message', '')}"
            logger.error("Anthropic API error: %s", error_msg)

        yield result

    @staticmethod
    def _decode_chunk(raw_chunk: Any) -> Optional[Dict[str, Any]]:
        """Decode one raw stream payload into a dict, or None if unusable."""
        if isinstance(raw_chunk, (bytes, bytearray)):
            raw_chunk = raw_chunk.decode("utf-8", errors="replace")

        payload = raw_chunk.strip()
        # Accept lines that still carry the SSE field name
        if payload.startswith("data:"):
            payload = payload[len("data:"):].strip()
        if not payload:
            return None

        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            return None

        # Valid JSON that is not an object cannot be an event
        return chunk if isinstance(chunk, dict) else None

    def _finish_tool_call(self, chunk: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return the completed tool call if this stop event closes it.

        The call is returned in the same OpenAI-style shape parse_response
        uses, plus an "index" counting tool calls within the message.
        """
        if self._current_tool_index < 0:
            return None
        if chunk.get("index", self._current_tool_index) != self._current_tool_index:
            return None

        tool_call = {
            "index": self._tool_call_count,
            "id": self._current_tool_id,
            "type": "function",
            "function": {
                "name": self._current_tool_name,
                # A tool without arguments streams no JSON at all
                "arguments": self._tool_args_buffer or "{}",
            },
        }

        self._tool_call_count += 1
        self._current_tool_index = -1
        self._current_tool_id = ""
        self._current_tool_name = ""
        self._tool_args_buffer = ""
        return tool_call

    @staticmethod
    def _build_usage(input_tokens: Any, output_tokens: Any) -> Dict[str, int]:
        """Build the OpenAI-style usage dict from Anthropic token counts."""
        prompt = input_tokens or 0
        completion = output_tokens or 0
        return {
            "prompt_tokens": prompt,
            "completion_tokens": completion,
            "total_tokens": prompt + completion,
        }

    def parse_response(
        self,
        data: Dict[str, Any]
    ) -> LLMResponse:
        """Parse an Anthropic-format non-streaming response.

        Anthropic response format:
        {
            "id": "...",
            "type": "message",
            "role": "assistant",
            "content": [
                {"type": "text", "text": "..."},
                {"type": "thinking", "thinking": "..."},
                {"type": "tool_use", "id": "...", "name": "...", "input": {...}}
            ],
            "model": "...",
            "usage": {"input_tokens": ..., "output_tokens": ...}
        }

        Text blocks are joined with newlines, as are thinking blocks; tool
        calls are returned in OpenAI format with JSON-encoded arguments.

        Args:
            data: API response data

        Returns:
            LLMResponse object
        """
        text_parts = []
        thinking_parts = []
        tool_calls = []

        # "or []" also covers an explicit content=None
        for block in data.get("content") or []:
            block_type = block.get("type")

            if block_type == self.SUBTYPE_TEXT:
                text_parts.append(block.get("text", ""))

            elif block_type == self.SUBTYPE_THINKING:
                # Keep every thinking block rather than only the last one
                thinking_parts.append(block.get("thinking", ""))

            elif block_type == self.SUBTYPE_TOOL_USE:
                tool_calls.append({
                    "id": block.get("id"),
                    "type": "function",
                    "function": {
                        "name": block.get("name"),
                        "arguments": json.dumps(block.get("input") or {}),
                    },
                })

        usage = data.get("usage") or {}

        return LLMResponse(
            content="\n".join(text_parts),
            thinking="\n".join(thinking_parts),
            tool_calls=tool_calls if tool_calls else None,
            usage=self._build_usage(
                usage.get("input_tokens"), usage.get("output_tokens")
            ),
        )

    def supports_thinking(self) -> bool:
        """This adapter can request and parse thinking blocks."""
        return True

    def supports_tools(self) -> bool:
        """This adapter can send tool definitions and parse tool calls."""
        return True

    def _reset_buffers(self):
        """Reset streaming state (called when a new message starts)."""
        # Running totals of the streamed thinking and text
        self._thinking_buffer = ""
        self._text_buffer = ""
        # Tool call currently being streamed; index is -1 when there is none
        self._tool_args_buffer = ""
        self._current_tool_index = -1
        self._current_tool_id = ""
        self._current_tool_name = ""
        # Number of tool calls completed in the current message
        self._tool_call_count = 0
        self._usage = {}