Luxx/luxx/services/llm_adapters/openai_adapter.py

"""OpenAI Adapter - OpenAI-compatible API adapter

Supports OpenAI, DeepSeek, GLM and other OpenAI-compatible APIs.
"""
import json
import logging
from typing import Dict, List, Any, AsyncGenerator, Optional

from .base import ProviderAdapter
from ..llm_response import ParsedDelta, LLMResponse

logger = logging.getLogger(__name__)


class OpenAIAdapter(ProviderAdapter):
    """OpenAI-compatible API adapter

    Pure parsing adapter - no internal state management.
    Each parse_stream_chunk call returns incremental content.
    Accumulation is handled by the consumer (AgenticLoop).
    """

    @property
    def provider_type(self) -> str:
        return "openai"

    def __init__(self):
        pass

    def build_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]] = None,
        **kwargs
    ) -> tuple[Dict[str, Any], Dict[str, str]]:
        """Build OpenAI-format request"""
        api_key = kwargs.get("api_key", "")

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        body = {
            "model": model,
            "messages": messages,
            "stream": kwargs.get("stream", True)
        }

        # Optional parameters
        if "temperature" in kwargs:
            body["temperature"] = kwargs["temperature"]
        if "max_tokens" in kwargs:
            body["max_tokens"] = kwargs["max_tokens"]
        if "top_p" in kwargs:
            body["top_p"] = kwargs["top_p"]
        if "frequency_penalty" in kwargs:
            body["frequency_penalty"] = kwargs["frequency_penalty"]
        if "presence_penalty" in kwargs:
            body["presence_penalty"] = kwargs["presence_penalty"]
        if "stop" in kwargs:
            body["stop"] = kwargs["stop"]
        if tools:
            body["tools"] = tools
        if kwargs.get("thinking_enabled"):
            body["thinking_enabled"] = True

        return body, headers

    def reset(self):
        """No-op for pure parsing adapter"""
        pass

    async def parse_stream_chunk(
        self,
        raw_chunk: str
    ) -> AsyncGenerator[ParsedDelta, None]:
        """Parse one line of an OpenAI-format SSE stream into deltas.

        The method is stateless: each yielded ``ParsedDelta`` carries only
        what this line contained and nothing is accumulated between calls.

        Args:
            raw_chunk: A single SSE line, e.g. ``data: {...}``.

        Yields:
            - ``ParsedDelta(is_complete=True)`` for the ``[DONE]`` sentinel.
            - An empty ``ParsedDelta()`` when the payload reports an error
              (the error is also logged).
            - A usage-only ``ParsedDelta`` when the payload has usage data
              but no choices.
            - One ``ParsedDelta`` per choice that carries thinking, text,
              tool calls, a finish reason, or usage.

            Lines without a ``data:`` payload, undecodable payloads and
            payloads that are not a JSON object yield nothing.
        """
        event_type, data_str = self._parse_sse_line(raw_chunk)

        if not data_str:
            return
        if data_str == "[DONE]":
            yield ParsedDelta(is_complete=True)
            return

        try:
            chunk = json.loads(data_str)
        except json.JSONDecodeError:
            # Best effort: skip the line, but leave a trace for debugging.
            logger.warning("Skipping undecodable SSE payload: %.200s", data_str)
            return

        # Valid JSON that is not an object (number, string, list) has no
        # fields to read; the lookups below would raise on it.
        if not isinstance(chunk, dict):
            return

        if event_type == "error" or "error" in chunk:
            logger.error(
                "LLM stream returned an error: %s", chunk.get("error", chunk)
            )
            yield ParsedDelta()
            return

        # ``or`` also covers explicit JSON nulls, not just missing keys.
        usage = chunk.get("usage") or {}
        choices = chunk.get("choices") or []

        if not choices:
            # Some servers report the final token usage in a chunk whose
            # ``choices`` list is empty (e.g. OpenAI when
            # ``stream_options.include_usage`` is set). Without this branch
            # the loop below never runs and the usage would be lost.
            if usage:
                yield ParsedDelta(usage=usage)
            return

        for choice in choices:
            delta = choice.get("delta") or {}
            content = delta.get("content") or ""

            thinking, clean_text = self._extract_tags(content)
            if not thinking:
                # Same fallback parse_response applies to
                # ``message.reasoning_content``: some compatible APIs stream
                # reasoning in this separate field instead of <think> tags.
                thinking = delta.get("reasoning_content") or ""

            tool_calls = delta.get("tool_calls") or []

            # Any non-empty finish_reason marks the last delta of a choice.
            is_complete = bool(choice.get("finish_reason"))

            if thinking or clean_text or tool_calls or is_complete or usage:
                yield ParsedDelta(
                    thinking=thinking,
                    text=clean_text,
                    tool_calls=tool_calls,
                    is_complete=is_complete,
                    usage=usage
                )

    def parse_response(self, data: Dict[str, Any]) -> LLMResponse:
        """Parse a non-streaming chat completion into an ``LLMResponse``.

        Only the first entry of ``choices`` is read. Thinking comes from
        ``<think>`` tags in the message content, falling back to the
        message's ``reasoning_content`` field when the tags yield nothing.
        Missing, empty or ``null`` ``choices``, ``message``, ``content``,
        ``tool_calls`` and ``usage`` are treated as empty values rather than
        raising.

        Args:
            data: Decoded JSON response body.

        Returns:
            An ``LLMResponse`` with ``content``, ``thinking``, ``tool_calls``
            and ``usage`` populated.
        """
        # ``or`` covers an explicit null and an empty list as well as a
        # missing key; indexing ``[]`` or ``None`` here used to raise.
        choices = data.get("choices") or [{}]
        first_choice = choices[0] or {}
        message = first_choice.get("message") or {}

        content = message.get("content") or ""
        thinking, clean_content = self._extract_tags(content)
        if not thinking:
            thinking = message.get("reasoning_content") or ""

        return LLMResponse(
            content=clean_content,
            thinking=thinking,
            tool_calls=message.get("tool_calls") or [],
            usage=data.get("usage") or {}
        )

    def _parse_sse_line(self, line: str) -> tuple:
        """Parse a single SSE line, return (event_type, data)"""
        if line.startswith("event:"):
            return line[6:].strip(), None
        elif line.startswith("data:"):
            return "", line[5:].strip()
        return "", None

    def _extract_tags(self, content: str) -> tuple:
        """Extract thinking tags and return (thinking, clean_text)

        Handles thinking tags that may be split across chunks:
        - First </think> in content closes any thinking block
        - Everything before first </think> is thinking
        - Everything after first </think> is clean text
        """
        if not content:
            return "", ""

        content_lower = content.lower()

        # Find first </think> (marks end of thinking block)
        end_idx = content_lower.find("</think>")

        if end_idx != -1:
            # Found end tag - split at this point
            thinking_content = content[:end_idx].strip()
            # Find if there's also a start tag before this
            start_idx = content_lower.rfind("<think>", 0, end_idx)

            if start_idx != -1:
                # There's a complete thinking block
                thinking = content[start_idx + 7:end_idx]
                clean = content[end_idx + 9:]
            else:
                # No start tag - this is the end of a split thinking block
                # Everything before </think> was thinking
                thinking = content[:end_idx]
                clean = content[end_idx + 9:]

            return thinking, clean

        # No end tag found
        # Check if there's a start tag
        start_idx = content_lower.find("<think>")

        if start_idx != -1:
            # Has start tag but no end - all content after start is thinking
            thinking = content[start_idx + 7:]
            return thinking, ""
        else:
            # No tags at all - everything is clean
            return "", content