"""LLM API Client - Unified client with multi-Provider support Supports various LLM API formats: - OpenAI (api.openai.com) - DeepSeek (api.deepseek.com) - Anthropic (api.anthropic.com) - GLM/Zhipu AI Usage: from luxx.services.llm_client import LLMClient # Auto-detect provider client = LLMClient(api_key="...", api_url="...") # Specify provider client = LLMClient(api_key="...", api_url="...", provider_type="anthropic") # Streaming call async for delta in client.stream_call(model, messages, tools=tools): print(delta.text, delta.thinking, delta.tool_call) """ import json import logging import traceback from typing import Dict, List, Any, Optional, AsyncGenerator import httpx from luxx.config import config from luxx.services.llm_adapters import ( ProviderAdapter, OpenAIAdapter, AnthropicAdapter, ) from luxx.services.llm_response import ParsedDelta logger = logging.getLogger(__name__) class LLMClient: """LLM API Client with multi-Provider support Uses adapter pattern to support different API formats, auto-detects or manually specifies Provider type. Attributes: api_key: API key api_url: API base URL default_model: Default model provider_type: Provider type adapter: Current adapter instance """ # Provider type to adapter class mapping PROVIDER_ADAPTERS: Dict[str, type] = { # OpenAI-compatible formats "openai": OpenAIAdapter, "deepseek": OpenAIAdapter, "glm": OpenAIAdapter, "zhipu": OpenAIAdapter, # Anthropic formats "anthropic": AnthropicAdapter, "claude": AnthropicAdapter, } # URL keywords for provider detection PROVIDER_URL_KEYWORDS: Dict[str, List[str]] = { "anthropic": ["anthropic", "claude"], "deepseek": ["deepseek"], "glm": ["glm", "zhipu", "chatglm"], "openai": ["openai"], } def __init__( self, api_key: str = None, api_url: str = None, model: str = None, provider_type: str = None ): """Initialize LLM client Args: api_key: API key, defaults to config value api_url: API base URL, defaults to config value model: Default model name provider_type: Specify Provider type, defaults to auto-detect """ self.api_key = api_key or config.llm_api_key self.api_url = api_url or config.llm_api_url self.default_model = model # Detect or use specified provider if provider_type: self.provider_type = provider_type else: self.provider_type = self._detect_provider_type(api_url) self.adapter = self._create_adapter() self._client: Optional[httpx.AsyncClient] = None def _detect_provider_type(self, url: str = None) -> str: """Detect Provider type from URL Args: url: API URL, uses self.api_url if None Returns: Provider type string """ url = url or self.api_url url_lower = url.lower() for provider, keywords in self.PROVIDER_URL_KEYWORDS.items(): for keyword in keywords: if keyword in url_lower: logger.debug(f"Detected provider '{provider}' from URL: {url}") return provider logger.debug(f"Defaulting to 'openai' for URL: {url}") return "openai" def _create_adapter(self) -> ProviderAdapter: """Create adapter instance Returns: ProviderAdapter subclass instance """ adapter_class = self.PROVIDER_ADAPTERS.get( self.provider_type, OpenAIAdapter ) logger.info(f"Created {adapter_class.__name__} for provider: {self.provider_type}") return adapter_class() @property def supports_thinking(self) -> bool: """Whether current Provider supports thinking content""" return self.adapter.supports_thinking() @property def supports_tools(self) -> bool: """Whether current Provider supports tool calls""" return self.adapter.supports_tools() async def client(self) -> httpx.AsyncClient: """Get HTTP client (lazy load)""" if self._client is None or self._client.is_closed: self._client = httpx.AsyncClient(timeout=120.0) return self._client async def close(self): """Close HTTP client""" if self._client and not self._client.is_closed: await self._client.aclose() self._client = None def sync_call( self, model: str, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]] = None, **kwargs ) -> Dict: """Synchronous call to LLM (non-streaming) Args: model: Model name messages: Message list tools: Tool definition list **kwargs: Other parameters (temperature, max_tokens, thinking_enabled, etc.) Returns: Dict with keys: content, thinking, tool_calls, usage """ import asyncio return asyncio.get_event_loop().run_until_complete( self.async_sync_call(model, messages, tools, **kwargs) ) async def async_sync_call( self, model: str, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]] = None, **kwargs ) -> Dict: """Internal async sync call""" model = model or self.default_model kwargs["api_key"] = self.api_key body, headers = self.adapter.build_request( model, messages, tools, stream=False, **kwargs ) endpoint = self.api_url logger.info(f"Sync call to {endpoint} with model {model}") try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( endpoint, headers=headers, json=body ) response.raise_for_status() data = response.json() return self.adapter.parse_response(data) except httpx.HTTPStatusError as e: logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}") raise except Exception as e: logger.error(f"Sync call error: {e}\n{traceback.format_exc()}") raise async def stream_call( self, model: str, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]] = None, **kwargs ) -> AsyncGenerator[ParsedDelta, None]: """Streaming call to LLM Args: model: Model name messages: Message list tools: Tool definition list **kwargs: Other parameters Yields: ParsedDelta objects with accumulated content """ # Reset adapter buffers for new stream if hasattr(self.adapter, 'reset'): self.adapter.reset() model = model or self.default_model kwargs["api_key"] = self.api_key kwargs["stream"] = True body, headers = self.adapter.build_request( model, messages, tools, **kwargs ) endpoint = self.api_url logger.info(f"Stream call to {endpoint} with model {model}") try: async with httpx.AsyncClient(timeout=120.0) as client: async with client.stream( "POST", endpoint, headers=headers, json=body ) as response: logger.info(f"Response status: {response.status_code}") response.raise_for_status() async for line in response.aiter_lines(): # MiniMax may send multiple SSE events concatenated on one line # Format: data: {...}\ndata: {...}\n parts = line.split("data: ") for part in parts: part = part.strip() if part and part != "[DONE]" and part.startswith("{"): async for delta in self.adapter.parse_stream_chunk("data: " + part): yield delta except httpx.HTTPStatusError as e: status_code = e.response.status_code if e.response else "?" error_body = e.response.text if e.response else "" logger.error(f"HTTP error: {status_code} - {error_body}") yield ParsedDelta() except Exception as e: logger.error(f"Stream error: {type(e).__name__}: {e}\n{traceback.format_exc()}") yield ParsedDelta() # Convenience function def create_client( api_key: str = None, api_url: str = None, model: str = None, provider_type: str = None ) -> LLMClient: """Convenience function to create LLM client Args: api_key: API key api_url: API URL model: Model provider_type: Provider type Returns: LLMClient instance """ return LLMClient( api_key=api_key, api_url=api_url, model=model, provider_type=provider_type )