Luxx/luxx/services/chat.py

146 lines
4.7 KiB
Python

"""Chat Service - Facade for conversation handling"""
import logging
from typing import List, Dict, AsyncGenerator, Optional

from luxx.models import Conversation
from luxx.services.llm_service import LLMService
from luxx.services.message_service import MessageService
from luxx.services.stream_service import StreamService
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ChatService:
    """
    Chat service facade.

    Coordinates between the LLM, message, and streaming services so that
    callers can stream a reply through a single entry point.
    """

    # Model name used when neither the caller/conversation nor the LLM
    # client supplies one.
    _FALLBACK_MODEL = "gpt-4"
    # Token limit used for conversation streams when the provider reports none.
    _FALLBACK_MAX_TOKENS = 8192

    def __init__(
        self,
        llm_service: Optional[LLMService] = None,
        message_service: Optional[MessageService] = None,
        stream_service: Optional[StreamService] = None,
    ):
        """
        Store the collaborating services, creating defaults where omitted.

        Args:
            llm_service: Service used to obtain an LLM client; a new
                LLMService is created when None
            message_service: Service used to build the message list; a new
                MessageService is created when None
            stream_service: Service that performs the streaming; a new
                StreamService is created when None
        """
        # Compare against None explicitly: an injected service whose truth
        # value happens to be False (e.g. it defines __len__ or __bool__)
        # must not be silently replaced by a default instance.
        self.llm_service = LLMService() if llm_service is None else llm_service
        self.message_service = (
            MessageService() if message_service is None else message_service
        )
        self.stream_service = (
            StreamService() if stream_service is None else stream_service
        )

    async def stream_response(
        self,
        conversation: Conversation,
        user_message: str,
        thinking_enabled: bool = False,
        enabled_tools: Optional[List[str]] = None,
        user_id: Optional[int] = None,
        username: Optional[str] = None,
        workspace: Optional[str] = None,
        user_permission_level: int = 1,
    ) -> AsyncGenerator[str, None]:
        """
        Stream response for user conversations.

        Args:
            conversation: Conversation object; its model, temperature,
                thinking_enabled and id attributes are read here
            user_message: User's message
            thinking_enabled: Enable reasoning; reasoning is also enabled
                when conversation.thinking_enabled is truthy
            enabled_tools: List of enabled tool names; None or an empty list
                means no tools are passed to the stream
            user_id: User ID
            username: Username
            workspace: Workspace path
            user_permission_level: Permission level

        Yields:
            SSE event strings
        """
        # Build messages. The return value of add_user_message is ignored,
        # so it is presumably expected to extend `messages` in place
        # (TODO confirm against MessageService).
        messages = self.message_service.build_messages(conversation)
        self.message_service.add_user_message(messages, user_message)

        # Get tools: only consult the stream service when names were given.
        if enabled_tools:
            tools = self.stream_service.filter_tools(enabled_tools)
        else:
            tools = []

        # Get LLM config: get_client yields the client plus the provider's
        # token limit.
        llm, provider_max_tokens = self.llm_service.get_client(conversation)
        # Precedence: conversation model, then client default, then fallback.
        model = conversation.model or llm.default_model or self._FALLBACK_MODEL
        thinking_enabled = thinking_enabled or conversation.thinking_enabled

        # Stream response
        async for event in self.stream_service.stream(
            messages=messages,
            model=model,
            tools=tools,
            temperature=conversation.temperature,
            max_tokens=provider_max_tokens or self._FALLBACK_MAX_TOKENS,
            thinking_enabled=thinking_enabled,
            llm_client=llm,
            conversation=conversation,
            conversation_id=conversation.id,
            user_id=user_id,
            username=username,
            workspace=workspace,
            user_permission_level=user_permission_level,
        ):
            yield event

    async def stream_response_for_agent(
        self,
        messages: List[Dict],
        model: Optional[str] = None,
        tools: Optional[List[Dict]] = None,
        temperature: float = 0.7,
        max_tokens: int = 2048,
        thinking_enabled: bool = False,
        provider_id: Optional[int] = None,
        workspace: Optional[str] = None,
        user_id: Optional[int] = None,
        username: Optional[str] = None,
        user_permission_level: int = 1,
    ) -> AsyncGenerator[str, None]:
        """
        Stream response for agents (reuses user chat logic).

        Args:
            messages: Pre-built message list (should include system prompt
                and history)
            model: Model name; falls back to the client's default model and
                then to a built-in default
            tools: List of tool definitions; None is passed on as an empty
                list
            temperature: Sampling temperature
            max_tokens: Maximum tokens; used only when the provider does not
                report a limit of its own
            thinking_enabled: Enable reasoning
            provider_id: LLM provider ID
            workspace: Workspace path
            user_id: User ID
            username: Username
            user_permission_level: Permission level

        Yields:
            SSE event strings
        """
        # Get LLM config
        llm, provider_max_tokens = self.llm_service.get_client(
            provider_id=provider_id
        )
        model = model or llm.default_model or self._FALLBACK_MODEL
        # The provider's limit takes priority over the caller's max_tokens.
        effective_max_tokens = provider_max_tokens or max_tokens

        # Stream response
        async for event in self.stream_service.stream(
            messages=messages,
            model=model,
            tools=tools or [],
            temperature=temperature,
            max_tokens=effective_max_tokens,
            thinking_enabled=thinking_enabled,
            llm_client=llm,
            provider_id=provider_id,
            conversation_id=None,  # Agents don't save to conversation
            user_id=user_id,
            username=username,
            workspace=workspace,
            user_permission_level=user_permission_level,
        ):
            yield event
# Global service instance: constructed at import time with no arguments, so
# it creates its own default LLM, message, and stream services.
chat_service = ChatService()