"""Anthropic Adapter - Anthropic Claude API adapter.

Supports Anthropic Claude API streaming and non-streaming responses.
"""

import json
import logging
from typing import Dict, List, Any, AsyncGenerator, Optional

from .base import ProviderAdapter
from ..llm_response import ParsedDelta, LLMResponse

logger = logging.getLogger(__name__)


class AnthropicAdapter(ProviderAdapter):
    """Adapter between OpenAI-style inputs and the Anthropic Messages API.

    The Anthropic API uses a different format from OpenAI:
    - Endpoint: POST /v1/messages
    - Streaming: typed SSE events (content_block_start, content_block_delta, ...)
    - Thinking: delivered as a separate "thinking" content block
    - Tools: delivered as "tool_use" content blocks

    Reference: https://docs.anthropic.com/claude/reference/messages
    """

    # Anthropic API endpoint suffix
    MESSAGES_PATH = "/v1/messages"

    # Value sent in the "anthropic-version" request header
    ANTHROPIC_VERSION = "2023-06-01"

    # Streaming event types (the "type" field of each SSE payload)
    BLOCK_MESSAGE_START = "message_start"
    BLOCK_CONTENT_BLOCK_START = "content_block_start"
    BLOCK_CONTENT_BLOCK_DELTA = "content_block_delta"
    BLOCK_CONTENT_BLOCK_STOP = "content_block_stop"
    BLOCK_MESSAGE_DELTA = "message_delta"
    BLOCK_MESSAGE_STOP = "message_stop"
    BLOCK_ERROR = "error"

    # Delta types carried by content_block_delta events
    DELTA_THINKING = "thinking_delta"
    DELTA_TEXT = "text_delta"
    DELTA_INPUT_JSON = "input_json_delta"

    # Content block types
    SUBTYPE_THINKING = "thinking"
    SUBTYPE_TEXT = "text"
    SUBTYPE_TOOL_USE = "tool_use"

    def __init__(self):
        """Create an adapter with empty streaming state."""
        # Running totals for the message currently being streamed.
        self._thinking_buffer = ""
        self._text_buffer = ""
        # State of the tool_use block currently being streamed.
        self._tool_args_buffer = ""
        self._current_tool_index = -1
        self._current_tool_name = ""
        # Token usage in OpenAI-style keys, filled in as events arrive.
        self._usage = {}

    @property
    def provider_type(self) -> str:
        """Return the provider identifier, "anthropic"."""
        return "anthropic"

    def build_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> tuple[Dict[str, Any], Dict[str, str]]:
        """Build the body and headers for an Anthropic Messages request.

        Compared with an OpenAI chat request, the system prompt is a
        top-level "system" field, "max_tokens" is always sent, and tools
        are described with "input_schema".

        Args:
            model: Model name (e.g., claude-3-5-sonnet-20241022)
            messages: OpenAI-format message list
            tools: OpenAI-format tool definition list
            **kwargs: Recognized keys are api_key, stream, max_tokens,
                system, thinking_enabled, thinking_budget_tokens,
                temperature, top_p and stop_sequences.

        Returns:
            tuple: (body, headers)
        """
        api_key = kwargs.get("api_key", "")

        headers = {
            "Content-Type": "application/json",
            # The Messages API authenticates API keys through x-api-key.
            "x-api-key": api_key,
            # Kept so gateways that relied on the Bearer header this adapter
            # used to send keep working.
            "Authorization": f"Bearer {api_key}",
            "anthropic-version": self.ANTHROPIC_VERSION,
        }

        body = {
            "model": model,
            "messages": self._convert_messages(messages),
            "stream": kwargs.get("stream", True),
            "max_tokens": kwargs.get("max_tokens", 4096),
        }

        # System prompt: an explicit kwarg wins, otherwise the first
        # system-role message is used.
        if "system" in kwargs:
            body["system"] = kwargs["system"]
        else:
            for msg in messages:
                if msg.get("role") == "system":
                    body["system"] = msg.get("content", "")
                    break

        # Extended thinking
        if kwargs.get("thinking_enabled"):
            budget = kwargs.get("thinking_budget_tokens", 10000)
            body["thinking"] = {
                "type": "enabled",
                "budget_tokens": budget,
            }
            # budget_tokens must be smaller than max_tokens or the API rejects
            # the request; widen max_tokens so the caller's answer allowance
            # is kept on top of the thinking budget.
            max_tokens = body["max_tokens"]
            if (
                isinstance(budget, int)
                and isinstance(max_tokens, int)
                and max_tokens <= budget
            ):
                body["max_tokens"] = max_tokens + budget

        # Tool definitions
        if tools:
            body["tools"] = self._convert_tools(tools)

        # Optional sampling parameters are forwarded only when supplied.
        for option in ("temperature", "top_p", "stop_sequences"):
            if option in kwargs:
                body[option] = kwargs[option]

        return body, headers

    @staticmethod
    def _normalize_content(content: Any) -> Any:
        """Return message content as a string or a list of content blocks."""
        if content is None:
            # OpenAI tool-call turns carry content=None; never send "None".
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Already a list of content blocks; pass it through unchanged
            # instead of sending its Python repr as text.
            return list(content)
        if isinstance(content, dict):
            return content.get("text", "")
        return str(content)

    @staticmethod
    def _parse_tool_arguments(arguments: Any) -> Dict[str, Any]:
        """Decode OpenAI tool-call arguments into a dict for tool_use "input"."""
        if isinstance(arguments, dict):
            return arguments
        if isinstance(arguments, str) and arguments.strip():
            try:
                parsed = json.loads(arguments)
            except json.JSONDecodeError:
                logger.warning("Tool call arguments are not valid JSON; sending empty input")
                return {}
            if isinstance(parsed, dict):
                return parsed
        return {}

    @staticmethod
    def _convert_usage(usage: Any) -> Dict[str, int]:
        """Map Anthropic usage counters onto OpenAI-style usage keys."""
        usage = usage if isinstance(usage, dict) else {}
        prompt_tokens = usage.get("input_tokens") or 0
        completion_tokens = usage.get("output_tokens") or 0
        return {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }

    def _assistant_blocks(
        self,
        content: Any,
        tool_calls: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Build the content blocks of an assistant turn that called tools."""
        blocks: List[Dict[str, Any]] = []

        text = self._normalize_content(content)
        if isinstance(text, list):
            blocks.extend(text)
        elif text:
            blocks.append({"type": self.SUBTYPE_TEXT, "text": text})

        for call in tool_calls:
            func = call.get("function") or {}
            blocks.append({
                "type": self.SUBTYPE_TOOL_USE,
                "id": call.get("id"),
                "name": func.get("name"),
                "input": self._parse_tool_arguments(func.get("arguments")),
            })

        return blocks

    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert OpenAI-format messages to Anthropic format.

        - System messages are skipped; build_request sends them as "system".
        - Assistant messages with "tool_calls" become tool_use content blocks.
        - Tool-role messages become tool_result blocks inside a user turn;
          consecutive tool messages share one user turn.
        - Other messages keep their role, with content as str or block list.

        Args:
            messages: OpenAI-format message list

        Returns:
            Anthropic-format message list
        """
        converted: List[Dict[str, Any]] = []
        # Content list of the user turn currently collecting tool results,
        # or None when the previous message was not a tool result.
        tool_results = None

        for msg in messages:
            role = msg.get("role")

            if role == "system":
                continue

            content = msg.get("content")

            if role == "tool":
                tool_use_id = msg.get("tool_call_id")
                if not tool_use_id:
                    # A result that cannot be tied to a tool_use block would
                    # be rejected by the API, so it is dropped as before.
                    logger.warning("Dropping tool message without tool_call_id")
                    continue
                if tool_results is None:
                    tool_results = []
                    converted.append({"role": "user", "content": tool_results})
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tool_use_id,
                    "content": self._normalize_content(content),
                })
                continue

            tool_results = None

            tool_calls = msg.get("tool_calls") if role == "assistant" else None
            if tool_calls:
                anthropic_content = self._assistant_blocks(content, tool_calls)
            else:
                anthropic_content = self._normalize_content(content)

            converted.append({
                "role": role,
                "content": anthropic_content,
            })

        return converted

    def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert tool definitions to Anthropic format.

        Anthropic tool format:
            {
                "name": "function_name",
                "description": "...",
                "input_schema": {...}  # JSON Schema
            }

        Args:
            tools: OpenAI-format tool list. Entries that already have
                "name" and "input_schema" and no "function" key are
                passed through unchanged.

        Returns:
            Anthropic-format tool list
        """
        converted = []

        for tool in tools:
            func = tool.get("function")
            if func is None and "name" in tool and "input_schema" in tool:
                converted.append(tool)
                continue

            func = func or {}
            converted.append({
                "name": func.get("name"),
                "description": func.get("description") or "",
                "input_schema": func.get("parameters") or {"type": "object", "properties": {}},
            })

        return converted

    async def parse_stream_chunk(
        self,
        raw_chunk: str
    ) -> AsyncGenerator[ParsedDelta, None]:
        """Parse one Anthropic streaming event.

        Handled event types:
        - message_start: resets per-message state, records prompt tokens
        - content_block_start: start of a thinking/text/tool_use block
        - content_block_delta: incremental thinking, text or tool JSON
        - content_block_stop: end of a content block
        - message_delta: usage update and optional stop reason
        - message_stop: end of the message
        - error: logged, then an empty delta is yielded

        The yielded delta carries only the increment from this event; the
        running totals are kept on the adapter's buffers.

        Args:
            raw_chunk: One event payload as JSON text, optionally still
                carrying the SSE "data:" prefix.

        Yields:
            ParsedDelta objects. Nothing is yielded when the payload is not
            a JSON object.
        """
        payload = raw_chunk.strip()
        if payload.startswith("data:"):
            payload = payload[len("data:"):].strip()

        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            return

        if not isinstance(chunk, dict):
            return

        chunk_type = chunk.get("type", "")
        result = ParsedDelta()

        if chunk_type == self.BLOCK_MESSAGE_START:
            # State is reset once per message, not once per chunk, so block
            # and usage state survives between events of the same message.
            self._reset_buffers()
            message = chunk.get("message") or {}
            # The prompt token count is reported here; message_delta later
            # carries the output count.
            self._usage = self._convert_usage(message.get("usage"))

        elif chunk_type == self.BLOCK_CONTENT_BLOCK_START:
            block = chunk.get("content_block") or {}
            block_type = block.get("type")

            if block_type == self.SUBTYPE_THINKING:
                # "thinking" is a plain string on the block; the nested-dict
                # form is still accepted for compatibility.
                initial = block.get("thinking", "")
                if isinstance(initial, dict):
                    initial = initial.get("thinking", "")
                if not isinstance(initial, str):
                    initial = ""
                self._thinking_buffer += initial
                result.thinking = initial

            elif block_type == self.SUBTYPE_TOOL_USE:
                self._current_tool_index = chunk.get("index", 0)
                self._current_tool_name = block.get("name", "")
                self._tool_args_buffer = ""

        elif chunk_type == self.BLOCK_CONTENT_BLOCK_DELTA:
            delta = chunk.get("delta") or {}
            delta_type = delta.get("type", "")

            if delta_type == self.DELTA_THINKING:
                piece = delta.get("thinking", "")
                self._thinking_buffer += piece
                result.thinking = piece

            elif delta_type == self.DELTA_TEXT:
                piece = delta.get("text", "")
                self._text_buffer += piece
                result.text = piece

            elif delta_type == self.DELTA_INPUT_JSON:
                # Tool arguments arrive as JSON fragments; they are collected
                # here and not placed on the delta.
                # NOTE(review): ParsedDelta's tool-call fields are not visible
                # from this file, so finished tool calls are not surfaced yet.
                self._tool_args_buffer += delta.get("partial_json", "")

        elif chunk_type == self.BLOCK_CONTENT_BLOCK_STOP:
            pass

        elif chunk_type == self.BLOCK_MESSAGE_DELTA:
            delta = chunk.get("delta") or {}
            usage = chunk.get("usage") or {}

            # Counters missing from this event keep their earlier value.
            prompt_tokens = usage.get("input_tokens")
            if prompt_tokens is None:
                prompt_tokens = self._usage.get("prompt_tokens", 0)
            completion_tokens = usage.get("output_tokens")
            if completion_tokens is None:
                completion_tokens = self._usage.get("completion_tokens", 0)

            self._usage = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }
            result.usage = dict(self._usage)

            if delta.get("stop_reason"):
                result.is_complete = True

        elif chunk_type == self.BLOCK_MESSAGE_STOP:
            result.is_complete = True

        elif chunk_type == self.BLOCK_ERROR:
            error = chunk.get("error") or {}
            error_msg = f"{error.get('type', '')}: {error.get('message', '')}"
            logger.error("Anthropic API error: %s", error_msg)
            yield ParsedDelta()
            return

        yield result

    def parse_response(
        self,
        data: Dict[str, Any]
    ) -> LLMResponse:
        """Parse an Anthropic-format non-streaming response.

        Anthropic response format:
            {
                "id": "...",
                "type": "message",
                "role": "assistant",
                "content": [
                    {"type": "text", "text": "..."},
                    {"type": "thinking", "thinking": "..."},
                    {"type": "tool_use", "id": "...", "name": "...", "input": {...}}
                ],
                "model": "...",
                "usage": {"input_tokens": ..., "output_tokens": ...}
            }

        Text blocks are joined with newlines, as are thinking blocks.
        tool_use blocks become OpenAI-style function tool calls whose
        "arguments" is the JSON-encoded input.

        Args:
            data: API response data

        Returns:
            LLMResponse object; tool_calls is None when there are none.
        """
        text_parts = []
        thinking_parts = []
        tool_calls = []

        for block in data.get("content") or []:
            if not isinstance(block, dict):
                continue
            block_type = block.get("type")

            if block_type == self.SUBTYPE_TEXT:
                text_parts.append(block.get("text", ""))

            elif block_type == self.SUBTYPE_THINKING:
                # Collect every thinking block rather than keeping the last.
                thinking_parts.append(block.get("thinking", ""))

            elif block_type == self.SUBTYPE_TOOL_USE:
                tool_calls.append({
                    "id": block.get("id"),
                    "type": "function",
                    "function": {
                        "name": block.get("name"),
                        "arguments": json.dumps(block.get("input", {})),
                    },
                })

        return LLMResponse(
            content="\n".join(text_parts),
            thinking="\n".join(thinking_parts),
            tool_calls=tool_calls if tool_calls else None,
            usage=self._convert_usage(data.get("usage")),
        )

    def supports_thinking(self) -> bool:
        """Return True: this adapter handles thinking blocks."""
        return True

    def supports_tools(self) -> bool:
        """Return True: this adapter handles tool definitions and tool_use blocks."""
        return True

    def _reset_buffers(self):
        """Reset streaming state (called when a new message starts)."""
        self._thinking_buffer = ""
        self._text_buffer = ""
        self._tool_args_buffer = ""
        self._current_tool_index = -1
        self._current_tool_name = ""
        self._usage = {}