chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
from typing import Type, Union
from .batches.transformation import AnthropicBatchesConfig
from .chat.transformation import AnthropicConfig
__all__ = ["AnthropicBatchesConfig", "AnthropicConfig"]
def get_anthropic_config(
    url_route: str,
) -> Union[Type[AnthropicBatchesConfig], Type[AnthropicConfig]]:
    """Select the provider config class for a given Anthropic URL route.

    Routes that address batch results (``messages/batches`` together with
    ``results``) are served by the batches config; every other route falls
    back to the standard chat config.
    """
    is_batch_results_route = (
        "messages/batches" in url_route and "results" in url_route
    )
    return AnthropicBatchesConfig if is_batch_results_route else AnthropicConfig

View File

@@ -0,0 +1,4 @@
from .handler import AnthropicBatchesHandler
from .transformation import AnthropicBatchesConfig
__all__ = ["AnthropicBatchesHandler", "AnthropicBatchesConfig"]

View File

@@ -0,0 +1,167 @@
"""
Anthropic Batches API Handler
"""
import asyncio
from typing import TYPE_CHECKING, Any, Coroutine, Optional, Union
import httpx
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
)
from litellm.types.utils import LiteLLMBatch, LlmProviders
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
from ..common_utils import AnthropicModelInfo
from .transformation import AnthropicBatchesConfig
class AnthropicBatchesHandler:
    """
    Handler for Anthropic Message Batches API.

    Supports:
    - retrieve_batch() / aretrieve_batch() - Retrieve batch status and information
    """

    def __init__(self):
        self.anthropic_model_info = AnthropicModelInfo()
        self.provider_config = AnthropicBatchesConfig()

    @staticmethod
    def _build_default_logging_obj(batch_id: str) -> LiteLLMLoggingObj:
        """Create a minimal logging object for calls made without one."""
        from litellm.litellm_core_utils.litellm_logging import (
            Logging as LiteLLMLoggingObjClass,
        )

        return LiteLLMLoggingObjClass(
            model="anthropic/unknown",
            messages=[],
            stream=False,
            call_type="batch_retrieve",
            start_time=None,
            litellm_call_id=f"batch_retrieve_{batch_id}",
            function_id="batch_retrieve",
        )

    async def aretrieve_batch(
        self,
        batch_id: str,
        api_base: Optional[str],
        api_key: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        logging_obj: Optional[LiteLLMLoggingObj] = None,
    ) -> LiteLLMBatch:
        """
        Async: Retrieve a batch from Anthropic.

        Args:
            batch_id: The batch ID to retrieve
            api_base: Anthropic API base URL
            api_key: Anthropic API key
            timeout: Request timeout
            max_retries: Max retry attempts (unused for now)
            logging_obj: Optional logging object

        Returns:
            LiteLLMBatch: Batch information in OpenAI format

        Raises:
            ValueError: if no API key could be resolved.
        """
        # Resolve API credentials (environment fallback via AnthropicModelInfo).
        api_base = api_base or self.anthropic_model_info.get_api_base(api_base)
        api_key = api_key or self.anthropic_model_info.get_api_key()
        if not api_key:
            raise ValueError("Missing Anthropic API Key")

        # Fall back to a minimal logging object when the caller supplied none.
        if logging_obj is None:
            logging_obj = self._build_default_logging_obj(batch_id)

        # Build the retrieval URL and the authenticated request headers.
        batch_url = self.provider_config.get_retrieve_batch_url(
            api_base=api_base,
            batch_id=batch_id,
            optional_params={},
            litellm_params={},
        )
        request_headers = self.provider_config.validate_environment(
            headers={},
            model="",
            messages=[],
            optional_params={},
            litellm_params={},
            api_key=api_key,
            api_base=api_base,
        )

        logging_obj.pre_call(
            input=batch_id,
            api_key=api_key,
            additional_args={
                "api_base": batch_url,
                "headers": request_headers,
                "complete_input_dict": {},
            },
        )

        # Issue the GET and surface HTTP errors before transformation.
        client = get_async_httpx_client(llm_provider=LlmProviders.ANTHROPIC)
        http_response = await client.get(url=batch_url, headers=request_headers)
        http_response.raise_for_status()

        # Convert the Anthropic MessageBatch payload to OpenAI Batch format.
        return self.provider_config.transform_retrieve_batch_response(
            model=None,
            raw_response=http_response,
            logging_obj=logging_obj,
            litellm_params={},
        )

    def retrieve_batch(
        self,
        _is_async: bool,
        batch_id: str,
        api_base: Optional[str],
        api_key: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        logging_obj: Optional[LiteLLMLoggingObj] = None,
    ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
        """
        Retrieve a batch from Anthropic.

        Args:
            _is_async: Whether to run asynchronously
            batch_id: The batch ID to retrieve
            api_base: Anthropic API base URL
            api_key: Anthropic API key
            timeout: Request timeout
            max_retries: Max retry attempts (unused for now)
            logging_obj: Optional logging object

        Returns:
            LiteLLMBatch or Coroutine: Batch information in OpenAI format
        """
        # Build the coroutine once; return it directly for async callers,
        # or drive it to completion with asyncio.run for sync callers.
        coro = self.aretrieve_batch(
            batch_id=batch_id,
            api_base=api_base,
            api_key=api_key,
            timeout=timeout,
            max_retries=max_retries,
            logging_obj=logging_obj,
        )
        if _is_async:
            return coro
        return asyncio.run(coro)

View File

@@ -0,0 +1,312 @@
import json
import time
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, cast
import httpx
from httpx import Headers, Response
from litellm.llms.base_llm.batches.transformation import BaseBatchesConfig
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.openai import AllMessageValues, CreateBatchRequest
from litellm.types.utils import LiteLLMBatch, LlmProviders, ModelResponse
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
LoggingClass = LiteLLMLoggingObj
else:
LoggingClass = Any
class AnthropicBatchesConfig(BaseBatchesConfig):
    """Request/response transformation config for Anthropic's Message Batches API.

    Translates Anthropic batch endpoints and payloads into LiteLLM's
    OpenAI-compatible batch types. Batch retrieval is implemented; the
    batch-creation transforms are placeholders that raise
    ``NotImplementedError``.
    """

    def __init__(self):
        from ..chat.transformation import AnthropicConfig
        from ..common_utils import AnthropicModelInfo

        self.anthropic_chat_config = AnthropicConfig()  # initialize once
        self.anthropic_model_info = AnthropicModelInfo()

    @property
    def custom_llm_provider(self) -> LlmProviders:
        """Return the LLM provider type for this configuration."""
        return LlmProviders.ANTHROPIC

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Validate and prepare environment-specific headers and parameters.

        Raises:
            ValueError: if no API key is provided and none can be resolved
                from the environment.
        """
        # Resolve api_key from environment if not provided
        api_key = api_key or self.anthropic_model_info.get_api_key()
        if api_key is None:
            raise ValueError(
                "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            )
        _headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": api_key,
        }
        # Add beta header for message batches
        if "anthropic-beta" not in headers:
            headers["anthropic-beta"] = "message-batches-2024-09-24"
        # NOTE(review): _headers is merged last, so it overwrites any
        # caller-supplied values for these four keys — confirm intentional.
        headers.update(_headers)
        return headers

    def get_complete_batch_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: Dict,
        litellm_params: Dict,
        data: CreateBatchRequest,
    ) -> str:
        """Get the complete URL for batch creation request."""
        api_base = api_base or self.anthropic_model_info.get_api_base(api_base)
        # Only append the batches suffix when not already present (idempotent).
        if not api_base.endswith("/v1/messages/batches"):
            api_base = f"{api_base.rstrip('/')}/v1/messages/batches"
        return api_base

    def transform_create_batch_request(
        self,
        model: str,
        create_batch_data: CreateBatchRequest,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[bytes, str, Dict[str, Any]]:
        """
        Transform the batch creation request to Anthropic format.

        Not currently implemented - placeholder to satisfy abstract base class.
        """
        raise NotImplementedError("Batch creation not yet implemented for Anthropic")

    def transform_create_batch_response(
        self,
        model: Optional[str],
        raw_response: httpx.Response,
        logging_obj: LoggingClass,
        litellm_params: dict,
    ) -> LiteLLMBatch:
        """
        Transform Anthropic MessageBatch creation response to LiteLLM format.

        Not currently implemented - placeholder to satisfy abstract base class.
        """
        raise NotImplementedError("Batch creation not yet implemented for Anthropic")

    def get_retrieve_batch_url(
        self,
        api_base: Optional[str],
        batch_id: str,
        optional_params: Dict,
        litellm_params: Dict,
    ) -> str:
        """
        Get the complete URL for batch retrieval request.

        Args:
            api_base: Base API URL (optional, will use default if not provided)
            batch_id: Batch ID to retrieve
            optional_params: Optional parameters
            litellm_params: LiteLLM parameters

        Returns:
            Complete URL for Anthropic batch retrieval: {api_base}/v1/messages/batches/{batch_id}
        """
        api_base = api_base or self.anthropic_model_info.get_api_base(api_base)
        return f"{api_base.rstrip('/')}/v1/messages/batches/{batch_id}"

    def transform_retrieve_batch_request(
        self,
        batch_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[bytes, str, Dict[str, Any]]:
        """
        Transform batch retrieval request for Anthropic.

        For Anthropic, the URL is constructed by get_retrieve_batch_url(),
        so this method returns an empty dict (no additional request params needed).
        """
        # No additional request params needed - URL is handled by get_retrieve_batch_url
        return {}

    def transform_retrieve_batch_response(
        self,
        model: Optional[str],
        raw_response: httpx.Response,
        logging_obj: LoggingClass,
        litellm_params: dict,
    ) -> LiteLLMBatch:
        """Transform Anthropic MessageBatch retrieval response to LiteLLM format.

        Raises:
            ValueError: if the raw response body is not valid JSON.
        """
        try:
            response_data = raw_response.json()
        except Exception as e:
            raise ValueError(f"Failed to parse Anthropic batch response: {e}")
        # Map Anthropic MessageBatch to OpenAI Batch format
        batch_id = response_data.get("id", "")
        processing_status = response_data.get("processing_status", "in_progress")
        # Map Anthropic processing_status to OpenAI status.
        # Anthropic exposes a coarse status; anything not listed below
        # (including unknown values) falls back to "in_progress".
        status_mapping: Dict[
            str,
            Literal[
                "validating",
                "failed",
                "in_progress",
                "finalizing",
                "completed",
                "expired",
                "cancelling",
                "cancelled",
            ],
        ] = {
            "in_progress": "in_progress",
            "canceling": "cancelling",
            "ended": "completed",
        }
        openai_status = status_mapping.get(processing_status, "in_progress")

        # Parse timestamps
        def parse_timestamp(ts_str: Optional[str]) -> Optional[int]:
            # Convert an ISO-8601 timestamp (possibly "Z"-suffixed) to epoch
            # seconds; returns None for missing or unparseable values.
            if not ts_str:
                return None
            try:
                from datetime import datetime

                dt = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
                return int(dt.timestamp())
            except Exception:
                return None

        created_at = parse_timestamp(response_data.get("created_at"))
        ended_at = parse_timestamp(response_data.get("ended_at"))
        expires_at = parse_timestamp(response_data.get("expires_at"))
        cancel_initiated_at = parse_timestamp(response_data.get("cancel_initiated_at"))
        archived_at = parse_timestamp(response_data.get("archived_at"))
        # Extract request counts
        request_counts_data = response_data.get("request_counts", {})
        from openai.types.batch import BatchRequestCounts

        # OpenAI's total/completed/failed are derived from Anthropic's
        # finer-grained per-request counters.
        request_counts = BatchRequestCounts(
            total=sum(
                [
                    request_counts_data.get("processing", 0),
                    request_counts_data.get("succeeded", 0),
                    request_counts_data.get("errored", 0),
                    request_counts_data.get("canceled", 0),
                    request_counts_data.get("expired", 0),
                ]
            ),
            completed=request_counts_data.get("succeeded", 0),
            failed=request_counts_data.get("errored", 0),
        )
        return LiteLLMBatch(
            id=batch_id,
            object="batch",
            endpoint="/v1/messages",
            errors=None,
            # NOTE(review): literal string "None" (Anthropic batches have no
            # input file id) — presumably to satisfy the OpenAI type; confirm.
            input_file_id="None",
            completion_window="24h",
            status=openai_status,
            # The batch id doubles as the handle used to fetch results.
            output_file_id=batch_id,
            error_file_id=None,
            created_at=created_at or int(time.time()),
            in_progress_at=created_at if processing_status == "in_progress" else None,
            expires_at=expires_at,
            finalizing_at=None,
            completed_at=ended_at if processing_status == "ended" else None,
            failed_at=None,
            expired_at=archived_at if archived_at else None,
            cancelling_at=cancel_initiated_at
            if processing_status == "canceling"
            else None,
            cancelled_at=ended_at
            if processing_status == "canceling" and ended_at
            else None,
            request_counts=request_counts,
            metadata={},
        )

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[Dict, Headers]
    ) -> "BaseLLMException":
        """Get the appropriate error class for Anthropic."""
        from ..common_utils import AnthropicError

        # Convert Dict to Headers if needed
        if isinstance(headers, dict):
            headers_obj: Optional[Headers] = Headers(headers)
        else:
            headers_obj = headers if isinstance(headers, Headers) else None
        return AnthropicError(
            status_code=status_code, message=error_message, headers=headers_obj
        )

    def transform_response(
        self,
        model: str,
        raw_response: Response,
        model_response: ModelResponse,
        logging_obj: LoggingClass,
        request_data: Dict,
        messages: List[AllMessageValues],
        optional_params: Dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """Transform a batch results payload into *model_response*.

        The raw response is treated as JSONL (one JSON object per line); each
        line's ``result.message`` is run through the chat transformation and
        the per-line usage objects are summed onto ``model_response.usage``.
        Lines that fail to parse as JSON are skipped.
        """
        from litellm.cost_calculator import BaseTokenUsageProcessor
        from litellm.types.utils import Usage

        response_text = raw_response.text.strip()
        all_usage: List[Usage] = []
        try:
            # Split by newlines and try to parse each line as JSON
            lines = response_text.split("\n")
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                try:
                    response_json = json.loads(line)
                    # Update model_response with the parsed JSON
                    completion_response = response_json["result"]["message"]
                    transformed_response = (
                        self.anthropic_chat_config.transform_parsed_response(
                            completion_response=completion_response,
                            raw_response=raw_response,
                            model_response=model_response,
                        )
                    )
                    transformed_response_usage = getattr(
                        transformed_response, "usage", None
                    )
                    if transformed_response_usage:
                        all_usage.append(cast(Usage, transformed_response_usage))
                except json.JSONDecodeError:
                    # Non-JSON lines are tolerated and skipped.
                    continue
            ## SUM ALL USAGE
            combined_usage = BaseTokenUsageProcessor.combine_usage_objects(all_usage)
            setattr(model_response, "usage", combined_usage)
            return model_response
        except Exception as e:
            raise e

View File

@@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator

View File

@@ -0,0 +1,10 @@
from litellm.llms.anthropic.chat.guardrail_translation.handler import (
AnthropicMessagesHandler,
)
from litellm.types.utils import CallTypes
# Maps the Anthropic /v1/messages call type to the handler that translates
# its request/response format for unified guardrails.
guardrail_translation_mappings = {
    CallTypes.anthropic_messages: AnthropicMessagesHandler,
}
__all__ = ["guardrail_translation_mappings"]

View File

@@ -0,0 +1,688 @@
"""
Anthropic Message Handler for Unified Guardrails
This module provides a class-based handler for Anthropic-format messages.
The class methods can be overridden for custom behavior.
Pattern Overview:
-----------------
1. Extract text content from messages/responses (both string and list formats)
2. Create async tasks to apply guardrails to each text segment
3. Track mappings to know where each response belongs
4. Apply guardrail responses back to the original structure
"""
import json
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast
from litellm._logging import verbose_proxy_logger
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
LiteLLMAnthropicMessagesAdapter,
)
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
AnthropicPassthroughLoggingHandler,
)
from litellm.types.llms.anthropic import (
AllAnthropicToolsValues,
AnthropicMessagesRequest,
)
from litellm.types.llms.openai import (
ChatCompletionToolCallChunk,
ChatCompletionToolParam,
)
from litellm.types.utils import (
ChatCompletionMessageToolCall,
Choices,
GenericGuardrailAPIInputs,
ModelResponse,
)
if TYPE_CHECKING:
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
class AnthropicMessagesHandler(BaseTranslation):
    """
    Handler for processing Anthropic messages with guardrails.

    This class provides methods to:
    1. Process input messages (pre-call hook)
    2. Process output responses (post-call hook)

    Methods can be overridden to customize behavior for different message formats.
    """

    def __init__(self):
        super().__init__()
        # Adapter used to translate Anthropic payloads to OpenAI format.
        self.adapter = LiteLLMAnthropicMessagesAdapter()

    async def process_input_messages(
        self,
        data: dict,
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
    ) -> Any:
        """
        Process input messages by applying guardrails to text content.

        Extracts all text/images from ``data["messages"]``, applies the
        guardrail in one batched call, then writes the (possibly modified)
        texts back into the original message structure. Returns *data*
        unchanged when it has no ``messages`` key or no text to check.
        """
        messages = data.get("messages")
        if messages is None:
            return data
        (
            chat_completion_compatible_request,
            _tool_name_mapping,
        ) = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
            # Use a shallow copy to avoid mutating request data (pop on litellm_metadata).
            anthropic_message_request=cast(AnthropicMessagesRequest, data.copy())
        )
        structured_messages = chat_completion_compatible_request.get("messages", [])
        texts_to_check: List[str] = []
        images_to_check: List[str] = []
        # Tools come from the OpenAI-translated request, already in
        # ChatCompletionToolParam format.
        tools_to_check: List[
            ChatCompletionToolParam
        ] = chat_completion_compatible_request.get("tools", [])
        task_mappings: List[Tuple[int, Optional[int]]] = []
        # Track (message_index, content_index) for each text
        # content_index is None for string content, int for list content
        # Step 1: Extract all text content and images
        for msg_idx, message in enumerate(messages):
            self._extract_input_text_and_images(
                message=message,
                msg_idx=msg_idx,
                texts_to_check=texts_to_check,
                images_to_check=images_to_check,
                task_mappings=task_mappings,
            )
        # Step 2: Apply guardrail to all texts in batch
        if texts_to_check:
            inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
            if images_to_check:
                inputs["images"] = images_to_check
            if tools_to_check:
                inputs["tools"] = tools_to_check
            if structured_messages:
                inputs["structured_messages"] = structured_messages
            # Include model information if available
            model = data.get("model")
            if model:
                inputs["model"] = model
            guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
                inputs=inputs,
                request_data=data,
                input_type="request",
                logging_obj=litellm_logging_obj,
            )
            guardrailed_texts = guardrailed_inputs.get("texts", [])
            guardrailed_tools = guardrailed_inputs.get("tools")
            # The guardrail may rewrite tool definitions as well as texts.
            if guardrailed_tools is not None:
                data["tools"] = guardrailed_tools
            # Step 3: Map guardrail responses back to original message structure
            await self._apply_guardrail_responses_to_input(
                messages=messages,
                responses=guardrailed_texts,
                task_mappings=task_mappings,
            )
        verbose_proxy_logger.debug(
            "Anthropic Messages: Processed input messages: %s", messages
        )
        return data

    def extract_request_tool_names(self, data: dict) -> List[str]:
        """Extract tool names from Anthropic messages request (tools[].name)."""
        names: List[str] = []
        for tool in data.get("tools") or []:
            if isinstance(tool, dict) and tool.get("name"):
                names.append(str(tool["name"]))
        return names

    def _extract_input_text_and_images(
        self,
        message: Dict[str, Any],
        msg_idx: int,
        texts_to_check: List[str],
        images_to_check: List[str],
        task_mappings: List[Tuple[int, Optional[int]]],
    ) -> None:
        """
        Extract text content and images from a message.

        Appends found texts/images to the provided accumulator lists and
        records a (msg_idx, content_idx) mapping for each text so guardrail
        output can be written back in place.

        Override this method to customize text/image extraction logic.
        """
        content = message.get("content", None)
        tools = message.get("tools", None)
        if content is None and tools is None:
            return
        ## CHECK FOR TEXT + IMAGES
        if content is not None and isinstance(content, str):
            # Simple string content
            texts_to_check.append(content)
            task_mappings.append((msg_idx, None))
        elif content is not None and isinstance(content, list):
            # List content (e.g., multimodal with text and images)
            for content_idx, content_item in enumerate(content):
                # Extract text
                text_str = content_item.get("text", None)
                if text_str is not None:
                    texts_to_check.append(text_str)
                    task_mappings.append((msg_idx, int(content_idx)))
                # Extract images
                if content_item.get("type") == "image":
                    source = content_item.get("source", {})
                    if isinstance(source, dict):
                        # Could be base64 or url
                        data = source.get("data")
                        if data:
                            images_to_check.append(data)

    def _extract_input_tools(
        self,
        tools: List[Dict[str, Any]],
        tools_to_check: List[ChatCompletionToolParam],
    ) -> None:
        """
        Extract tools from a message.

        Translates Anthropic tool definitions into OpenAI format and extends
        *tools_to_check* in place.
        """
        ## CHECK FOR TOOLS
        if tools is not None and isinstance(tools, list):
            # TRANSFORM ANTHROPIC TOOLS TO OPENAI TOOLS
            openai_tools = self.adapter.translate_anthropic_tools_to_openai(
                tools=cast(List[AllAnthropicToolsValues], tools)
            )
            tools_to_check.extend(openai_tools)  # type: ignore

    async def _apply_guardrail_responses_to_input(
        self,
        messages: List[Dict[str, Any]],
        responses: List[str],
        task_mappings: List[Tuple[int, Optional[int]]],
    ) -> None:
        """
        Apply guardrail responses back to input messages.

        Uses the (msg_idx, content_idx) mappings recorded during extraction;
        responses and task_mappings are expected to be index-aligned.

        Override this method to customize how responses are applied.
        """
        for task_idx, guardrail_response in enumerate(responses):
            mapping = task_mappings[task_idx]
            msg_idx = cast(int, mapping[0])
            content_idx_optional = cast(Optional[int], mapping[1])
            content = messages[msg_idx].get("content", None)
            if content is None:
                continue
            if isinstance(content, str) and content_idx_optional is None:
                # Replace string content with guardrail response
                messages[msg_idx]["content"] = guardrail_response
            elif isinstance(content, list) and content_idx_optional is not None:
                # Replace specific text item in list content
                messages[msg_idx]["content"][content_idx_optional][
                    "text"
                ] = guardrail_response

    async def process_output_response(
        self,
        response: "AnthropicMessagesResponse",
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
    ) -> Any:
        """
        Process output response by applying guardrails to text content and tool calls.

        Args:
            response: Anthropic MessagesResponse object
            guardrail_to_apply: The guardrail instance to apply
            litellm_logging_obj: Optional logging object
            user_api_key_dict: User API key metadata to pass to guardrails

        Returns:
            Modified response with guardrail applied to content

        Response Format Support:
        - List content: response.content = [
            {"type": "text", "text": "text here"},
            {"type": "tool_use", "id": "...", "name": "...", "input": {...}},
            ...
          ]
        """
        texts_to_check: List[str] = []
        images_to_check: List[str] = []
        tool_calls_to_check: List[ChatCompletionToolCallChunk] = []
        task_mappings: List[Tuple[int, Optional[int]]] = []
        # Track (content_index, None) for each text
        # Handle both dict and object responses
        response_content: List[Any] = []
        if isinstance(response, dict):
            response_content = response.get("content", []) or []
        elif hasattr(response, "content"):
            content = getattr(response, "content", None)
            response_content = content or []
        else:
            response_content = []
        if not response_content:
            return response
        # Step 1: Extract all text content and tool calls from response
        for content_idx, content_block in enumerate(response_content):
            # Handle both dict and Pydantic object content blocks
            block_dict: Dict[str, Any] = {}
            if isinstance(content_block, dict):
                block_type = content_block.get("type")
                block_dict = cast(Dict[str, Any], content_block)
            elif hasattr(content_block, "type"):
                block_type = getattr(content_block, "type", None)
                # Convert Pydantic object to dict for processing
                if hasattr(content_block, "model_dump"):
                    block_dict = content_block.model_dump()
                else:
                    block_dict = {
                        "type": block_type,
                        "text": getattr(content_block, "text", None),
                    }
            else:
                continue
            if block_type in ["text", "tool_use"]:
                self._extract_output_text_and_images(
                    content_block=block_dict,
                    content_idx=content_idx,
                    texts_to_check=texts_to_check,
                    images_to_check=images_to_check,
                    task_mappings=task_mappings,
                    tool_calls_to_check=tool_calls_to_check,
                )
        # Step 2: Apply guardrail to all texts in batch
        if texts_to_check or tool_calls_to_check:
            # Create a request_data dict with response info and user API key metadata
            request_data: dict = {"response": response}
            # Add user API key metadata with prefixed keys
            user_metadata = self.transform_user_api_key_dict_to_metadata(
                user_api_key_dict
            )
            if user_metadata:
                request_data["litellm_metadata"] = user_metadata
            inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
            if images_to_check:
                inputs["images"] = images_to_check
            if tool_calls_to_check:
                inputs["tool_calls"] = tool_calls_to_check
            # Include model information from the response if available
            response_model = None
            if isinstance(response, dict):
                response_model = response.get("model")
            elif hasattr(response, "model"):
                response_model = getattr(response, "model", None)
            if response_model:
                inputs["model"] = response_model
            guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
                inputs=inputs,
                request_data=request_data,
                input_type="response",
                logging_obj=litellm_logging_obj,
            )
            guardrailed_texts = guardrailed_inputs.get("texts", [])
            # Step 3: Map guardrail responses back to original response structure
            await self._apply_guardrail_responses_to_output(
                response=response,
                responses=guardrailed_texts,
                task_mappings=task_mappings,
            )
        verbose_proxy_logger.debug(
            "Anthropic Messages: Processed output response: %s", response
        )
        return response

    async def process_output_streaming_response(
        self,
        responses_so_far: List[Any],
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
    ) -> List[Any]:
        """
        Process output streaming response by applying guardrails to text content.

        Get the string so far, check the apply guardrail to the string so far, and return the list of responses so far.
        """
        has_ended = self._check_streaming_has_ended(responses_so_far)
        if has_ended:
            # build the model response from the responses_so_far
            built_response = (
                AnthropicPassthroughLoggingHandler._build_complete_streaming_response(
                    all_chunks=responses_so_far,
                    litellm_logging_obj=cast("LiteLLMLoggingObj", litellm_logging_obj),
                    model="",
                )
            )
            # Check if model_response is valid and has choices before accessing
            if (
                built_response is not None
                and hasattr(built_response, "choices")
                and built_response.choices
            ):
                model_response = cast(ModelResponse, built_response)
                first_choice = cast(Choices, model_response.choices[0])
                tool_calls_list = cast(
                    Optional[List[ChatCompletionMessageToolCall]],
                    first_choice.message.tool_calls,
                )
                string_so_far = first_choice.message.content
                guardrail_inputs = GenericGuardrailAPIInputs()
                if string_so_far:
                    guardrail_inputs["texts"] = [string_so_far]
                if tool_calls_list:
                    guardrail_inputs["tool_calls"] = tool_calls_list
                _guardrailed_inputs = await guardrail_to_apply.apply_guardrail(  # allow rejecting the response, if invalid
                    inputs=guardrail_inputs,
                    request_data={},
                    input_type="response",
                    logging_obj=litellm_logging_obj,
                )
            else:
                verbose_proxy_logger.debug(
                    "Skipping output guardrail - model response has no choices"
                )
            return responses_so_far
        # Stream still in flight: guardrail only the accumulated text so far.
        string_so_far = self.get_streaming_string_so_far(responses_so_far)
        _guardrailed_inputs = await guardrail_to_apply.apply_guardrail(  # allow rejecting the response, if invalid
            inputs={"texts": [string_so_far]},
            request_data={},
            input_type="response",
            logging_obj=litellm_logging_obj,
        )
        return responses_so_far

    def get_streaming_string_so_far(self, responses_so_far: List[Any]) -> str:
        """
        Parse streaming responses and extract accumulated text content.

        Handles two formats:
        1. Raw bytes in SSE (Server-Sent Events) format from Anthropic API
        2. Parsed dict objects (for backwards compatibility)

        SSE format example:
            b'event: content_block_delta\\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" curious"}}\\n\\n'

        Dict format example:
            {
                "type": "content_block_delta",
                "index": 0,
                "delta": {
                    "type": "text_delta",
                    "text": " curious"
                }
            }
        """
        text_so_far = ""
        for response in responses_so_far:
            # Handle raw bytes in SSE format
            if isinstance(response, bytes):
                text_so_far += self._extract_text_from_sse(response)
            # Handle already-parsed dict format
            elif isinstance(response, dict):
                delta = response.get("delta") if response.get("delta") else None
                if delta and delta.get("type") == "text_delta":
                    text = delta.get("text", "")
                    if text:
                        text_so_far += text
        return text_so_far

    def _extract_text_from_sse(self, sse_bytes: bytes) -> str:
        """
        Extract text content from Server-Sent Events (SSE) format.

        Args:
            sse_bytes: Raw bytes in SSE format

        Returns:
            Accumulated text from all content_block_delta events
        """
        text = ""
        try:
            # Decode bytes to string
            sse_string = sse_bytes.decode("utf-8")
            # Split by double newline to get individual events
            events = sse_string.split("\n\n")
            for event in events:
                if not event.strip():
                    continue
                # Parse event lines
                lines = event.strip().split("\n")
                event_type = None
                data_line = None
                for line in lines:
                    if line.startswith("event:"):
                        event_type = line[6:].strip()
                    elif line.startswith("data:"):
                        data_line = line[5:].strip()
                # Only process content_block_delta events
                if event_type == "content_block_delta" and data_line:
                    try:
                        data = json.loads(data_line)
                        delta = data.get("delta", {})
                        if delta.get("type") == "text_delta":
                            text += delta.get("text", "")
                    except json.JSONDecodeError:
                        verbose_proxy_logger.warning(
                            f"Failed to parse JSON from SSE data: {data_line}"
                        )
        except Exception as e:
            # Best-effort parse: malformed SSE is logged, not raised.
            verbose_proxy_logger.error(f"Error extracting text from SSE: {e}")
        return text

    def _check_streaming_has_ended(self, responses_so_far: List[Any]) -> bool:
        """
        Check if streaming response has ended by looking for non-null stop_reason.

        Handles two formats:
        1. Raw bytes in SSE (Server-Sent Events) format from Anthropic API
        2. Parsed dict objects (for backwards compatibility)

        SSE format example:
            b'event: message_delta\\ndata: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},...}\\n\\n'

        Dict format example:
            {
                "type": "message_delta",
                "delta": {
                    "stop_reason": "tool_use",
                    "stop_sequence": null
                }
            }

        Returns:
            True if stop_reason is set to a non-null value, indicating stream has ended
        """
        for response in responses_so_far:
            # Handle raw bytes in SSE format
            if isinstance(response, bytes):
                try:
                    # Decode bytes to string
                    sse_string = response.decode("utf-8")
                    # Split by double newline to get individual events
                    events = sse_string.split("\n\n")
                    for event in events:
                        if not event.strip():
                            continue
                        # Parse event lines
                        lines = event.strip().split("\n")
                        event_type = None
                        data_line = None
                        for line in lines:
                            if line.startswith("event:"):
                                event_type = line[6:].strip()
                            elif line.startswith("data:"):
                                data_line = line[5:].strip()
                        # Check for message_delta event with stop_reason
                        if event_type == "message_delta" and data_line:
                            try:
                                data = json.loads(data_line)
                                delta = data.get("delta", {})
                                stop_reason = delta.get("stop_reason")
                                if stop_reason is not None:
                                    return True
                            except json.JSONDecodeError:
                                verbose_proxy_logger.warning(
                                    f"Failed to parse JSON from SSE data: {data_line}"
                                )
                except Exception as e:
                    verbose_proxy_logger.error(
                        f"Error checking streaming end in SSE: {e}"
                    )
            # Handle already-parsed dict format
            elif isinstance(response, dict):
                if response.get("type") == "message_delta":
                    delta = response.get("delta", {})
                    stop_reason = delta.get("stop_reason")
                    if stop_reason is not None:
                        return True
        return False

    def _has_text_content(self, response: "AnthropicMessagesResponse") -> bool:
        """
        Check if response has any text content to process.

        Override this method to customize text content detection.
        """
        if isinstance(response, dict):
            response_content = response.get("content", [])
        else:
            response_content = getattr(response, "content", None) or []
        if not response_content:
            return False
        for content_block in response_content:
            # Check if this is a text block by checking the 'type' field
            if isinstance(content_block, dict) and content_block.get("type") == "text":
                content_text = content_block.get("text")
                if content_text and isinstance(content_text, str):
                    return True
        return False

    def _extract_output_text_and_images(
        self,
        content_block: Dict[str, Any],
        content_idx: int,
        texts_to_check: List[str],
        images_to_check: List[str],
        task_mappings: List[Tuple[int, Optional[int]]],
        tool_calls_to_check: Optional[List[ChatCompletionToolCallChunk]] = None,
    ) -> None:
        """
        Extract text content, images, and tool calls from a response content block.

        Override this method to customize text/image/tool extraction logic.
        """
        content_type = content_block.get("type")
        # Extract text content
        if content_type == "text":
            content_text = content_block.get("text")
            if content_text and isinstance(content_text, str):
                # Simple string content
                texts_to_check.append(content_text)
                task_mappings.append((content_idx, None))
        # Extract tool calls
        elif content_type == "tool_use":
            tool_call = AnthropicConfig.convert_tool_use_to_openai_format(
                anthropic_tool_content=content_block,
                index=content_idx,
            )
            # NOTE(review): when tool_calls_to_check is None, rebinding it to a
            # fresh list means the appended tool_call is discarded at return —
            # callers relying on the default never see it. process_output_response
            # always passes a list, so that path is unaffected; confirm intent.
            if tool_calls_to_check is None:
                tool_calls_to_check = []
            tool_calls_to_check.append(tool_call)

    async def _apply_guardrail_responses_to_output(
        self,
        response: "AnthropicMessagesResponse",
        responses: List[str],
        task_mappings: List[Tuple[int, Optional[int]]],
    ) -> None:
        """
        Apply guardrail responses back to output response.

        Mutates *response* in place, writing each guardrailed text over the
        text block at the recorded content index. Out-of-range indices and
        non-text blocks are skipped.

        Override this method to customize how responses are applied.
        """
        for task_idx, guardrail_response in enumerate(responses):
            mapping = task_mappings[task_idx]
            content_idx = cast(int, mapping[0])
            # Handle both dict and object responses
            response_content: List[Any] = []
            if isinstance(response, dict):
                response_content = response.get("content", []) or []
            elif hasattr(response, "content"):
                content = getattr(response, "content", None)
                response_content = content or []
            else:
                continue
            if not response_content:
                continue
            # Get the content block at the index
            if content_idx >= len(response_content):
                continue
            content_block = response_content[content_idx]
            # Verify it's a text block and update the text field
            # Handle both dict and Pydantic object content blocks
            if isinstance(content_block, dict):
                if content_block.get("type") == "text":
                    cast(Dict[str, Any], content_block)["text"] = guardrail_response
            elif (
                hasattr(content_block, "type")
                and getattr(content_block, "type", None) == "text"
            ):
                # Update Pydantic object's text attribute
                if hasattr(content_block, "text"):
                    content_block.text = guardrail_response

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,627 @@
"""
This file contains common utils for anthropic calls.
"""
from typing import Dict, List, Optional, Union
import httpx
import litellm
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_file_ids_from_messages,
)
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.anthropic import (
ANTHROPIC_HOSTED_TOOLS,
ANTHROPIC_OAUTH_BETA_HEADER,
ANTHROPIC_OAUTH_TOKEN_PREFIX,
AllAnthropicToolsValues,
AnthropicMcpServerTool,
)
from litellm.types.llms.openai import AllMessageValues
def is_anthropic_oauth_key(value: Optional[str]) -> bool:
    """Check if a value contains an Anthropic OAuth token (sk-ant-oat*)."""
    if value is None:
        return False
    # Accept both the raw token and the "Bearer <token>" form.
    token = value[7:] if value.startswith("Bearer ") else value
    return token.startswith(ANTHROPIC_OAUTH_TOKEN_PREFIX)
def _merge_beta_headers(existing: Optional[str], new_beta: str) -> str:
"""Merge a new beta value into an existing comma-separated anthropic-beta header."""
if not existing:
return new_beta
betas = {b.strip() for b in existing.split(",") if b.strip()}
betas.add(new_beta)
return ",".join(sorted(betas))
def optionally_handle_anthropic_oauth(
    headers: dict, api_key: Optional[str]
) -> tuple[dict, Optional[str]]:
    """
    Handle Anthropic OAuth token detection and header setup.

    If an OAuth token is detected in the Authorization header, extracts it
    and sets the required OAuth headers.

    Args:
        headers: Request headers dict
        api_key: Current API key (may be None)

    Returns:
        Tuple of (updated headers, api_key)
    """

    def _enable_oauth_headers() -> None:
        # OAuth requests authenticate via the Authorization header, never
        # x-api-key, and require the OAuth beta + browser-access headers.
        headers.pop("x-api-key", None)
        headers["anthropic-beta"] = _merge_beta_headers(
            headers.get("anthropic-beta"), ANTHROPIC_OAUTH_BETA_HEADER
        )
        headers["anthropic-dangerous-direct-browser-access"] = "true"

    # Passthrough / forwarded requests carry the token in the Authorization header.
    auth_header = headers.get("authorization", "")
    if auth_header.startswith(f"Bearer {ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
        api_key = auth_header.replace("Bearer ", "")
        _enable_oauth_headers()
        return headers, api_key

    # Standard chat/completion flow passes the token as api_key.
    if api_key and api_key.startswith(ANTHROPIC_OAUTH_TOKEN_PREFIX):
        headers["authorization"] = f"Bearer {api_key}"
        _enable_oauth_headers()

    return headers, api_key
class AnthropicError(BaseLLMException):
    """Exception raised for errors returned by the Anthropic API."""

    def __init__(
        self,
        status_code: int,
        message,
        headers: Optional[httpx.Headers] = None,
    ):
        # Pure passthrough; all error state lives on BaseLLMException.
        super().__init__(status_code=status_code, message=message, headers=headers)
class AnthropicModelInfo(BaseLLMModelInfo):
    """
    Anthropic-specific model utilities.

    Detects which Anthropic features a request uses (prompt caching, files,
    MCP servers, computer use, web search, tool search, effort, skills, ...),
    assembles the ``anthropic-beta`` / auth headers those features require,
    and provides model listing plus a token counter factory.
    """

    def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
        """
        Return if {"cache_control": ..} in message content block

        Used to check if anthropic prompt caching headers need to be set.
        """
        for message in messages:
            # cache_control may be set on the message itself...
            if message.get("cache_control", None) is not None:
                return True
            # ...or on any individual content block.
            _message_content = message.get("content")
            if _message_content is not None and isinstance(_message_content, list):
                for content in _message_content:
                    # NOTE(review): assumes each content block is a dict; for a
                    # plain-string block `in` would be a substring test —
                    # confirm upstream normalization guarantees dict blocks.
                    if "cache_control" in content:
                        return True
        return False

    def is_file_id_used(self, messages: List[AllMessageValues]) -> bool:
        """
        Return if {"source": {"type": "file", "file_id": ..}} in message content block
        """
        file_ids = get_file_ids_from_messages(messages)
        return len(file_ids) > 0

    def is_mcp_server_used(
        self, mcp_servers: Optional[List[AnthropicMcpServerTool]]
    ) -> bool:
        """Return True only for a non-None, non-empty MCP server list."""
        if mcp_servers is None:
            return False
        if mcp_servers:
            return True
        return False

    def is_computer_tool_used(
        self, tools: Optional[List[AllAnthropicToolsValues]]
    ) -> Optional[str]:
        """Returns the computer tool version if used, e.g. 'computer_20250124' or None"""
        if tools is None:
            return None
        for tool in tools:
            if "type" in tool and tool["type"].startswith("computer_"):
                return tool["type"]
        return None

    def is_web_search_tool_used(
        self, tools: Optional[List[AllAnthropicToolsValues]]
    ) -> bool:
        """Returns True if web_search tool is used"""
        if tools is None:
            return False
        for tool in tools:
            if "type" in tool and tool["type"].startswith(
                ANTHROPIC_HOSTED_TOOLS.WEB_SEARCH.value
            ):
                return True
        return False

    def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
        """
        Set to true if media passed into messages.

        Treats any non-text content block (image, document, ...) as media.
        """
        for message in messages:
            if (
                "content" in message
                and message["content"] is not None
                and isinstance(message["content"], list)
            ):
                for content in message["content"]:
                    if "type" in content and content["type"] != "text":
                        return True
        return False

    def is_tool_search_used(self, tools: Optional[List]) -> bool:
        """
        Check if tool search tools are present in the tools list.
        """
        if not tools:
            return False
        for tool in tools:
            tool_type = tool.get("type", "")
            if tool_type in [
                "tool_search_tool_regex_20251119",
                "tool_search_tool_bm25_20251119",
            ]:
                return True
        return False

    def is_programmatic_tool_calling_used(self, tools: Optional[List]) -> bool:
        """
        Check if programmatic tool calling is being used (tools with allowed_callers field).
        Returns True if any tool has allowed_callers containing 'code_execution_20250825'.
        """
        if not tools:
            return False
        for tool in tools:
            # Check top-level allowed_callers
            allowed_callers = tool.get("allowed_callers", None)
            if allowed_callers and isinstance(allowed_callers, list):
                if "code_execution_20250825" in allowed_callers:
                    return True
            # Check function.allowed_callers for OpenAI format tools
            function = tool.get("function", {})
            if isinstance(function, dict):
                function_allowed_callers = function.get("allowed_callers", None)
                if function_allowed_callers and isinstance(
                    function_allowed_callers, list
                ):
                    if "code_execution_20250825" in function_allowed_callers:
                        return True
        return False

    def is_input_examples_used(self, tools: Optional[List]) -> bool:
        """
        Check if input_examples is being used in any tools.
        Returns True if any tool has input_examples field.
        """
        if not tools:
            return False
        for tool in tools:
            # Check top-level input_examples
            input_examples = tool.get("input_examples", None)
            if (
                input_examples
                and isinstance(input_examples, list)
                and len(input_examples) > 0
            ):
                return True
            # Check function.input_examples for OpenAI format tools
            function = tool.get("function", {})
            if isinstance(function, dict):
                function_input_examples = function.get("input_examples", None)
                if (
                    function_input_examples
                    and isinstance(function_input_examples, list)
                    and len(function_input_examples) > 0
                ):
                    return True
        return False

    @staticmethod
    def _is_claude_4_6_model(model: str) -> bool:
        """Check if the model is a Claude 4.6 model (Opus 4.6 or Sonnet 4.6)."""
        # Matches dash/underscore and dot/dash version separators.
        model_lower = model.lower()
        return any(
            v in model_lower
            for v in (
                "opus-4-6",
                "opus_4_6",
                "opus-4.6",
                "opus_4.6",
                "sonnet-4-6",
                "sonnet_4_6",
                "sonnet-4.6",
                "sonnet_4.6",
            )
        )

    def is_effort_used(
        self, optional_params: Optional[dict], model: Optional[str] = None
    ) -> bool:
        """
        Check if effort parameter is being used and requires a beta header.

        Returns True if effort-related parameters are present and
        the model requires the effort beta header. Claude 4.6 models
        use output_config as a stable API feature — no beta header needed.
        """
        if not optional_params:
            return False
        # Claude 4.6 models use output_config as a stable API feature — no beta header needed
        if model and self._is_claude_4_6_model(model):
            return False
        # Check if reasoning_effort is provided for Claude Opus 4.5
        if model and ("opus-4-5" in model.lower() or "opus_4_5" in model.lower()):
            reasoning_effort = optional_params.get("reasoning_effort")
            if reasoning_effort and isinstance(reasoning_effort, str):
                return True
        # Check if output_config is directly provided (for non-4.6 models)
        output_config = optional_params.get("output_config")
        if output_config and isinstance(output_config, dict):
            effort = output_config.get("effort")
            if effort and isinstance(effort, str):
                return True
        return False

    def is_code_execution_tool_used(self, tools: Optional[List]) -> bool:
        """
        Check if code execution tool is being used.
        Returns True if any tool has type "code_execution_20250825".
        """
        if not tools:
            return False
        for tool in tools:
            tool_type = tool.get("type", "")
            if tool_type == "code_execution_20250825":
                return True
        return False

    def is_container_with_skills_used(self, optional_params: Optional[dict]) -> bool:
        """
        Check if container with skills is being used.
        Returns True if optional_params contains container with skills.
        """
        if not optional_params:
            return False
        container = optional_params.get("container")
        if container and isinstance(container, dict):
            skills = container.get("skills")
            if skills and isinstance(skills, list) and len(skills) > 0:
                return True
        return False

    def _get_user_anthropic_beta_headers(
        self, anthropic_beta_header: Optional[str]
    ) -> Optional[List[str]]:
        """Split a caller-supplied comma-separated anthropic-beta header into a list."""
        if anthropic_beta_header is None:
            return None
        return anthropic_beta_header.split(",")

    def get_computer_tool_beta_header(self, computer_tool_version: str) -> str:
        """
        Get the appropriate beta header for a given computer tool version.

        Args:
            computer_tool_version: The computer tool version (e.g., 'computer_20250124', 'computer_20241022')

        Returns:
            The corresponding beta header string
        """
        computer_tool_beta_mapping = {
            "computer_20250124": "computer-use-2025-01-24",
            "computer_20241022": "computer-use-2024-10-22",
        }
        return computer_tool_beta_mapping.get(
            computer_tool_version, "computer-use-2024-10-22"  # Default fallback
        )

    def get_anthropic_beta_list(
        self,
        model: str,
        optional_params: Optional[dict] = None,
        computer_tool_used: Optional[str] = None,
        prompt_caching_set: bool = False,
        file_id_used: bool = False,
        mcp_server_used: bool = False,
    ) -> List[str]:
        """
        Get list of common beta headers based on the features that are active.

        Returns:
            List of beta header strings (deduplicated; order not guaranteed).
        """
        from litellm.types.llms.anthropic import (
            ANTHROPIC_EFFORT_BETA_HEADER,
        )

        betas = []
        # Detect features
        effort_used = self.is_effort_used(optional_params, model)
        if effort_used:
            betas.append(ANTHROPIC_EFFORT_BETA_HEADER)  # effort-2025-11-24
        if computer_tool_used:
            beta_header = self.get_computer_tool_beta_header(computer_tool_used)
            betas.append(beta_header)
        # Anthropic no longer requires the prompt-caching beta header
        # Prompt caching now works automatically when cache_control is used in messages
        # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
        if file_id_used:
            betas.append("files-api-2025-04-14")
            betas.append("code-execution-2025-05-22")
        if mcp_server_used:
            betas.append("mcp-client-2025-04-04")
        return list(set(betas))

    def get_anthropic_headers(
        self,
        api_key: str,
        anthropic_version: Optional[str] = None,
        computer_tool_used: Optional[str] = None,
        prompt_caching_set: bool = False,
        pdf_used: bool = False,
        file_id_used: bool = False,
        mcp_server_used: bool = False,
        web_search_tool_used: bool = False,
        tool_search_used: bool = False,
        programmatic_tool_calling_used: bool = False,
        input_examples_used: bool = False,
        effort_used: bool = False,
        is_vertex_request: bool = False,
        user_anthropic_beta_headers: Optional[List[str]] = None,
        code_execution_tool_used: bool = False,
        container_with_skills_used: bool = False,
    ) -> dict:
        """
        Build the full set of Anthropic request headers (auth, version,
        content-type, anthropic-beta) from pre-computed feature flags.
        """
        betas = set()
        # Anthropic no longer requires the prompt-caching beta header
        # Prompt caching now works automatically when cache_control is used in messages
        # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
        if computer_tool_used:
            beta_header = self.get_computer_tool_beta_header(computer_tool_used)
            betas.add(beta_header)
        # if pdf_used:
        #     betas.add("pdfs-2024-09-25")
        if file_id_used:
            betas.add("files-api-2025-04-14")
            betas.add("code-execution-2025-05-22")
        if mcp_server_used:
            betas.add("mcp-client-2025-04-04")
        # Tool search, programmatic tool calling, and input_examples all use the same beta header
        if tool_search_used or programmatic_tool_calling_used or input_examples_used:
            from litellm.types.llms.anthropic import ANTHROPIC_TOOL_SEARCH_BETA_HEADER

            betas.add(ANTHROPIC_TOOL_SEARCH_BETA_HEADER)
        # Effort parameter uses a separate beta header
        if effort_used:
            from litellm.types.llms.anthropic import ANTHROPIC_EFFORT_BETA_HEADER

            betas.add(ANTHROPIC_EFFORT_BETA_HEADER)
        # Code execution tool uses a separate beta header
        if code_execution_tool_used:
            betas.add("code-execution-2025-08-25")
        # Container with skills uses a separate beta header
        if container_with_skills_used:
            betas.add("skills-2025-10-02")
        # OAuth tokens authenticate via Authorization header instead of x-api-key.
        _is_oauth = api_key and api_key.startswith(ANTHROPIC_OAUTH_TOKEN_PREFIX)
        headers = {
            "anthropic-version": anthropic_version or "2023-06-01",
            "accept": "application/json",
            "content-type": "application/json",
        }
        if _is_oauth:
            headers["authorization"] = f"Bearer {api_key}"
            headers["anthropic-dangerous-direct-browser-access"] = "true"
            betas.add(ANTHROPIC_OAUTH_BETA_HEADER)
        else:
            headers["x-api-key"] = api_key
        if user_anthropic_beta_headers is not None:
            betas.update(user_anthropic_beta_headers)

        # Don't send any beta headers to Vertex, except web search which is required
        if is_vertex_request is True:
            # Vertex AI requires web search beta header for web search to work
            if web_search_tool_used:
                from litellm.types.llms.anthropic import ANTHROPIC_BETA_HEADER_VALUES

                headers[
                    "anthropic-beta"
                ] = ANTHROPIC_BETA_HEADER_VALUES.WEB_SEARCH_2025_03_05.value
        elif len(betas) > 0:
            # NOTE: set iteration order is unstable, so the joined header value
            # may vary between runs; Anthropic accepts any order.
            headers["anthropic-beta"] = ",".join(betas)
        return headers

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> Dict:
        """
        Validate auth and return the final request headers.

        Detects all feature flags from the request, then merges the computed
        Anthropic headers over the caller-supplied ones.

        Raises:
            litellm.AuthenticationError: if no API key is available.
        """
        # Check for Anthropic OAuth token in headers
        headers, api_key = optionally_handle_anthropic_oauth(
            headers=headers, api_key=api_key
        )

        if api_key is None:
            raise litellm.AuthenticationError(
                message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
                llm_provider="anthropic",
                model=model,
            )

        tools = optional_params.get("tools")
        prompt_caching_set = self.is_cache_control_set(messages=messages)
        computer_tool_used = self.is_computer_tool_used(tools=tools)
        mcp_server_used = self.is_mcp_server_used(
            mcp_servers=optional_params.get("mcp_servers")
        )
        pdf_used = self.is_pdf_used(messages=messages)
        file_id_used = self.is_file_id_used(messages=messages)
        web_search_tool_used = self.is_web_search_tool_used(tools=tools)
        tool_search_used = self.is_tool_search_used(tools=tools)
        programmatic_tool_calling_used = self.is_programmatic_tool_calling_used(
            tools=tools
        )
        input_examples_used = self.is_input_examples_used(tools=tools)
        effort_used = self.is_effort_used(optional_params=optional_params, model=model)
        code_execution_tool_used = self.is_code_execution_tool_used(tools=tools)
        container_with_skills_used = self.is_container_with_skills_used(
            optional_params=optional_params
        )
        user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
            anthropic_beta_header=headers.get("anthropic-beta")
        )
        anthropic_headers = self.get_anthropic_headers(
            computer_tool_used=computer_tool_used,
            prompt_caching_set=prompt_caching_set,
            pdf_used=pdf_used,
            api_key=api_key,
            file_id_used=file_id_used,
            web_search_tool_used=web_search_tool_used,
            is_vertex_request=optional_params.get("is_vertex_request", False),
            user_anthropic_beta_headers=user_anthropic_beta_headers,
            mcp_server_used=mcp_server_used,
            tool_search_used=tool_search_used,
            programmatic_tool_calling_used=programmatic_tool_calling_used,
            input_examples_used=input_examples_used,
            effort_used=effort_used,
            code_execution_tool_used=code_execution_tool_used,
            container_with_skills_used=container_with_skills_used,
        )

        # Computed headers win over caller-supplied duplicates.
        headers = {**headers, **anthropic_headers}
        return headers

    @staticmethod
    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
        """Resolve the API base: explicit arg > ANTHROPIC_API_BASE env > default."""
        from litellm.secret_managers.main import get_secret_str

        return (
            api_base
            or get_secret_str("ANTHROPIC_API_BASE")
            or "https://api.anthropic.com"
        )

    @staticmethod
    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
        """Resolve the API key: explicit arg > ANTHROPIC_API_KEY env."""
        from litellm.secret_managers.main import get_secret_str

        return api_key or get_secret_str("ANTHROPIC_API_KEY")

    @staticmethod
    def get_base_model(model: Optional[str] = None) -> Optional[str]:
        """Strip the litellm 'anthropic/' provider prefix, if present."""
        return model.replace("anthropic/", "") if model else None

    def get_models(
        self, api_key: Optional[str] = None, api_base: Optional[str] = None
    ) -> List[str]:
        """
        Query Anthropic's /v1/models endpoint and return model names with the
        'anthropic/' litellm provider prefix.

        Raises:
            ValueError: if no api_base/api_key can be resolved.
            Exception: if the HTTP request fails.
        """
        api_base = AnthropicModelInfo.get_api_base(api_base)
        api_key = AnthropicModelInfo.get_api_key(api_key)
        if api_base is None or api_key is None:
            raise ValueError(
                "ANTHROPIC_API_BASE or ANTHROPIC_API_KEY is not set. Please set the environment variable, to query Anthropic's `/models` endpoint."
            )
        response = litellm.module_level_client.get(
            url=f"{api_base}/v1/models",
            headers={"x-api-key": api_key, "anthropic-version": "2023-06-01"},
        )

        try:
            response.raise_for_status()
        except httpx.HTTPStatusError:
            raise Exception(
                f"Failed to fetch models from Anthropic. Status code: {response.status_code}, Response: {response.text}"
            )

        models = response.json()["data"]

        litellm_model_names = []
        for model in models:
            stripped_model_name = model["id"]
            litellm_model_name = "anthropic/" + stripped_model_name
            litellm_model_names.append(litellm_model_name)
        return litellm_model_names

    def get_token_counter(self) -> Optional[BaseTokenCounter]:
        """
        Factory method to create an Anthropic token counter.

        Returns:
            AnthropicTokenCounter instance for this provider.
        """
        from litellm.llms.anthropic.count_tokens.token_counter import (
            AnthropicTokenCounter,
        )

        return AnthropicTokenCounter()
def process_anthropic_headers(headers: "Union[httpx.Headers, dict]") -> dict:
    """
    Map Anthropic rate-limit response headers onto their OpenAI-style
    equivalents, and expose every original header under a
    ``llm_provider-`` prefixed name.
    """
    # Anthropic rate-limit header -> OpenAI-compatible header name.
    rate_limit_map = {
        "anthropic-ratelimit-requests-limit": "x-ratelimit-limit-requests",
        "anthropic-ratelimit-requests-remaining": "x-ratelimit-remaining-requests",
        "anthropic-ratelimit-tokens-limit": "x-ratelimit-limit-tokens",
        "anthropic-ratelimit-tokens-remaining": "x-ratelimit-remaining-tokens",
    }
    openai_headers = {
        openai_key: headers[anthropic_key]
        for anthropic_key, openai_key in rate_limit_map.items()
        if anthropic_key in headers
    }
    # Preserve every raw header under a provider-scoped name.
    llm_response_headers = {
        "{}-{}".format("llm_provider", key): value for key, value in headers.items()
    }
    return {**llm_response_headers, **openai_headers}

View File

@@ -0,0 +1,5 @@
"""
Anthropic /complete API - uses `llm_http_handler.py` to make httpx requests
Request/Response transformation is handled in `transformation.py`
"""

View File

@@ -0,0 +1,310 @@
"""
Translation logic for anthropic's `/v1/complete` endpoint
Litellm provider slug: `anthropic_text/<model_name>`
"""
import json
import time
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
import httpx
import litellm
from litellm.constants import DEFAULT_MAX_TOKENS
from litellm.litellm_core_utils.prompt_templates.factory import (
custom_prompt,
prompt_factory,
)
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import (
BaseConfig,
BaseLLMException,
LiteLLMLoggingObj,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import (
ChatCompletionToolCallChunk,
ChatCompletionUsageBlock,
GenericStreamingChunk,
ModelResponse,
Usage,
)
class AnthropicTextError(BaseLLMException):
    """Error raised for failures against Anthropic's legacy /v1/complete API."""

    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # Synthesize request/response objects so callers can inspect them,
        # since this error is built from an already-parsed failure.
        self.request = httpx.Request(
            method="POST", url="https://api.anthropic.com/v1/complete"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        # Call the base class constructor with the parameters it needs.
        super().__init__(
            message=message,
            status_code=status_code,
            request=self.request,
            response=self.response,
        )
class AnthropicTextConfig(BaseConfig):
    """
    Reference: https://docs.anthropic.com/claude/reference/complete_post

    to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}

    Request/response translation for Anthropic's legacy /v1/complete endpoint
    (litellm provider slug: `anthropic_text/<model_name>`).
    """

    # Class-level defaults; __init__ below overwrites them on the *class*
    # (shared across instances) so get_config() can surface them.
    max_tokens_to_sample: Optional[
        int
    ] = litellm.max_tokens  # anthropic requires a default
    stop_sequences: Optional[list] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    top_k: Optional[int] = None
    metadata: Optional[dict] = None

    def __init__(
        self,
        max_tokens_to_sample: Optional[
            int
        ] = DEFAULT_MAX_TOKENS,  # anthropic requires a default
        stop_sequences: Optional[list] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        top_k: Optional[int] = None,
        metadata: Optional[dict] = None,
    ) -> None:
        # Copy every explicitly-passed (non-None) argument onto the class so
        # that get_config() sees it as a default for subsequent calls.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    # makes headers for API call
    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """
        Return request headers for /v1/complete; raises if no API key is set.
        """
        if api_key is None:
            raise ValueError(
                "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            )
        _headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
            "x-api-key": api_key,
        }
        headers.update(_headers)
        return headers

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """
        Build the /v1/complete request body: render messages into a single
        prompt string and merge in config defaults.
        """
        prompt = self._get_anthropic_text_prompt_from_messages(
            messages=messages, model=model
        )

        ## Load Config
        config = litellm.AnthropicTextConfig.get_config()
        for k, v in config.items():
            if (
                k not in optional_params
            ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                optional_params[k] = v

        data = {
            "model": model,
            "prompt": prompt,
            **optional_params,
        }

        return data

    def get_supported_openai_params(self, model: str):
        """
        Anthropic /complete API Ref: https://docs.anthropic.com/en/api/complete
        """
        return [
            "stream",
            "max_tokens",
            "max_completion_tokens",
            "stop",
            "temperature",
            "top_p",
            "extra_headers",
            "user",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Follows the same logic as the AnthropicConfig.map_openai_params method (which is the Anthropic /messages API)

        Note: the only difference is in the get supported openai params method between the AnthropicConfig and AnthropicTextConfig
        API Ref: https://docs.anthropic.com/en/api/complete
        """
        for param, value in non_default_params.items():
            if param == "max_tokens":
                optional_params["max_tokens_to_sample"] = value
            if param == "max_completion_tokens":
                optional_params["max_tokens_to_sample"] = value
            if param == "stream" and value is True:
                optional_params["stream"] = value
            if param == "stop" and (isinstance(value, str) or isinstance(value, list)):
                _value = litellm.AnthropicConfig()._map_stop_sequences(value)
                if _value is not None:
                    optional_params["stop_sequences"] = _value
            if param == "temperature":
                optional_params["temperature"] = value
            if param == "top_p":
                optional_params["top_p"] = value
            if param == "user":
                # Anthropic takes the user id inside the metadata object.
                optional_params["metadata"] = {"user_id": value}
        return optional_params

    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: str,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """
        Convert a raw /v1/complete HTTP response into a litellm ModelResponse.

        NOTE(review): `encoding` is called as `encoding.encode(prompt)` below,
        so it is actually a tokenizer object, not a codec name; the `str`
        annotation appears inherited from the base signature — confirm.

        Raises:
            AnthropicTextError: on non-JSON bodies or error payloads.
        """
        try:
            completion_response = raw_response.json()
        except Exception:
            raise AnthropicTextError(
                message=raw_response.text, status_code=raw_response.status_code
            )
        prompt = self._get_anthropic_text_prompt_from_messages(
            messages=messages, model=model
        )
        if "error" in completion_response:
            raise AnthropicTextError(
                message=str(completion_response["error"]),
                status_code=raw_response.status_code,
            )
        else:
            if len(completion_response["completion"]) > 0:
                model_response.choices[0].message.content = completion_response[  # type: ignore
                    "completion"
                ]
            model_response.choices[0].finish_reason = completion_response["stop_reason"]

        ## CALCULATING USAGE
        prompt_tokens = len(
            encoding.encode(prompt)
        )  ##[TODO] use the anthropic tokenizer here
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )  ##[TODO] use the anthropic tokenizer here

        model_response.created = int(time.time())
        model_response.model = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        setattr(model_response, "usage", usage)
        return model_response

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
    ) -> BaseLLMException:
        """Return the provider-specific exception type for HTTP errors."""
        return AnthropicTextError(
            status_code=status_code,
            message=error_message,
        )

    @staticmethod
    def _is_anthropic_text_model(model: str) -> bool:
        # Only the legacy claude-2 / claude-instant-1 models use /v1/complete.
        return model == "claude-2" or model == "claude-instant-1"

    def _get_anthropic_text_prompt_from_messages(
        self, messages: List[AllMessageValues], model: str
    ) -> str:
        """Render chat messages into the single prompt string /v1/complete expects."""
        custom_prompt_dict = litellm.custom_prompt_dict
        if model in custom_prompt_dict:
            # check if the model has a registered custom prompt
            model_prompt_details = custom_prompt_dict[model]
            prompt = custom_prompt(
                role_dict=model_prompt_details["roles"],
                initial_prompt_value=model_prompt_details["initial_prompt_value"],
                final_prompt_value=model_prompt_details["final_prompt_value"],
                messages=messages,
            )
        else:
            prompt = prompt_factory(
                model=model, messages=messages, custom_llm_provider="anthropic"
            )

        return str(prompt)

    def get_model_response_iterator(
        self,
        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ):
        """Return the streaming-chunk parser for /v1/complete responses."""
        return AnthropicTextCompletionResponseIterator(
            streaming_response=streaming_response,
            sync_stream=sync_stream,
            json_mode=json_mode,
        )
class AnthropicTextCompletionResponseIterator(BaseModelResponseIterator):
    """Parses streaming chunks from Anthropic's legacy /v1/complete endpoint."""

    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
        """
        Convert one raw /v1/complete stream event (already parsed to a dict)
        into a GenericStreamingChunk.

        The legacy endpoint emits events shaped like
        ``{"completion": "...", "stop_reason": null, ...}``; ``stop_reason``
        is only populated on the final event.

        Raises:
            ValueError: if the chunk cannot be decoded.
        """
        try:
            text = ""
            tool_use: Optional[ChatCompletionToolCallChunk] = None
            is_finished = False
            finish_reason = ""
            usage: Optional[ChatCompletionUsageBlock] = None
            provider_specific_fields = None

            index = int(chunk.get("index", 0))

            _chunk_text = chunk.get("completion", None)
            if _chunk_text is not None and isinstance(_chunk_text, str):
                text = _chunk_text

            # BUGFIX: `stop_reason or ""` can never be None, so the previous
            # `if finish_reason is not None:` check marked EVERY chunk as
            # finished. Only a non-empty stop_reason signals the end of the
            # stream.
            finish_reason = chunk.get("stop_reason") or ""
            if finish_reason:
                is_finished = True

            returned_chunk = GenericStreamingChunk(
                text=text,
                tool_use=tool_use,
                is_finished=is_finished,
                finish_reason=finish_reason,
                usage=usage,
                index=index,
                provider_specific_fields=provider_specific_fields,
            )

            return returned_chunk

        except json.JSONDecodeError:
            raise ValueError(f"Failed to decode JSON from chunk: {chunk}")

View File

@@ -0,0 +1,132 @@
"""
Helper util for handling anthropic-specific cost calculation
- e.g.: prompt caching
"""
from typing import TYPE_CHECKING, Optional, Tuple
from litellm.litellm_core_utils.llm_cost_calc.utils import (
_get_token_base_cost,
_parse_prompt_tokens_details,
calculate_cache_writing_cost,
generic_cost_per_token,
)
if TYPE_CHECKING:
from litellm.types.utils import ModelInfo, Usage
import litellm
def _compute_cache_only_cost(model_info: "ModelInfo", usage: "Usage") -> float:
    """
    Return only the cache-related portion of the prompt cost (cache read + cache write).

    These costs must NOT be scaled by geo/speed multipliers because the old
    explicit ``fast/`` model entries carried unchanged cache rates while
    multiplying only the regular input/output token costs.
    """
    if usage.prompt_tokens_details is None:
        return 0.0

    details = _parse_prompt_tokens_details(usage)
    (
        _,
        _,
        cache_creation_cost,
        cache_creation_cost_above_1hr,
        cache_read_cost,
    ) = _get_token_base_cost(model_info=model_info, usage=usage)

    # Cache reads are billed per cache-hit token.
    total = float(details["cache_hit_tokens"]) * cache_read_cost

    # Cache writes apply only when something was actually written.
    wrote_to_cache = bool(details["cache_creation_tokens"]) or (
        details["cache_creation_token_details"] is not None
    )
    if wrote_to_cache:
        total += calculate_cache_writing_cost(
            cache_creation_tokens=details["cache_creation_tokens"],
            cache_creation_token_details=details["cache_creation_token_details"],
            cache_creation_cost_above_1hr=cache_creation_cost_above_1hr,
            cache_creation_cost=cache_creation_cost,
        )
    return total
def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing anthropic caching information

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    prompt_cost, completion_cost = generic_cost_per_token(
        model=model, usage=usage, custom_llm_provider="anthropic"
    )

    # Apply provider_specific_entry multipliers for geo/speed routing.
    # Best-effort: a pricing-metadata lookup failure must never break cost
    # tracking, so everything below is wrapped and swallowed.
    try:
        model_info = litellm.get_model_info(
            model=model, custom_llm_provider="anthropic"
        )
        entry: dict = model_info.get("provider_specific_entry") or {}

        multiplier = 1.0
        geo = getattr(usage, "inference_geo", None)
        if geo and geo.lower() not in ("global", "not_available"):
            multiplier *= entry.get(geo.lower(), 1.0)
        if getattr(usage, "speed", None) == "fast":
            multiplier *= entry.get("fast", 1.0)

        if multiplier != 1.0:
            # Cache read/write costs are exempt from the multiplier, so back
            # them out, scale the remainder, then add them back unscaled.
            cache_cost = _compute_cache_only_cost(model_info=model_info, usage=usage)
            prompt_cost = (prompt_cost - cache_cost) * multiplier + cache_cost
            completion_cost *= multiplier
    except Exception:
        pass
    return prompt_cost, completion_cost
def get_cost_for_anthropic_web_search(
    model_info: Optional["ModelInfo"] = None,
    usage: Optional["Usage"] = None,
) -> float:
    """
    Get the cost of using a web search tool for Anthropic.
    """
    from litellm.types.utils import SearchContextCostPerQuery

    ## No pricing metadata -> nothing to bill.
    if model_info is None:
        return 0.0
    ## No recorded web-search usage -> nothing to bill.
    server_tool_use = usage.server_tool_use if usage is not None else None
    if server_tool_use is None or server_tool_use.web_search_requests is None:
        return 0.0

    ## Get the cost per web search request (medium context size tier).
    pricing: SearchContextCostPerQuery = (
        model_info.get("search_context_cost_per_query") or SearchContextCostPerQuery()
    )
    per_request_cost = pricing.get("search_context_size_medium", 0.0)
    if per_request_cost is None or per_request_cost == 0.0:
        return 0.0

    ## Calculate the total cost.
    return per_request_cost * server_tool_use.web_search_requests

View File

@@ -0,0 +1,15 @@
"""
Anthropic CountTokens API implementation.
"""
from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
from litellm.llms.anthropic.count_tokens.token_counter import AnthropicTokenCounter
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)
__all__ = [
"AnthropicCountTokensHandler",
"AnthropicCountTokensConfig",
"AnthropicTokenCounter",
]

View File

@@ -0,0 +1,128 @@
"""
Anthropic CountTokens API handler.
Uses httpx for HTTP requests instead of the Anthropic SDK.
"""
from typing import Any, Dict, List, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.anthropic.common_utils import AnthropicError
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
class AnthropicCountTokensHandler(AnthropicCountTokensConfig):
    """
    Handler for Anthropic CountTokens API requests.

    Issues the HTTP call with LiteLLM's shared async httpx client (no
    Anthropic SDK dependency), mirroring the BedrockCountTokensHandler
    pattern.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """
        Execute a CountTokens request against the Anthropic API.

        Args:
            model: The model identifier (e.g., "claude-3-5-sonnet-20241022").
            messages: The messages to count tokens for.
            api_key: The Anthropic API key.
            api_base: Optional custom API base URL.
            timeout: Optional request timeout; defaults to litellm.request_timeout.
            tools: Optional tool definitions, included for accurate counts.
            system: Optional system prompt, included for accurate counts.

        Returns:
            The raw Anthropic token-count response as a dictionary.

        Raises:
            AnthropicError: If validation or the API request fails.
        """
        try:
            self.validate_request(model, messages)
            verbose_logger.debug(
                f"Processing Anthropic CountTokens request for model: {model}"
            )

            # Build the provider-format payload (model/messages/system/tools).
            payload = self.transform_request_to_count_tokens(
                model=model,
                messages=messages,
                tools=tools,
                system=system,
            )
            verbose_logger.debug(f"Transformed request: {payload}")

            url = api_base or self.get_anthropic_count_tokens_endpoint()
            verbose_logger.debug(f"Making request to: {url}")

            headers = self.get_required_headers(api_key)
            client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.ANTHROPIC
            )
            effective_timeout = (
                litellm.request_timeout if timeout is None else timeout
            )

            response = await client.post(
                url,
                headers=headers,
                json=payload,
                timeout=effective_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                body_text = response.text
                verbose_logger.error(f"Anthropic API error: {body_text}")
                raise AnthropicError(
                    status_code=response.status_code,
                    message=body_text,
                )

            parsed = response.json()
            verbose_logger.debug(f"Anthropic response: {parsed}")
            # The Anthropic payload is already the desired shape; return as-is.
            return parsed
        except AnthropicError:
            # Already a provider error with the right status code - re-raise.
            raise
        except httpx.HTTPStatusError as e:
            # Preserve the real HTTP status instead of collapsing to 500.
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=e.response.status_code,
                message=e.response.text,
            )
        except Exception as e:
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )

View File

@@ -0,0 +1,108 @@
"""
Anthropic Token Counter implementation using the CountTokens API.
"""
import os
from typing import Any, Dict, List, Optional
from litellm._logging import verbose_logger
from litellm.llms.anthropic.count_tokens.handler import AnthropicCountTokensHandler
from litellm.llms.base_llm.base_utils import BaseTokenCounter
from litellm.types.utils import LlmProviders, TokenCountResponse
# Module-level singleton: the handler holds no per-request state in this file,
# so one instance is reused across all token counting requests.
anthropic_count_tokens_handler = AnthropicCountTokensHandler()
class AnthropicTokenCounter(BaseTokenCounter):
    """Token counter for the Anthropic provider, backed by the CountTokens API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only requests routed to the Anthropic provider use this counter.
        return custom_llm_provider == LlmProviders.ANTHROPIC.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens via Anthropic's CountTokens API.

        Args:
            model_to_use: The model identifier.
            messages: The messages to count tokens for.
            contents: Alternative content format (unused for Anthropic).
            deployment: Deployment configuration containing litellm_params.
            request_model: The original request model name.
            tools: Optional tool definitions forwarded to the API.
            system: Optional system prompt forwarded to the API.

        Returns:
            A TokenCountResponse (error-flagged on API failure), or None when
            there are no messages or no API key is available.
        """
        from litellm.llms.anthropic.common_utils import AnthropicError

        if not messages:
            return None

        litellm_params = (deployment or {}).get("litellm_params", {})
        # Prefer the deployment-scoped key, falling back to the environment.
        api_key = litellm_params.get("api_key") or os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            verbose_logger.warning("No Anthropic API key found for token counting")
            return None

        try:
            api_result = await anthropic_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                messages=messages,
                api_key=api_key,
                tools=tools,
                system=system,
            )
            if api_result is not None:
                return TokenCountResponse(
                    total_tokens=api_result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="anthropic_api",
                    original_response=api_result,
                )
        except AnthropicError as e:
            # API-level failure: surface an error-flagged response so callers
            # can distinguish "failed" from "not applicable" (None).
            verbose_logger.warning(
                f"Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="anthropic_api",
                error=True,
                error_message=e.message,
                status_code=e.status_code,
            )
        except Exception as e:
            verbose_logger.warning(f"Error calling Anthropic CountTokens API: {e}")
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="anthropic_api",
                error=True,
                error_message=str(e),
                status_code=500,
            )
        return None

View File

@@ -0,0 +1,107 @@
"""
Anthropic CountTokens API transformation logic.
This module handles the transformation of requests to Anthropic's CountTokens API format.
"""
from typing import Any, Dict, List, Optional
from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION
class AnthropicCountTokensConfig:
    """
    Configuration and transformation logic for Anthropic CountTokens API.

    Anthropic CountTokens API Specification:
    - Endpoint: POST https://api.anthropic.com/v1/messages/count_tokens
    - Beta header required: anthropic-beta: token-counting-2024-11-01
    - Response: {"input_tokens": <number>}
    """

    def get_anthropic_count_tokens_endpoint(self) -> str:
        """Return the fixed Anthropic CountTokens endpoint URL."""
        return "https://api.anthropic.com/v1/messages/count_tokens"

    def transform_request_to_count_tokens(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """
        Build the CountTokens request payload.

        ``system`` and ``tools`` are attached only when provided, since both
        affect the token count.
        """
        payload: Dict[str, Any] = {"model": model, "messages": messages}
        if system is not None:
            payload["system"] = system
        if tools is not None:
            payload["tools"] = tools
        return payload

    def get_required_headers(self, api_key: str) -> Dict[str, str]:
        """
        Return the headers required by the CountTokens API.

        Args:
            api_key: The Anthropic API key.

        Returns:
            Dictionary of required headers (auth, versioning, beta flag).
        """
        from litellm.llms.anthropic.common_utils import (
            optionally_handle_anthropic_oauth,
        )

        base_headers: Dict[str, str] = {
            "Content-Type": "application/json",
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION,
        }
        # Delegate to common_utils to optionally apply Anthropic OAuth handling.
        final_headers, _ = optionally_handle_anthropic_oauth(
            headers=base_headers, api_key=api_key
        )
        return final_headers

    def validate_request(self, model: str, messages: List[Dict[str, Any]]) -> None:
        """
        Validate a count-tokens request before sending it.

        Args:
            model: The model name.
            messages: The messages to count tokens for.

        Raises:
            ValueError: If the model or messages are missing or malformed.
        """
        if not model:
            raise ValueError("model parameter is required")
        if not messages:
            raise ValueError("messages parameter is required")
        if not isinstance(messages, list):
            raise ValueError("messages must be a list")
        for idx, message in enumerate(messages):
            if not isinstance(message, dict):
                raise ValueError(f"Message {idx} must be a dictionary")
            for required_field in ("role", "content"):
                if required_field not in message:
                    raise ValueError(
                        f"Message {idx} must have a '{required_field}' field"
                    )

View File

@@ -0,0 +1,3 @@
from .transformation import LiteLLMAnthropicMessagesAdapter
__all__ = ["LiteLLMAnthropicMessagesAdapter"]

View File

@@ -0,0 +1,345 @@
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Coroutine,
Dict,
List,
Optional,
Tuple,
Union,
cast,
)
import litellm
from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
AnthropicAdapter,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.utils import ModelResponse
from litellm.utils import get_model_info
if TYPE_CHECKING:
pass
########################################################
# Module-level adapter instance, shared by the sync and async handlers in
# this file to translate between Anthropic and OpenAI request/response formats.
ANTHROPIC_ADAPTER = AnthropicAdapter()
########################################################
class LiteLLMMessagesToCompletionTransformationHandler:
    """
    Bridge from the Anthropic `/v1/messages` request shape to
    `litellm.completion` / `litellm.acompletion`.

    Used for providers without a native Anthropic Messages implementation:
    the Anthropic-format request is translated to OpenAI chat format via
    ANTHROPIC_ADAPTER, executed through litellm, and the result translated
    back to the Anthropic response schema (streaming or non-streaming).
    """
    @staticmethod
    def _route_openai_thinking_to_responses_api_if_needed(
        completion_kwargs: Dict[str, Any],
        *,
        thinking: Optional[Dict[str, Any]],
    ) -> None:
        """
        When users call `litellm.anthropic.messages.*` with a non-Anthropic model and
        `thinking={"type": "enabled", ...}`, LiteLLM converts this into OpenAI
        `reasoning_effort`.
        For OpenAI models, Chat Completions typically does not return reasoning text
        (only token accounting). To return a thinking-like content block in the
        Anthropic response format, we route the request through OpenAI's Responses API
        and request a reasoning summary.
        Mutates `completion_kwargs` in place; returns None.
        """
        # Resolve the provider: use the explicit kwarg if present, otherwise
        # infer it from the model string; inference failures leave it unset.
        custom_llm_provider = completion_kwargs.get("custom_llm_provider")
        if custom_llm_provider is None:
            try:
                _, inferred_provider, _, _ = litellm.utils.get_llm_provider(
                    model=cast(str, completion_kwargs.get("model"))
                )
                custom_llm_provider = inferred_provider
            except Exception:
                custom_llm_provider = None
        # Only OpenAI requests with thinking explicitly enabled are re-routed.
        if custom_llm_provider != "openai":
            return
        if not isinstance(thinking, dict) or thinking.get("type") != "enabled":
            return
        model = completion_kwargs.get("model")
        try:
            model_info = get_model_info(
                model=cast(str, model), custom_llm_provider=custom_llm_provider
            )
            if model_info and model_info.get("supports_reasoning") is False:
                # Model doesn't support reasoning/responses API, don't route
                return
        except Exception:
            # Missing model info is not fatal; optimistically route anyway.
            pass
        if isinstance(model, str) and model and not model.startswith("responses/"):
            # Prefix model with "responses/" to route to OpenAI Responses API
            completion_kwargs["model"] = f"responses/{model}"
        # Ensure a reasoning summary is requested so a thinking-like block can
        # be synthesized in the Anthropic-format response.
        reasoning_effort = completion_kwargs.get("reasoning_effort")
        if isinstance(reasoning_effort, str) and reasoning_effort:
            completion_kwargs["reasoning_effort"] = {
                "effort": reasoning_effort,
                "summary": "detailed",
            }
        elif isinstance(reasoning_effort, dict):
            if (
                "summary" not in reasoning_effort
                and "generate_summary" not in reasoning_effort
            ):
                updated_reasoning_effort = dict(reasoning_effort)
                updated_reasoning_effort["summary"] = "detailed"
                completion_kwargs["reasoning_effort"] = updated_reasoning_effort
    @staticmethod
    def _prepare_completion_kwargs(
        *,
        max_tokens: int,
        messages: List[Dict],
        model: str,
        metadata: Optional[Dict] = None,
        stop_sequences: Optional[List[str]] = None,
        stream: Optional[bool] = False,
        system: Optional[str] = None,
        temperature: Optional[float] = None,
        thinking: Optional[Dict] = None,
        tool_choice: Optional[Dict] = None,
        tools: Optional[List[Dict]] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        output_format: Optional[Dict] = None,
        extra_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Tuple[Dict[str, Any], Dict[str, str]]:
        """Prepare kwargs for litellm.completion/acompletion.
        Builds an Anthropic-format request dict from the arguments, translates
        it to OpenAI chat format via ANTHROPIC_ADAPTER, then layers streaming
        options and pass-through kwargs on top.
        Returns:
            Tuple of (completion_kwargs, tool_name_mapping)
            - tool_name_mapping maps truncated tool names back to original names
            for tools that exceeded OpenAI's 64-char limit
        """
        from litellm.litellm_core_utils.litellm_logging import (
            Logging as LiteLLMLoggingObject,
        )
        # Reassemble the Anthropic-style request; optional fields only when set.
        request_data = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
        }
        if metadata:
            request_data["metadata"] = metadata
        if stop_sequences:
            request_data["stop_sequences"] = stop_sequences
        if system:
            request_data["system"] = system
        if temperature is not None:
            request_data["temperature"] = temperature
        if thinking:
            request_data["thinking"] = thinking
        if tool_choice:
            request_data["tool_choice"] = tool_choice
        if tools:
            request_data["tools"] = tools
        if top_k is not None:
            request_data["top_k"] = top_k
        if top_p is not None:
            request_data["top_p"] = top_p
        if output_format:
            request_data["output_format"] = output_format
        # Translate Anthropic -> OpenAI format; also yields the truncated-name
        # mapping for tools over OpenAI's 64-char name limit.
        (
            openai_request,
            tool_name_mapping,
        ) = ANTHROPIC_ADAPTER.translate_completion_input_params_with_tool_mapping(
            request_data
        )
        if openai_request is None:
            raise ValueError("Failed to translate request to OpenAI format")
        completion_kwargs: Dict[str, Any] = dict(openai_request)
        if stream:
            # Request usage in the final stream chunk so the adapter can emit
            # Anthropic-format usage data.
            completion_kwargs["stream"] = stream
            completion_kwargs["stream_options"] = {
                "include_usage": True,
            }
        # Forward remaining caller kwargs. The logging object is special-cased:
        # its call type is rewritten so downstream logging treats this as a
        # completion call.
        excluded_keys = {"anthropic_messages"}
        extra_kwargs = extra_kwargs or {}
        for key, value in extra_kwargs.items():
            if (
                key == "litellm_logging_obj"
                and value is not None
                and isinstance(value, LiteLLMLoggingObject)
            ):
                from litellm.types.utils import CallTypes
                setattr(value, "call_type", CallTypes.completion.value)
                setattr(
                    value, "stream_options", completion_kwargs.get("stream_options")
                )
            if (
                key not in excluded_keys
                and key not in completion_kwargs
                and value is not None
            ):
                completion_kwargs[key] = value
        LiteLLMMessagesToCompletionTransformationHandler._route_openai_thinking_to_responses_api_if_needed(
            completion_kwargs,
            thinking=thinking,
        )
        return completion_kwargs, tool_name_mapping
    @staticmethod
    async def async_anthropic_messages_handler(
        max_tokens: int,
        messages: List[Dict],
        model: str,
        metadata: Optional[Dict] = None,
        stop_sequences: Optional[List[str]] = None,
        stream: Optional[bool] = False,
        system: Optional[str] = None,
        temperature: Optional[float] = None,
        thinking: Optional[Dict] = None,
        tool_choice: Optional[Dict] = None,
        tools: Optional[List[Dict]] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        output_format: Optional[Dict] = None,
        **kwargs,
    ) -> Union[AnthropicMessagesResponse, AsyncIterator]:
        """Handle non-Anthropic models asynchronously using the adapter"""
        (
            completion_kwargs,
            tool_name_mapping,
        ) = LiteLLMMessagesToCompletionTransformationHandler._prepare_completion_kwargs(
            max_tokens=max_tokens,
            messages=messages,
            model=model,
            metadata=metadata,
            stop_sequences=stop_sequences,
            stream=stream,
            system=system,
            temperature=temperature,
            thinking=thinking,
            tool_choice=tool_choice,
            tools=tools,
            top_k=top_k,
            top_p=top_p,
            output_format=output_format,
            extra_kwargs=kwargs,
        )
        completion_response = await litellm.acompletion(**completion_kwargs)
        # Streaming responses get wrapped chunk-by-chunk; non-streaming ones
        # are translated back to the Anthropic response schema in full.
        if stream:
            transformed_stream = (
                ANTHROPIC_ADAPTER.translate_completion_output_params_streaming(
                    completion_response,
                    model=model,
                    tool_name_mapping=tool_name_mapping,
                )
            )
            if transformed_stream is not None:
                return transformed_stream
            raise ValueError("Failed to transform streaming response")
        else:
            anthropic_response = ANTHROPIC_ADAPTER.translate_completion_output_params(
                cast(ModelResponse, completion_response),
                tool_name_mapping=tool_name_mapping,
            )
            if anthropic_response is not None:
                return anthropic_response
            raise ValueError("Failed to transform response to Anthropic format")
    @staticmethod
    def anthropic_messages_handler(
        max_tokens: int,
        messages: List[Dict],
        model: str,
        metadata: Optional[Dict] = None,
        stop_sequences: Optional[List[str]] = None,
        stream: Optional[bool] = False,
        system: Optional[str] = None,
        temperature: Optional[float] = None,
        thinking: Optional[Dict] = None,
        tool_choice: Optional[Dict] = None,
        tools: Optional[List[Dict]] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        output_format: Optional[Dict] = None,
        _is_async: bool = False,
        **kwargs,
    ) -> Union[
        AnthropicMessagesResponse,
        AsyncIterator[Any],
        Coroutine[Any, Any, Union[AnthropicMessagesResponse, AsyncIterator[Any]]],
    ]:
        """Handle non-Anthropic models using the adapter.
        When `_is_async` is True, returns the coroutine from the async handler
        for the caller to await; otherwise runs synchronously.
        """
        if _is_async is True:
            return LiteLLMMessagesToCompletionTransformationHandler.async_anthropic_messages_handler(
                max_tokens=max_tokens,
                messages=messages,
                model=model,
                metadata=metadata,
                stop_sequences=stop_sequences,
                stream=stream,
                system=system,
                temperature=temperature,
                thinking=thinking,
                tool_choice=tool_choice,
                tools=tools,
                top_k=top_k,
                top_p=top_p,
                output_format=output_format,
                **kwargs,
            )
        (
            completion_kwargs,
            tool_name_mapping,
        ) = LiteLLMMessagesToCompletionTransformationHandler._prepare_completion_kwargs(
            max_tokens=max_tokens,
            messages=messages,
            model=model,
            metadata=metadata,
            stop_sequences=stop_sequences,
            stream=stream,
            system=system,
            temperature=temperature,
            thinking=thinking,
            tool_choice=tool_choice,
            tools=tools,
            top_k=top_k,
            top_p=top_p,
            output_format=output_format,
            extra_kwargs=kwargs,
        )
        completion_response = litellm.completion(**completion_kwargs)
        # Same post-processing as the async path: wrap streams, translate
        # complete responses back to the Anthropic schema.
        if stream:
            transformed_stream = (
                ANTHROPIC_ADAPTER.translate_completion_output_params_streaming(
                    completion_response,
                    model=model,
                    tool_name_mapping=tool_name_mapping,
                )
            )
            if transformed_stream is not None:
                return transformed_stream
            raise ValueError("Failed to transform streaming response")
        else:
            anthropic_response = ANTHROPIC_ADAPTER.translate_completion_output_params(
                cast(ModelResponse, completion_response),
                tool_name_mapping=tool_name_mapping,
            )
            if anthropic_response is not None:
                return anthropic_response
            raise ValueError("Failed to transform response to Anthropic format")

View File

@@ -0,0 +1,488 @@
# What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import traceback
from collections import deque
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, Iterator, Literal, Optional
from litellm import verbose_logger
from litellm._uuid import uuid
from litellm.types.llms.anthropic import UsageDelta
from litellm.types.utils import AdapterCompletionStreamWrapper
if TYPE_CHECKING:
from litellm.types.utils import ModelResponseStream
class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
    """
    Adapts an OpenAI-format completion stream into Anthropic streaming events.

    Event contract enforced here:
    - first chunk returned is 'message_start'
    - content block must be started and stopped
    - finish_reason must map exactly to anthropic reason, else anthropic client won't be able to parse it.
    """
    from litellm.types.llms.anthropic import (
        ContentBlockContentBlockDict,
        ContentBlockStart,
        ContentBlockStartText,
        TextBlock,
    )
    # Per-stream state. Scalar defaults live at class level; instance
    # assignments shadow them as the stream progresses.
    sent_first_chunk: bool = False
    sent_content_block_start: bool = False
    sent_content_block_finish: bool = False
    current_content_block_type: Literal["text", "tool_use", "thinking"] = "text"
    sent_last_message: bool = False
    holding_chunk: Optional[Any] = None
    holding_stop_reason_chunk: Optional[Any] = None
    queued_usage_chunk: bool = False
    current_content_block_index: int = 0
    current_content_block_start: ContentBlockContentBlockDict = TextBlock(
        type="text",
        text="",
    )
    # Queue for buffering multiple chunks. Intentionally only *annotated* here:
    # a class-level `deque()` default would be shared (and mutated) by every
    # wrapper instance, interleaving chunks across concurrent streams. The
    # queue is created per-instance in __init__.
    chunk_queue: deque
    def __init__(
        self,
        completion_stream: Any,
        model: str,
        tool_name_mapping: Optional[Dict[str, str]] = None,
    ):
        super().__init__(completion_stream)
        self.model = model
        # Mapping of truncated tool names to original names (for OpenAI's 64-char limit)
        self.tool_name_mapping = tool_name_mapping or {}
        # Bugfix: per-instance queue (was a shared class attribute).
        self.chunk_queue = deque()
    def _create_initial_usage_delta(self) -> UsageDelta:
        """
        Create the initial UsageDelta for the message_start event.
        Initializes cache token fields (cache_creation_input_tokens, cache_read_input_tokens)
        to 0 to indicate to clients (like Claude Code) that prompt caching is supported.
        The actual cache token values will be provided in the message_delta event at the
        end of the stream, since Bedrock Converse API only returns usage data in the final
        response chunk.
        Returns:
            UsageDelta with all token counts initialized to 0.
        """
        return UsageDelta(
            input_tokens=0,
            output_tokens=0,
            cache_creation_input_tokens=0,
            cache_read_input_tokens=0,
        )
    def __next__(self):
        """Synchronous iteration: emit the next Anthropic-format event dict."""
        from .transformation import LiteLLMAnthropicMessagesAdapter
        try:
            # Always return queued chunks first
            if self.chunk_queue:
                return self.chunk_queue.popleft()
            # Queue initial chunks if not sent yet
            if self.sent_first_chunk is False:
                self.sent_first_chunk = True
                self.chunk_queue.append(
                    {
                        "type": "message_start",
                        "message": {
                            "id": "msg_{}".format(uuid.uuid4()),
                            "type": "message",
                            "role": "assistant",
                            "content": [],
                            "model": self.model,
                            "stop_reason": None,
                            "stop_sequence": None,
                            "usage": self._create_initial_usage_delta(),
                        },
                    }
                )
                return self.chunk_queue.popleft()
            if self.sent_content_block_start is False:
                self.sent_content_block_start = True
                self.chunk_queue.append(
                    {
                        "type": "content_block_start",
                        "index": self.current_content_block_index,
                        "content_block": {"type": "text", "text": ""},
                    }
                )
                return self.chunk_queue.popleft()
            for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                should_start_new_block = self._should_start_new_content_block(chunk)
                if should_start_new_block:
                    self._increment_content_block_index()
                processed_chunk = LiteLLMAnthropicMessagesAdapter().translate_streaming_openai_response_to_anthropic(
                    response=chunk,
                    current_content_block_index=self.current_content_block_index,
                )
                if should_start_new_block and not self.sent_content_block_finish:
                    # Queue the sequence: content_block_stop -> content_block_start
                    # The trigger chunk itself is not emitted as a delta since the
                    # content_block_start already carries the relevant information.
                    self.chunk_queue.append(
                        {
                            "type": "content_block_stop",
                            "index": max(self.current_content_block_index - 1, 0),
                        }
                    )
                    self.chunk_queue.append(
                        {
                            "type": "content_block_start",
                            "index": self.current_content_block_index,
                            "content_block": self.current_content_block_start,
                        }
                    )
                    self.sent_content_block_finish = False
                    return self.chunk_queue.popleft()
                if (
                    processed_chunk["type"] == "message_delta"
                    and self.sent_content_block_finish is False
                ):
                    # Queue both the content_block_stop and the message_delta
                    self.chunk_queue.append(
                        {
                            "type": "content_block_stop",
                            "index": self.current_content_block_index,
                        }
                    )
                    self.sent_content_block_finish = True
                    self.chunk_queue.append(processed_chunk)
                    return self.chunk_queue.popleft()
                elif self.holding_chunk is not None:
                    self.chunk_queue.append(self.holding_chunk)
                    self.chunk_queue.append(processed_chunk)
                    self.holding_chunk = None
                    return self.chunk_queue.popleft()
                else:
                    self.chunk_queue.append(processed_chunk)
                    return self.chunk_queue.popleft()
            # Handle any remaining held chunks after stream ends
            if self.holding_chunk is not None:
                self.chunk_queue.append(self.holding_chunk)
                self.holding_chunk = None
            if not self.sent_last_message:
                self.sent_last_message = True
                self.chunk_queue.append({"type": "message_stop"})
            if self.chunk_queue:
                return self.chunk_queue.popleft()
            raise StopIteration
        except StopIteration:
            if self.chunk_queue:
                return self.chunk_queue.popleft()
            if self.sent_last_message is False:
                self.sent_last_message = True
                return {"type": "message_stop"}
            raise StopIteration
        except Exception as e:
            verbose_logger.error(
                "Anthropic Adapter - {}\n{}".format(e, traceback.format_exc())
            )
            # Bugfix: this is the *sync* iterator, so it must terminate with
            # StopIteration. Raising StopAsyncIteration here would escape a
            # plain `for` loop as an unhandled exception instead of ending it.
            raise StopIteration
    async def __anext__(self):  # noqa: PLR0915
        """Async iteration: emit the next Anthropic-format event dict."""
        from .transformation import LiteLLMAnthropicMessagesAdapter
        try:
            # Always return queued chunks first
            if self.chunk_queue:
                return self.chunk_queue.popleft()
            # Queue initial chunks if not sent yet
            if self.sent_first_chunk is False:
                self.sent_first_chunk = True
                self.chunk_queue.append(
                    {
                        "type": "message_start",
                        "message": {
                            "id": "msg_{}".format(uuid.uuid4()),
                            "type": "message",
                            "role": "assistant",
                            "content": [],
                            "model": self.model,
                            "stop_reason": None,
                            "stop_sequence": None,
                            "usage": self._create_initial_usage_delta(),
                        },
                    }
                )
                return self.chunk_queue.popleft()
            if self.sent_content_block_start is False:
                self.sent_content_block_start = True
                self.chunk_queue.append(
                    {
                        "type": "content_block_start",
                        "index": self.current_content_block_index,
                        "content_block": {"type": "text", "text": ""},
                    }
                )
                return self.chunk_queue.popleft()
            async for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                # Check if we need to start a new content block
                should_start_new_block = self._should_start_new_content_block(chunk)
                if should_start_new_block:
                    self._increment_content_block_index()
                processed_chunk = LiteLLMAnthropicMessagesAdapter().translate_streaming_openai_response_to_anthropic(
                    response=chunk,
                    current_content_block_index=self.current_content_block_index,
                )
                # Check if this is a usage chunk and we have a held stop_reason chunk
                if (
                    self.holding_stop_reason_chunk is not None
                    and getattr(chunk, "usage", None) is not None
                ):
                    # Merge usage into the held stop_reason chunk
                    merged_chunk = self.holding_stop_reason_chunk.copy()
                    if "delta" not in merged_chunk:
                        merged_chunk["delta"] = {}
                    # Add usage to the held chunk; input_tokens excludes cached
                    # tokens, which are reported separately below.
                    uncached_input_tokens = chunk.usage.prompt_tokens or 0
                    if (
                        hasattr(chunk.usage, "prompt_tokens_details")
                        and chunk.usage.prompt_tokens_details
                    ):
                        cached_tokens = (
                            getattr(
                                chunk.usage.prompt_tokens_details, "cached_tokens", 0
                            )
                            or 0
                        )
                        uncached_input_tokens -= cached_tokens
                    usage_dict: UsageDelta = {
                        "input_tokens": uncached_input_tokens,
                        "output_tokens": chunk.usage.completion_tokens or 0,
                    }
                    # Add cache tokens if available (for prompt caching support)
                    if (
                        hasattr(chunk.usage, "_cache_creation_input_tokens")
                        and chunk.usage._cache_creation_input_tokens > 0
                    ):
                        usage_dict[
                            "cache_creation_input_tokens"
                        ] = chunk.usage._cache_creation_input_tokens
                    if (
                        hasattr(chunk.usage, "_cache_read_input_tokens")
                        and chunk.usage._cache_read_input_tokens > 0
                    ):
                        usage_dict[
                            "cache_read_input_tokens"
                        ] = chunk.usage._cache_read_input_tokens
                    merged_chunk["usage"] = usage_dict
                    # Queue the merged chunk and reset
                    self.chunk_queue.append(merged_chunk)
                    self.queued_usage_chunk = True
                    self.holding_stop_reason_chunk = None
                    return self.chunk_queue.popleft()
                # Check if this processed chunk has a stop_reason - hold it for next chunk
                if not self.queued_usage_chunk:
                    if should_start_new_block and not self.sent_content_block_finish:
                        # Queue the sequence: content_block_stop -> content_block_start
                        # The trigger chunk itself is not emitted as a delta since the
                        # content_block_start already carries the relevant information.
                        # 1. Stop current content block
                        self.chunk_queue.append(
                            {
                                "type": "content_block_stop",
                                "index": max(self.current_content_block_index - 1, 0),
                            }
                        )
                        # 2. Start new content block
                        self.chunk_queue.append(
                            {
                                "type": "content_block_start",
                                "index": self.current_content_block_index,
                                "content_block": self.current_content_block_start,
                            }
                        )
                        # Reset state for new block
                        self.sent_content_block_finish = False
                        # Return the first queued item
                        return self.chunk_queue.popleft()
                    if (
                        processed_chunk["type"] == "message_delta"
                        and self.sent_content_block_finish is False
                    ):
                        # Queue both the content_block_stop and the holding chunk
                        self.chunk_queue.append(
                            {
                                "type": "content_block_stop",
                                "index": self.current_content_block_index,
                            }
                        )
                        self.sent_content_block_finish = True
                        if (
                            processed_chunk.get("delta", {}).get("stop_reason")
                            is not None
                        ):
                            self.holding_stop_reason_chunk = processed_chunk
                        else:
                            self.chunk_queue.append(processed_chunk)
                        return self.chunk_queue.popleft()
                    elif self.holding_chunk is not None:
                        # Queue both chunks
                        self.chunk_queue.append(self.holding_chunk)
                        self.chunk_queue.append(processed_chunk)
                        self.holding_chunk = None
                        return self.chunk_queue.popleft()
                    else:
                        # Queue the current chunk
                        self.chunk_queue.append(processed_chunk)
                        return self.chunk_queue.popleft()
            # Handle any remaining held chunks after stream ends
            if not self.queued_usage_chunk:
                if self.holding_stop_reason_chunk is not None:
                    self.chunk_queue.append(self.holding_stop_reason_chunk)
                    self.holding_stop_reason_chunk = None
            if self.holding_chunk is not None:
                self.chunk_queue.append(self.holding_chunk)
                self.holding_chunk = None
            if not self.sent_last_message:
                self.sent_last_message = True
                self.chunk_queue.append({"type": "message_stop"})
            # Return queued items if any
            if self.chunk_queue:
                return self.chunk_queue.popleft()
            raise StopIteration
        except StopIteration:
            # Handle any remaining queued chunks before stopping
            if self.chunk_queue:
                return self.chunk_queue.popleft()
            # Handle any held stop_reason chunk
            if self.holding_stop_reason_chunk is not None:
                return self.holding_stop_reason_chunk
            if not self.sent_last_message:
                self.sent_last_message = True
                return {"type": "message_stop"}
            raise StopAsyncIteration
    def anthropic_sse_wrapper(self) -> Iterator[bytes]:
        """
        Convert AnthropicStreamWrapper dict chunks to Server-Sent Events format.
        Similar to the Bedrock bedrock_sse_wrapper implementation.
        This wrapper ensures dict chunks are SSE formatted with both event and data lines.
        """
        for chunk in self:
            if isinstance(chunk, dict):
                event_type: str = str(chunk.get("type", "message"))
                payload = f"event: {event_type}\ndata: {json.dumps(chunk)}\n\n"
                yield payload.encode()
            else:
                # For non-dict chunks, forward the original value unchanged
                yield chunk
    async def async_anthropic_sse_wrapper(self) -> AsyncIterator[bytes]:
        """
        Async version of anthropic_sse_wrapper.
        Convert AnthropicStreamWrapper dict chunks to Server-Sent Events format.
        """
        async for chunk in self:
            if isinstance(chunk, dict):
                event_type: str = str(chunk.get("type", "message"))
                payload = f"event: {event_type}\ndata: {json.dumps(chunk)}\n\n"
                yield payload.encode()
            else:
                # For non-dict chunks, forward the original value unchanged
                yield chunk
    def _increment_content_block_index(self):
        # Advance the Anthropic content-block index for the next block.
        self.current_content_block_index += 1
    def _should_start_new_content_block(self, chunk: "ModelResponseStream") -> bool:
        """
        Determine if we should start a new content block based on the processed chunk.
        Override this method with your specific logic for detecting new content blocks.
        Examples of when you might want to start a new content block:
        - Switching from text to tool calls
        - Different content types in the response
        - Specific markers in the content
        """
        from .transformation import LiteLLMAnthropicMessagesAdapter
        # Example logic - customize based on your needs:
        # If chunk indicates a tool call
        if chunk.choices[0].finish_reason is not None:
            return False
        (
            block_type,
            content_block_start,
        ) = LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic_content_block(
            choices=chunk.choices  # type: ignore
        )
        # Restore original tool name if it was truncated for OpenAI's 64-char limit
        if block_type == "tool_use":
            # Type narrowing: content_block_start is ToolUseBlock when block_type is "tool_use"
            from typing import cast
            from litellm.types.llms.anthropic import ToolUseBlock
            tool_block = cast(ToolUseBlock, content_block_start)
            if tool_block.get("name"):
                truncated_name = tool_block["name"]
                original_name = self.tool_name_mapping.get(
                    truncated_name, truncated_name
                )
                tool_block["name"] = original_name
        if block_type != self.current_content_block_type:
            self.current_content_block_type = block_type
            self.current_content_block_start = content_block_start
            return True
        # For parallel tool calls, we'll necessarily have a new content block
        # if we get a function name since it signals a new tool call
        if block_type == "tool_use":
            from typing import cast
            from litellm.types.llms.anthropic import ToolUseBlock
            tool_block = cast(ToolUseBlock, content_block_start)
            if tool_block.get("name"):
                self.current_content_block_type = block_type
                self.current_content_block_start = content_block_start
                return True
        return False

View File

@@ -0,0 +1,51 @@
# Anthropic Messages Pass-Through Architecture
## Request Flow
```mermaid
flowchart TD
A[litellm.anthropic.messages.acreate] --> B{Provider?}
B -->|anthropic| C[AnthropicMessagesConfig]
B -->|azure_ai| D[AzureAnthropicMessagesConfig]
B -->|bedrock invoke| E[BedrockAnthropicMessagesConfig]
B -->|vertex_ai| F[VertexAnthropicMessagesConfig]
B -->|Other providers| G[LiteLLMAnthropicMessagesAdapter]
C --> H[Direct Anthropic API]
D --> I[Azure AI Foundry API]
E --> J[Bedrock Invoke API]
F --> K[Vertex AI API]
G --> L[translate_anthropic_to_openai]
L --> M[litellm.completion]
M --> N[Provider API]
N --> O[translate_openai_response_to_anthropic]
O --> P[Anthropic Response Format]
H --> P
I --> P
J --> P
K --> P
```
## Adapter Flow (Non-Native Providers)
```mermaid
sequenceDiagram
participant User
participant Handler as anthropic_messages_handler
participant Adapter as LiteLLMAnthropicMessagesAdapter
participant LiteLLM as litellm.completion
participant Provider as Provider API
User->>Handler: Anthropic Messages Request
Handler->>Adapter: translate_anthropic_to_openai()
Note over Adapter: messages, tools, thinking,<br/>output_format → response_format
Adapter->>LiteLLM: OpenAI Format Request
LiteLLM->>Provider: Provider-specific Request
Provider->>LiteLLM: Provider Response
LiteLLM->>Adapter: OpenAI Format Response
Adapter->>Handler: translate_openai_response_to_anthropic()
Handler->>User: Anthropic Messages Response
```

View File

@@ -0,0 +1,251 @@
"""
Fake Streaming Iterator for Anthropic Messages
This module provides a fake streaming iterator that converts non-streaming
Anthropic Messages responses into proper streaming format.
Used when WebSearch interception converts stream=True to stream=False but
the LLM doesn't make a tool call, and we need to return a stream to the user.
"""
import json
from typing import Any, Dict, List, cast
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
class FakeAnthropicMessagesStreamIterator:
    """
    Fake streaming iterator for Anthropic Messages responses.

    Used when we need to convert a non-streaming response to a streaming format,
    such as when WebSearch interception converts stream=True to stream=False but
    the LLM doesn't make a tool call.

    This creates a proper Anthropic-style streaming response with multiple events:
    - message_start
    - content_block_start (for each content block)
    - content_block_delta (for text content, chunked)
    - content_block_stop
    - message_delta (for usage)
    - message_stop

    All chunks are pre-computed in ``__init__``; iteration (sync or async) just
    replays the cached ``bytes`` payloads, each already SSE-framed as
    ``event: <type>\\ndata: <json>\\n\\n``.
    """

    def __init__(self, response: AnthropicMessagesResponse):
        # The full, non-streaming response to be replayed as a stream.
        self.response = response
        # Pre-rendered SSE frames, in emission order.
        self.chunks: List[bytes] = self._create_streaming_chunks()
        # Cursor into self.chunks, shared by the sync and async iterator paths.
        self.current_index: int = 0

    def _create_streaming_chunks(self) -> List[bytes]:
        """Convert the non-streaming response to streaming chunks"""
        chunks: List[bytes] = []
        # Cast response to dict for easier access
        response_dict = cast(Dict[str, Any], self.response)
        # 1. message_start event
        usage = response_dict.get("usage", {})
        message_start = {
            "type": "message_start",
            "message": {
                "id": response_dict.get("id"),
                "type": "message",
                "role": response_dict.get("role", "assistant"),
                "model": response_dict.get("model"),
                "content": [],
                "stop_reason": None,
                "stop_sequence": None,
                "usage": {
                    "input_tokens": usage.get("input_tokens", 0) if usage else 0,
                    # output_tokens is reported later, in message_delta.
                    "output_tokens": 0,
                },
            },
        }
        chunks.append(
            f"event: message_start\ndata: {json.dumps(message_start)}\n\n".encode()
        )
        # 2-4. For each content block, send start/delta/stop events
        content_blocks = response_dict.get("content", [])
        if content_blocks:
            for index, block in enumerate(content_blocks):
                # Cast block to dict for easier access
                block_dict = cast(Dict[str, Any], block)
                block_type = block_dict.get("type")
                if block_type == "text":
                    # content_block_start
                    content_block_start = {
                        "type": "content_block_start",
                        "index": index,
                        "content_block": {"type": "text", "text": ""},
                    }
                    chunks.append(
                        f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n".encode()
                    )
                    # content_block_delta (send full text as one delta for simplicity)
                    text = block_dict.get("text", "")
                    content_block_delta = {
                        "type": "content_block_delta",
                        "index": index,
                        "delta": {"type": "text_delta", "text": text},
                    }
                    chunks.append(
                        f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n".encode()
                    )
                    # content_block_stop
                    content_block_stop = {"type": "content_block_stop", "index": index}
                    chunks.append(
                        f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n".encode()
                    )
                elif block_type == "thinking":
                    # content_block_start for thinking
                    content_block_start = {
                        "type": "content_block_start",
                        "index": index,
                        "content_block": {
                            "type": "thinking",
                            "thinking": "",
                            "signature": "",
                        },
                    }
                    chunks.append(
                        f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n".encode()
                    )
                    # content_block_delta for thinking text
                    thinking_text = block_dict.get("thinking", "")
                    if thinking_text:
                        content_block_delta = {
                            "type": "content_block_delta",
                            "index": index,
                            "delta": {
                                "type": "thinking_delta",
                                "thinking": thinking_text,
                            },
                        }
                        chunks.append(
                            f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n".encode()
                        )
                    # content_block_delta for signature (if present)
                    signature = block_dict.get("signature", "")
                    if signature:
                        signature_delta = {
                            "type": "content_block_delta",
                            "index": index,
                            "delta": {
                                "type": "signature_delta",
                                "signature": signature,
                            },
                        }
                        chunks.append(
                            f"event: content_block_delta\ndata: {json.dumps(signature_delta)}\n\n".encode()
                        )
                    # content_block_stop
                    content_block_stop = {"type": "content_block_stop", "index": index}
                    chunks.append(
                        f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n".encode()
                    )
                elif block_type == "redacted_thinking":
                    # content_block_start for redacted_thinking
                    content_block_start = {
                        "type": "content_block_start",
                        "index": index,
                        "content_block": {"type": "redacted_thinking"},
                    }
                    chunks.append(
                        f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n".encode()
                    )
                    # content_block_stop (no delta for redacted thinking)
                    content_block_stop = {"type": "content_block_stop", "index": index}
                    chunks.append(
                        f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n".encode()
                    )
                elif block_type == "tool_use":
                    # content_block_start
                    content_block_start = {
                        "type": "content_block_start",
                        "index": index,
                        "content_block": {
                            "type": "tool_use",
                            "id": block_dict.get("id"),
                            "name": block_dict.get("name"),
                            "input": {},
                        },
                    }
                    chunks.append(
                        f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n".encode()
                    )
                    # content_block_delta (send input as JSON delta)
                    input_data = block_dict.get("input", {})
                    content_block_delta = {
                        "type": "content_block_delta",
                        "index": index,
                        "delta": {
                            "type": "input_json_delta",
                            "partial_json": json.dumps(input_data),
                        },
                    }
                    chunks.append(
                        f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n".encode()
                    )
                    # content_block_stop
                    content_block_stop = {"type": "content_block_stop", "index": index}
                    chunks.append(
                        f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n".encode()
                    )
                # NOTE(review): unknown block types are silently dropped — confirm
                # that is the intended behavior for future Anthropic block kinds.
        # 5. message_delta event (with final usage and stop_reason)
        message_delta = {
            "type": "message_delta",
            "delta": {
                "stop_reason": response_dict.get("stop_reason"),
                "stop_sequence": response_dict.get("stop_sequence"),
            },
            "usage": {"output_tokens": usage.get("output_tokens", 0) if usage else 0},
        }
        chunks.append(
            f"event: message_delta\ndata: {json.dumps(message_delta)}\n\n".encode()
        )
        # 6. message_stop event
        # NOTE(review): Anthropic's message_stop event normally carries no usage
        # field — confirm downstream consumers tolerate the extra key.
        message_stop = {"type": "message_stop", "usage": usage if usage else {}}
        chunks.append(
            f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n".encode()
        )
        return chunks

    def __aiter__(self):
        """Async iterator protocol: the instance is its own iterator."""
        return self

    async def __anext__(self):
        """Yield the next pre-rendered SSE frame, or end the async stream."""
        if self.current_index >= len(self.chunks):
            raise StopAsyncIteration
        chunk = self.chunks[self.current_index]
        self.current_index += 1
        return chunk

    def __iter__(self):
        """Sync iterator protocol: the instance is its own iterator."""
        return self

    def __next__(self):
        """Yield the next pre-rendered SSE frame, or end the sync stream."""
        if self.current_index >= len(self.chunks):
            raise StopIteration
        chunk = self.chunks[self.current_index]
        self.current_index += 1
        return chunk

View File

@@ -0,0 +1,362 @@
"""
- call /messages on Anthropic API
- Make streaming + non-streaming request - just pass it through direct to Anthropic. No need to do anything special here
- Ensure requests are logged in the DB - stream + non-stream
"""
import asyncio
import contextvars
from functools import partial
from typing import Any, AsyncIterator, Coroutine, Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.types.llms.anthropic_messages.anthropic_request import AnthropicMetadata
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client
from ..adapters.handler import LiteLLMMessagesToCompletionTransformationHandler
from ..responses_adapters.handler import LiteLLMMessagesToResponsesAPIHandler
from .utils import AnthropicMessagesRequestUtils, mock_response
# Providers that are routed directly to the OpenAI Responses API instead of
# going through chat/completions.
_RESPONSES_API_PROVIDERS = frozenset({"openai"})
def _should_route_to_responses_api(custom_llm_provider: Optional[str]) -> bool:
    """Return True when the provider should use the Responses API path.

    Set ``litellm.use_chat_completions_url_for_anthropic_messages = True`` to
    opt out and route OpenAI/Azure requests through chat/completions instead.
    """
    opted_out = litellm.use_chat_completions_url_for_anthropic_messages
    return (not opted_out) and custom_llm_provider in _RESPONSES_API_PROVIDERS
####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
#################################################
async def _execute_pre_request_hooks(
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    stream: Optional[bool],
    custom_llm_provider: Optional[str],
    **kwargs,
) -> Dict:
    """
    Run CustomLogger pre-request hooks so callbacks can rewrite the request.

    Used for WebSearch tool conversion, stream modification, etc. Hooks are
    applied in callback registration order; each hook receives the output of
    the previous one.

    Args:
        model: Model name
        messages: List of messages
        tools: Optional tools list
        stream: Optional stream flag
        custom_llm_provider: Provider name (derived from ``model`` when absent)
        **kwargs: Additional request parameters

    Returns:
        Dict with all (potentially modified) request parameters, including
        ``tools``, ``stream`` and a ``litellm_params`` sub-dict.
    """
    # Derive the provider from the model string when the caller didn't pass one.
    if not custom_llm_provider:
        try:
            _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
        except Exception:
            # Provider detection is best-effort; hooks still run without it.
            pass

    request_kwargs: Dict = {
        "tools": tools,
        "stream": stream,
        "litellm_params": {
            "custom_llm_provider": custom_llm_provider,
        },
        **kwargs,
    }

    if not litellm.callbacks:
        return request_kwargs

    from litellm.integrations.custom_logger import CustomLogger as _CustomLogger

    for callback in litellm.callbacks:
        if not isinstance(callback, _CustomLogger):
            continue
        hook_result = await callback.async_pre_request_hook(
            model, messages, request_kwargs
        )
        # A hook may return None to mean "no changes".
        if hook_result is not None:
            request_kwargs = hook_result
    return request_kwargs
@client
async def anthropic_messages(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = None,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client: Optional[AsyncHTTPHandler] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
    Async: Make llm api request in Anthropic /messages API spec

    Runs CustomLogger pre-request hooks first (which may rewrite ``tools`` and
    ``stream``), then dispatches the synchronous handler on the default
    executor with the current contextvars context preserved.

    Returns:
        The full AnthropicMessagesResponse, or an async iterator of SSE chunks
        when streaming.
    """
    # Execute pre-request hooks to allow CustomLoggers to modify request
    request_kwargs = await _execute_pre_request_hooks(
        model=model,
        messages=messages,
        tools=tools,
        stream=stream,
        custom_llm_provider=custom_llm_provider,
        **kwargs,
    )
    # Extract modified parameters
    tools = request_kwargs.pop("tools", tools)
    stream = request_kwargs.pop("stream", stream)
    # Remove litellm_params from kwargs (only needed for hooks)
    request_kwargs.pop("litellm_params", None)
    # Merge back any other modifications
    kwargs.update(request_kwargs)
    loop = asyncio.get_event_loop()
    # Tell the sync handler to return the async code path (a coroutine).
    kwargs["is_async"] = True
    func = partial(
        anthropic_messages_handler,
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        metadata=metadata,
        stop_sequences=stop_sequences,
        stream=stream,
        system=system,
        temperature=temperature,
        thinking=thinking,
        tool_choice=tool_choice,
        tools=tools,
        top_k=top_k,
        top_p=top_p,
        api_key=api_key,
        api_base=api_base,
        client=client,
        custom_llm_provider=custom_llm_provider,
        **kwargs,
    )
    # Run the sync entrypoint on the executor so its blocking setup work does
    # not stall the event loop; copy the contextvars context explicitly since
    # run_in_executor does not propagate it on its own.
    ctx = contextvars.copy_context()
    func_with_context = partial(ctx.run, func)
    init_response = await loop.run_in_executor(None, func_with_context)
    # The handler may hand back either a coroutine (async path) or a final
    # value (e.g. mock responses) — await only when needed.
    if asyncio.iscoroutine(init_response):
        response = await init_response
    else:
        response = init_response
    return response
def validate_anthropic_api_metadata(metadata: Optional[Dict] = None) -> Optional[Dict]:
    """
    Validate Anthropic API metadata - This is done to ensure only allowed `metadata` fields are passed to Anthropic API

    If there are any litellm specific metadata fields, use `litellm_metadata` key to pass them.
    """
    if metadata is None:
        return None
    # Round-trip through the pydantic model so unknown fields are rejected and
    # unset fields are dropped.
    return AnthropicMetadata(**metadata).model_dump(exclude_none=True)
def anthropic_messages_handler(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = None,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    container: Optional[Dict] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client: Optional[AsyncHTTPHandler] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[
    AnthropicMessagesResponse,
    AsyncIterator[Any],
    Coroutine[Any, Any, Union[AnthropicMessagesResponse, AsyncIterator[Any]]],
]:
    """
    Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec

    Routing:
    - Providers with a native anthropic-messages provider config are sent
      through the direct HTTP handler.
    - OpenAI (unless opted out) is adapted to the Responses API.
    - All other providers are adapted through chat/completions.

    Args:
        container: Container config with skills for code execution

    Returns:
        An AnthropicMessagesResponse, an async iterator (streaming), or a
        coroutine resolving to either when invoked with ``is_async=True``.
    """
    from litellm.types.utils import LlmProviders

    metadata = validate_anthropic_api_metadata(metadata)
    # Snapshot locals AFTER metadata validation so the optional-param filter
    # below sees the validated metadata.
    local_vars = locals()
    is_async = kwargs.pop("is_async", False)
    # Use provided client or create a new one
    litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
    # Store original model name before get_llm_provider strips the provider prefix
    # This is needed by agentic hooks (e.g., websearch_interception) to make follow-up requests
    original_model = model
    litellm_params = GenericLiteLLMParams(
        **kwargs,
        api_key=api_key,
        api_base=api_base,
        custom_llm_provider=custom_llm_provider,
    )
    (
        model,
        custom_llm_provider,
        dynamic_api_key,
        dynamic_api_base,
    ) = litellm.get_llm_provider(
        model=model,
        custom_llm_provider=custom_llm_provider,
        api_base=litellm_params.api_base,
        api_key=litellm_params.api_key,
    )
    # Store agentic loop params in logging object for agentic hooks
    # This provides original request context needed for follow-up calls
    if litellm_logging_obj is not None:
        litellm_logging_obj.model_call_details["agentic_loop_params"] = {
            "model": original_model,
            "custom_llm_provider": custom_llm_provider,
        }
        # Check if stream was converted for WebSearch interception
        # This is set in the async wrapper above when stream=True is converted to stream=False
        # FIX: previously this check ran outside the `is not None` guard and
        # raised AttributeError when no logging object was supplied.
        if kwargs.get("_websearch_interception_converted_stream", False):
            litellm_logging_obj.model_call_details[
                "websearch_interception_converted_stream"
            ] = True
    if litellm_params.mock_response and isinstance(litellm_params.mock_response, str):
        return mock_response(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            mock_response=litellm_params.mock_response,
        )
    anthropic_messages_provider_config: Optional[BaseAnthropicMessagesConfig] = None
    if custom_llm_provider is not None and custom_llm_provider in [
        provider.value for provider in LlmProviders
    ]:
        anthropic_messages_provider_config = (
            ProviderConfigManager.get_provider_anthropic_messages_config(
                model=model,
                provider=litellm.LlmProviders(custom_llm_provider),
            )
        )
    if anthropic_messages_provider_config is None:
        # Route to Responses API for OpenAI / Azure, chat/completions for everything else.
        _shared_kwargs = dict(
            max_tokens=max_tokens,
            messages=messages,
            model=model,
            metadata=metadata,
            stop_sequences=stop_sequences,
            stream=stream,
            system=system,
            temperature=temperature,
            thinking=thinking,
            tool_choice=tool_choice,
            tools=tools,
            top_k=top_k,
            top_p=top_p,
            _is_async=is_async,
            api_key=api_key,
            api_base=api_base,
            client=client,
            custom_llm_provider=custom_llm_provider,
            **kwargs,
        )
        if _should_route_to_responses_api(custom_llm_provider):
            return LiteLLMMessagesToResponsesAPIHandler.anthropic_messages_handler(
                **_shared_kwargs
            )
        return (
            LiteLLMMessagesToCompletionTransformationHandler.anthropic_messages_handler(
                **_shared_kwargs
            )
        )
    if custom_llm_provider is None:
        raise ValueError(
            f"custom_llm_provider is required for Anthropic messages, passed in model={model}, custom_llm_provider={custom_llm_provider}"
        )
    # Fold late kwargs into the locals snapshot, then keep only keys that are
    # valid Anthropic optional request params.
    local_vars.update(kwargs)
    anthropic_messages_optional_request_params = (
        AnthropicMessagesRequestUtils.get_requested_anthropic_messages_optional_param(
            params=local_vars
        )
    )
    return base_llm_http_handler.anthropic_messages_handler(
        model=model,
        messages=messages,
        anthropic_messages_provider_config=anthropic_messages_provider_config,
        anthropic_messages_optional_request_params=dict(
            anthropic_messages_optional_request_params
        ),
        _is_async=is_async,
        client=client,
        custom_llm_provider=custom_llm_provider,
        litellm_params=litellm_params,
        logging_obj=litellm_logging_obj,
        api_key=api_key,
        api_base=api_base,
        stream=stream,
        kwargs=kwargs,
    )

View File

@@ -0,0 +1,108 @@
import asyncio
import json
from datetime import datetime
from typing import Any, AsyncIterator, List, Union
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.proxy.pass_through_endpoints.success_handler import (
PassThroughEndpointLogging,
)
from litellm.types.passthrough_endpoints.pass_through_endpoints import EndpointType
from litellm.types.utils import GenericStreamingChunk, ModelResponseStream
GLOBAL_PASS_THROUGH_SUCCESS_HANDLER_OBJ = PassThroughEndpointLogging()
class BaseAnthropicMessagesStreamingIterator:
    """
    Base class for Anthropic Messages streaming iterators that provides common logic
    for streaming response handling and logging.

    Subclasses supply the chunk source; this class handles SSE framing
    (``_convert_chunk_to_sse_format``) and fire-and-forget success logging
    once the stream is exhausted.
    """

    def __init__(
        self,
        litellm_logging_obj: LiteLLMLoggingObj,
        request_body: dict,
    ):
        # Logging context for the request this stream belongs to.
        self.litellm_logging_obj = litellm_logging_obj
        # Original request payload, forwarded to the logging handler.
        self.request_body = request_body
        # Wall-clock start, used to compute stream duration for logging.
        self.start_time = datetime.now()

    async def _handle_streaming_logging(self, collected_chunks: List[bytes]):
        """Handle the logging after all chunks have been collected."""
        from litellm.proxy.pass_through_endpoints.streaming_handler import (
            PassThroughStreamingHandler,
        )

        end_time = datetime.now()
        # Fire-and-forget: logging must not delay delivery of the final chunk
        # to the caller.
        asyncio.create_task(
            PassThroughStreamingHandler._route_streaming_logging_to_handler(
                litellm_logging_obj=self.litellm_logging_obj,
                passthrough_success_handler_obj=GLOBAL_PASS_THROUGH_SUCCESS_HANDLER_OBJ,
                url_route="/v1/messages",
                request_body=self.request_body or {},
                endpoint_type=EndpointType.ANTHROPIC,
                start_time=self.start_time,
                raw_bytes=collected_chunks,
                end_time=end_time,
            )
        )

    def get_async_streaming_response_iterator(
        self,
        httpx_response,
        request_body: dict,
        litellm_logging_obj: LiteLLMLoggingObj,
    ) -> AsyncIterator:
        """Helper function to handle Anthropic streaming responses using the existing logging handlers"""
        from litellm.proxy.pass_through_endpoints.streaming_handler import (
            PassThroughStreamingHandler,
        )

        # Use the existing streaming handler for Anthropic
        return PassThroughStreamingHandler.chunk_processor(
            response=httpx_response,
            request_body=request_body,
            litellm_logging_obj=litellm_logging_obj,
            endpoint_type=EndpointType.ANTHROPIC,
            start_time=self.start_time,
            passthrough_success_handler_obj=GLOBAL_PASS_THROUGH_SUCCESS_HANDLER_OBJ,
            url_route="/v1/messages",
        )

    def _convert_chunk_to_sse_format(self, chunk: Union[dict, Any]) -> bytes:
        """
        Convert a chunk to Server-Sent Events format.

        This method should be overridden by subclasses if they need custom
        chunk formatting logic.
        """
        if isinstance(chunk, dict):
            # The Anthropic event name is carried in the chunk's "type" field.
            event_type: str = str(chunk.get("type", "message"))
            payload = f"event: {event_type}\n" f"data: {json.dumps(chunk)}\n\n"
            return payload.encode()
        else:
            # For non-dict chunks, return as is
            # NOTE(review): assumes non-dict chunks are already bytes — confirm
            # upstream chunk sources; a str here would leak into collected_chunks.
            return chunk

    async def async_sse_wrapper(
        self,
        completion_stream: AsyncIterator[
            Union[bytes, GenericStreamingChunk, ModelResponseStream, dict]
        ],
    ) -> AsyncIterator[bytes]:
        """
        Generic async SSE wrapper that converts streaming chunks to SSE format
        and handles logging.

        This method provides the common logic for both Anthropic and Bedrock implementations.
        """
        collected_chunks: List[bytes] = []
        async for chunk in completion_stream:
            encoded_chunk = self._convert_chunk_to_sse_format(chunk)
            collected_chunks.append(encoded_chunk)
            yield encoded_chunk
        # Handle logging after all chunks are processed
        await self._handle_streaming_logging(collected_chunks)

View File

@@ -0,0 +1,308 @@
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple
import httpx
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.litellm_logging import verbose_logger
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
)
from litellm.types.llms.anthropic import (
ANTHROPIC_BETA_HEADER_VALUES,
AnthropicMessagesRequest,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.llms.anthropic_tool_search import get_tool_search_beta_header
from litellm.types.router import GenericLiteLLMParams
from ...common_utils import (
AnthropicError,
AnthropicModelInfo,
optionally_handle_anthropic_oauth,
)
DEFAULT_ANTHROPIC_API_BASE = "https://api.anthropic.com"
DEFAULT_ANTHROPIC_API_VERSION = "2023-06-01"
class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
    """
    Provider config for the native Anthropic `/v1/messages` endpoint.

    Requests are mostly passed through untouched; this class handles URL
    construction, auth headers, beta-header injection, and billing-header
    filtering.
    """

    def get_supported_anthropic_messages_params(self, model: str) -> list:
        """Return the request params accepted for the given Anthropic model."""
        return [
            "messages",
            "model",
            "system",
            "max_tokens",
            "stop_sequences",
            "temperature",
            "top_p",
            "top_k",
            "tools",
            "tool_choice",
            "thinking",
            "context_management",
            "output_format",
            "inference_geo",
            "speed",
            "output_config",
            # TODO: Add Anthropic `metadata` support
            # "metadata",
        ]

    @staticmethod
    def _filter_billing_headers_from_system(system_param):
        """
        Filter out x-anthropic-billing-header metadata from system parameter.

        Args:
            system_param: Can be a string or a list of system message content blocks

        Returns:
            Filtered system parameter (string or list), or None if all content was filtered
        """
        if isinstance(system_param, str):
            # If it's a string and starts with billing header, filter it out
            if system_param.startswith("x-anthropic-billing-header:"):
                return None
            return system_param
        elif isinstance(system_param, list):
            # Filter list of system content blocks
            filtered_list = []
            for content_block in system_param:
                if isinstance(content_block, dict):
                    text = content_block.get("text", "")
                    content_type = content_block.get("type", "")
                    # Skip text blocks that start with billing header
                    if content_type == "text" and text.startswith(
                        "x-anthropic-billing-header:"
                    ):
                        continue
                    filtered_list.append(content_block)
                else:
                    # Keep non-dict items as-is
                    filtered_list.append(content_block)
            return filtered_list if len(filtered_list) > 0 else None
        else:
            # Unknown shape: pass through untouched.
            return system_param

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Return the full `/v1/messages` URL, appending the path if absent."""
        api_base = api_base or DEFAULT_ANTHROPIC_API_BASE
        if not api_base.endswith("/v1/messages"):
            # FIX: strip a trailing slash before appending, so a configured
            # base like "https://api.anthropic.com/" no longer yields "//v1/messages".
            api_base = f"{api_base.rstrip('/')}/v1/messages"
        return api_base

    def validate_anthropic_messages_environment(
        self,
        headers: dict,
        model: str,
        messages: List[Any],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> Tuple[dict, Optional[str]]:
        """
        Populate auth/version/content-type headers for an Anthropic request.

        Honors an existing OAuth Authorization header, falls back to the
        ANTHROPIC_API_KEY env var, and auto-injects beta headers.
        """
        import os

        # Check for Anthropic OAuth token in Authorization header
        headers, api_key = optionally_handle_anthropic_oauth(
            headers=headers, api_key=api_key
        )
        if api_key is None:
            api_key = os.getenv("ANTHROPIC_API_KEY")
        # Never clobber auth the caller already supplied.
        if "x-api-key" not in headers and "authorization" not in headers and api_key:
            headers["x-api-key"] = api_key
        if "anthropic-version" not in headers:
            headers["anthropic-version"] = DEFAULT_ANTHROPIC_API_VERSION
        if "content-type" not in headers:
            headers["content-type"] = "application/json"
        headers = self._update_headers_with_anthropic_beta(
            headers=headers,
            optional_params=optional_params,
        )
        return headers, api_base

    def transform_anthropic_messages_request(
        self,
        model: str,
        messages: List[Dict],
        anthropic_messages_optional_request_params: Dict,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Dict:
        """
        No transformation is needed for Anthropic messages

        This takes in a request in the Anthropic /v1/messages API spec -> transforms it to /v1/messages API spec (i.e) no transformation is needed

        Raises:
            AnthropicError: when max_tokens is missing (required by the API).
        """
        max_tokens = anthropic_messages_optional_request_params.pop("max_tokens", None)
        if max_tokens is None:
            raise AnthropicError(
                message="max_tokens is required for Anthropic /v1/messages API",
                status_code=400,
            )
        # Filter out x-anthropic-billing-header from system messages
        system_param = anthropic_messages_optional_request_params.get("system")
        if system_param is not None:
            filtered_system = self._filter_billing_headers_from_system(system_param)
            if filtered_system is not None and len(filtered_system) > 0:
                anthropic_messages_optional_request_params["system"] = filtered_system
            else:
                # Remove system parameter if all content was filtered out
                anthropic_messages_optional_request_params.pop("system", None)
        # Transform context_management from OpenAI format to Anthropic format if needed
        context_management_param = anthropic_messages_optional_request_params.get(
            "context_management"
        )
        if context_management_param is not None:
            from litellm.llms.anthropic.chat.transformation import AnthropicConfig

            transformed_context_management = (
                AnthropicConfig.map_openai_context_management_to_anthropic(
                    context_management_param
                )
            )
            if transformed_context_management is not None:
                anthropic_messages_optional_request_params[
                    "context_management"
                ] = transformed_context_management
        ####### get required params for all anthropic messages requests ######
        # Lazy %-style args: avoid formatting (and leaking) full message
        # payloads unless debug logging is actually enabled.
        verbose_logger.debug("TRANSFORMATION DEBUG - Messages: %s", messages)
        anthropic_messages_request: AnthropicMessagesRequest = AnthropicMessagesRequest(
            messages=messages,
            max_tokens=max_tokens,
            model=model,
            **anthropic_messages_optional_request_params,
        )
        return dict(anthropic_messages_request)

    def transform_anthropic_messages_response(
        self,
        model: str,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> AnthropicMessagesResponse:
        """
        No transformation is needed for Anthropic messages, since we want the response in the Anthropic /v1/messages API spec

        Raises:
            AnthropicError: when the response body is not valid JSON.
        """
        try:
            raw_response_json = raw_response.json()
        except Exception:
            raise AnthropicError(
                message=raw_response.text, status_code=raw_response.status_code
            )
        return AnthropicMessagesResponse(**raw_response_json)

    def get_async_streaming_response_iterator(
        self,
        model: str,
        httpx_response: httpx.Response,
        request_body: dict,
        litellm_logging_obj: LiteLLMLoggingObj,
    ) -> AsyncIterator:
        """Helper function to handle Anthropic streaming responses using the existing logging handlers"""
        from litellm.llms.anthropic.experimental_pass_through.messages.streaming_iterator import (
            BaseAnthropicMessagesStreamingIterator,
        )

        # Use the shared streaming handler for Anthropic
        handler = BaseAnthropicMessagesStreamingIterator(
            litellm_logging_obj=litellm_logging_obj,
            request_body=request_body,
        )
        return handler.get_async_streaming_response_iterator(
            httpx_response=httpx_response,
            request_body=request_body,
            litellm_logging_obj=litellm_logging_obj,
        )

    @staticmethod
    def _update_headers_with_anthropic_beta(
        headers: dict,
        optional_params: dict,
        custom_llm_provider: str = "anthropic",
    ) -> dict:
        """
        Auto-inject anthropic-beta headers based on features used.

        Handles:
        - context_management: adds CONTEXT_MANAGEMENT_2025_06_27 (and/or
          COMPACT_2026_01_12 for compact edits)
        - tool_search: adds provider-specific tool search header
        - output_format: adds STRUCTURED_OUTPUT_2025_09_25
          (docstring previously claimed 'structured-outputs-2025-11-13',
          which did not match the header actually sent)
        - speed: adds FAST_MODE_2026_02_01

        Args:
            headers: Request headers dict
            optional_params: Optional parameters including tools, context_management, output_format, speed
            custom_llm_provider: Provider name for looking up correct tool search header
        """
        beta_values: set = set()
        # Get existing beta headers if any
        existing_beta = headers.get("anthropic-beta")
        if existing_beta:
            beta_values.update(b.strip() for b in existing_beta.split(","))
        # Check for context management
        context_management_param = optional_params.get("context_management")
        if context_management_param is not None:
            # Check edits array for compact_20260112 type
            edits = context_management_param.get("edits", [])
            has_compact = False
            has_other = False
            for edit in edits:
                edit_type = edit.get("type", "")
                if edit_type == "compact_20260112":
                    has_compact = True
                else:
                    has_other = True
            # Add compact header if any compact edits exist
            if has_compact:
                beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value)
            # Add context management header if any other edits exist
            if has_other:
                beta_values.add(
                    ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value
                )
        # Check for structured outputs
        if optional_params.get("output_format") is not None:
            beta_values.add(
                ANTHROPIC_BETA_HEADER_VALUES.STRUCTURED_OUTPUT_2025_09_25.value
            )
        # Check for fast mode
        if optional_params.get("speed") == "fast":
            beta_values.add(ANTHROPIC_BETA_HEADER_VALUES.FAST_MODE_2026_02_01.value)
        # Check for tool search tools
        tools = optional_params.get("tools")
        if tools:
            anthropic_model_info = AnthropicModelInfo()
            if anthropic_model_info.is_tool_search_used(tools):
                # Use provider-specific tool search header
                tool_search_header = get_tool_search_beta_header(custom_llm_provider)
                beta_values.add(tool_search_header)
        # Sorted join keeps the header value deterministic across requests.
        if beta_values:
            headers["anthropic-beta"] = ",".join(sorted(beta_values))
        return headers

View File

@@ -0,0 +1,75 @@
from typing import Any, Dict, List, cast, get_type_hints
from litellm.types.llms.anthropic import AnthropicMessagesRequestOptionalParams
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
class AnthropicMessagesRequestUtils:
    """Helpers for shaping raw call params into the Anthropic /v1/messages schema."""

    @staticmethod
    def get_requested_anthropic_messages_optional_param(
        params: Dict[str, Any],
    ) -> AnthropicMessagesRequestOptionalParams:
        """
        Filter parameters to only include those defined in AnthropicMessagesRequestOptionalParams.

        Args:
            params: Dictionary of parameters to filter

        Returns:
            AnthropicMessagesRequestOptionalParams instance with only the valid parameters
        """
        valid_keys = get_type_hints(AnthropicMessagesRequestOptionalParams).keys()
        filtered_params: Dict[str, Any] = {}
        for key, value in params.items():
            # Drop unknown keys and explicit Nones alike.
            if key in valid_keys and value is not None:
                filtered_params[key] = value
        return cast(AnthropicMessagesRequestOptionalParams, filtered_params)
def mock_response(
    model: str,
    messages: List[Dict],
    max_tokens: int,
    mock_response: str = "Hi! My name is Claude.",
    **kwargs,
) -> AnthropicMessagesResponse:
    """
    Mock response for Anthropic messages.

    Sentinel values of ``mock_response`` raise the matching litellm exception;
    any other string is echoed back inside a canned response payload.
    """
    from litellm.exceptions import (
        ContextWindowExceededError,
        InternalServerError,
        RateLimitError,
    )

    # Sentinel string -> (exception class, message) dispatch table.
    sentinel_errors = {
        "litellm.InternalServerError": (
            InternalServerError,
            "this is a mock internal server error",
        ),
        "litellm.ContextWindowExceededError": (
            ContextWindowExceededError,
            "this is a mock context window exceeded error",
        ),
        "litellm.RateLimitError": (
            RateLimitError,
            "this is a mock rate limit error",
        ),
    }
    matched = sentinel_errors.get(mock_response)
    if matched is not None:
        error_cls, error_message = matched
        raise error_cls(
            message=error_message,
            llm_provider="anthropic",
            model=model,
        )

    payload = {
        "content": [{"text": mock_response, "type": "text"}],
        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
        "model": "claude-sonnet-4-20250514",
        "role": "assistant",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "type": "message",
        "usage": {"input_tokens": 2095, "output_tokens": 503},
    }
    return AnthropicMessagesResponse(**payload)

View File

@@ -0,0 +1,3 @@
from .transformation import LiteLLMAnthropicToResponsesAPIAdapter
__all__ = ["LiteLLMAnthropicToResponsesAPIAdapter"]

View File

@@ -0,0 +1,239 @@
"""
Handler for the Anthropic v1/messages -> OpenAI Responses API path.
Used when the target model is an OpenAI or Azure model.
"""
from typing import Any, AsyncIterator, Coroutine, Dict, List, Optional, Union
import litellm
from litellm.types.llms.anthropic import AnthropicMessagesRequest
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.llms.openai import ResponsesAPIResponse
from .streaming_iterator import AnthropicResponsesStreamWrapper
from .transformation import LiteLLMAnthropicToResponsesAPIAdapter
_ADAPTER = LiteLLMAnthropicToResponsesAPIAdapter()
def _build_responses_kwargs(
    *,
    max_tokens: int,
    messages: List[Dict],
    model: str,
    context_management: Optional[Dict] = None,
    metadata: Optional[Dict] = None,
    output_config: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = None,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    output_format: Optional[Dict] = None,
    extra_kwargs: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build the kwargs dict to pass directly to litellm.responses() / litellm.aresponses().

    The Anthropic-shaped arguments are assembled into a typed
    AnthropicMessagesRequest, translated via the module-level adapter, and then
    merged with any caller-supplied litellm kwargs (api_key, api_base,
    logging object, etc.).
    """
    request_data: Dict[str, Any] = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
    }
    # Optional payload fields that are only forwarded when truthy.
    for field_name, field_value in (
        ("context_management", context_management),
        ("output_config", output_config),
        ("metadata", metadata),
        ("system", system),
        ("thinking", thinking),
        ("tool_choice", tool_choice),
        ("tools", tools),
        ("output_format", output_format),
    ):
        if field_value:
            request_data[field_name] = field_value
    # Numeric sampling params: 0 / 0.0 are legitimate values, so gate on None.
    if temperature is not None:
        request_data["temperature"] = temperature
    if top_p is not None:
        request_data["top_p"] = top_p
    # NOTE(review): stop_sequences and top_k are accepted but never forwarded —
    # presumably because the Responses API path has no equivalent; confirm.
    anthropic_request = AnthropicMessagesRequest(**request_data)  # type: ignore[typeddict-item]
    responses_kwargs = _ADAPTER.translate_request(anthropic_request)
    if stream:
        responses_kwargs["stream"] = True
    # Forward litellm-specific kwargs (api_key, api_base, logging obj, etc.)
    skip_keys = {"anthropic_messages"}
    for key, value in (extra_kwargs or {}).items():
        if key == "litellm_logging_obj" and value is not None:
            from litellm.litellm_core_utils.litellm_logging import (
                Logging as LiteLLMLoggingObject,
            )
            from litellm.types.utils import CallTypes

            if isinstance(value, LiteLLMLoggingObject):
                # Reclassify as acompletion so the success handler doesn't try to
                # validate the Responses API event as an AnthropicResponse.
                # (Mirrors the pattern used in LiteLLMMessagesToCompletionTransformationHandler.)
                setattr(value, "call_type", CallTypes.acompletion.value)
            responses_kwargs[key] = value
            continue
        if key in skip_keys or value is None:
            continue
        if key not in responses_kwargs:
            responses_kwargs[key] = value
    return responses_kwargs
class LiteLLMMessagesToResponsesAPIHandler:
    """
    Handles Anthropic /v1/messages requests for OpenAI / Azure models by
    calling litellm.responses() / litellm.aresponses() directly and translating
    the response back to Anthropic format.
    """

    @staticmethod
    def _finalize_result(
        result: Any, stream: Optional[bool], model: str
    ) -> Union[AnthropicMessagesResponse, AsyncIterator[Any]]:
        """Wrap a streaming result in Anthropic SSE, or translate a completed response."""
        if stream:
            wrapper = AnthropicResponsesStreamWrapper(
                responses_stream=result, model=model
            )
            return wrapper.async_anthropic_sse_wrapper()
        if not isinstance(result, ResponsesAPIResponse):
            raise ValueError(f"Expected ResponsesAPIResponse, got {type(result)}")
        return _ADAPTER.translate_response(result)

    @staticmethod
    async def async_anthropic_messages_handler(
        max_tokens: int,
        messages: List[Dict],
        model: str,
        context_management: Optional[Dict] = None,
        metadata: Optional[Dict] = None,
        output_config: Optional[Dict] = None,
        stop_sequences: Optional[List[str]] = None,
        stream: Optional[bool] = False,
        system: Optional[str] = None,
        temperature: Optional[float] = None,
        thinking: Optional[Dict] = None,
        tool_choice: Optional[Dict] = None,
        tools: Optional[List[Dict]] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        output_format: Optional[Dict] = None,
        **kwargs,
    ) -> Union[AnthropicMessagesResponse, AsyncIterator]:
        """Async entry point: call litellm.aresponses() and translate the result back."""
        responses_kwargs = _build_responses_kwargs(
            max_tokens=max_tokens,
            messages=messages,
            model=model,
            context_management=context_management,
            metadata=metadata,
            output_config=output_config,
            stop_sequences=stop_sequences,
            stream=stream,
            system=system,
            temperature=temperature,
            thinking=thinking,
            tool_choice=tool_choice,
            tools=tools,
            top_k=top_k,
            top_p=top_p,
            output_format=output_format,
            extra_kwargs=kwargs,
        )
        result = await litellm.aresponses(**responses_kwargs)
        return LiteLLMMessagesToResponsesAPIHandler._finalize_result(
            result, stream, model
        )

    @staticmethod
    def anthropic_messages_handler(
        max_tokens: int,
        messages: List[Dict],
        model: str,
        context_management: Optional[Dict] = None,
        metadata: Optional[Dict] = None,
        output_config: Optional[Dict] = None,
        stop_sequences: Optional[List[str]] = None,
        stream: Optional[bool] = False,
        system: Optional[str] = None,
        temperature: Optional[float] = None,
        thinking: Optional[Dict] = None,
        tool_choice: Optional[Dict] = None,
        tools: Optional[List[Dict]] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        output_format: Optional[Dict] = None,
        _is_async: bool = False,
        **kwargs,
    ) -> Union[
        AnthropicMessagesResponse,
        AsyncIterator[Any],
        Coroutine[Any, Any, Union[AnthropicMessagesResponse, AsyncIterator[Any]]],
    ]:
        """
        Sync entry point; returns a coroutine (not awaited) when ``_is_async``
        is set, otherwise executes synchronously.
        """
        anthropic_params: Dict[str, Any] = {
            "max_tokens": max_tokens,
            "messages": messages,
            "model": model,
            "context_management": context_management,
            "metadata": metadata,
            "output_config": output_config,
            "stop_sequences": stop_sequences,
            "stream": stream,
            "system": system,
            "temperature": temperature,
            "thinking": thinking,
            "tool_choice": tool_choice,
            "tools": tools,
            "top_k": top_k,
            "top_p": top_p,
            "output_format": output_format,
        }
        if _is_async:
            return (
                LiteLLMMessagesToResponsesAPIHandler.async_anthropic_messages_handler(
                    **anthropic_params,
                    **kwargs,
                )
            )
        # Sync path
        responses_kwargs = _build_responses_kwargs(
            extra_kwargs=kwargs, **anthropic_params
        )
        result = litellm.responses(**responses_kwargs)
        return LiteLLMMessagesToResponsesAPIHandler._finalize_result(
            result, stream, model
        )

View File

@@ -0,0 +1,344 @@
# What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import traceback
from collections import deque
from typing import Any, AsyncIterator, Dict
from litellm import verbose_logger
from litellm._uuid import uuid
class AnthropicResponsesStreamWrapper:
    """
    Wraps a Responses API streaming iterator and re-emits events in Anthropic SSE format.

    Responses API event flow (relevant subset):
    response.created -> message_start
    response.output_item.added -> content_block_start (if message/function_call/reasoning)
    response.output_text.delta -> content_block_delta (text_delta)
    response.reasoning_summary_text.delta -> content_block_delta (thinking_delta)
    response.function_call_arguments.delta -> content_block_delta (input_json_delta)
    response.output_item.done -> content_block_stop
    response.completed -> message_delta + message_stop

    Events may arrive either as objects (attribute access) or plain dicts, so
    every field read falls back from ``getattr`` to ``dict.get``.
    """

    def __init__(
        self,
        responses_stream: Any,
        model: str,
    ) -> None:
        """
        Args:
            responses_stream: Async iterator of Responses API events.
            model: Model name echoed into the synthetic message_start event.
        """
        self.responses_stream = responses_stream
        self.model = model
        self._message_id: str = f"msg_{uuid.uuid4()}"
        # Index of the most recently opened content block (-1 = none yet).
        self._current_block_index: int = -1
        # Map item_id -> content_block_index so we can stop the right block later
        self._item_id_to_block_index: Dict[str, int] = {}
        # Track open function_call items: item_id -> call_id
        self._pending_tool_ids: Dict[str, str] = {}
        self._sent_message_start = False
        self._sent_message_stop = False
        # FIFO of Anthropic-format chunks ready for emission.
        self._chunk_queue: deque = deque()

    def _make_message_start(self) -> Dict[str, Any]:
        """Build the synthetic Anthropic ``message_start`` event."""
        return {
            "type": "message_start",
            "message": {
                "id": self._message_id,
                "type": "message",
                "role": "assistant",
                "content": [],
                "model": self.model,
                "stop_reason": None,
                "stop_sequence": None,
                "usage": {
                    "input_tokens": 0,
                    "output_tokens": 0,
                    "cache_creation_input_tokens": 0,
                    "cache_read_input_tokens": 0,
                },
            },
        }

    def _next_block_index(self) -> int:
        """Allocate and return the next content-block index."""
        self._current_block_index += 1
        return self._current_block_index

    def _process_event(self, event: Any) -> None:  # noqa: PLR0915
        """Convert one Responses API event into zero or more Anthropic chunks queued for emission."""
        event_type = getattr(event, "type", None)
        if event_type is None and isinstance(event, dict):
            event_type = event.get("type")
        if event_type is None:
            return
        # ---- message_start ----
        if event_type == "response.created":
            self._sent_message_start = True
            self._chunk_queue.append(self._make_message_start())
            return
        # ---- content_block_start for a new output item ----
        if event_type == "response.output_item.added":
            item = getattr(event, "item", None) or (
                event.get("item") if isinstance(event, dict) else None
            )
            if item is None:
                return
            item_type = getattr(item, "type", None) or (
                item.get("type") if isinstance(item, dict) else None
            )
            item_id = getattr(item, "id", None) or (
                item.get("id") if isinstance(item, dict) else None
            )
            if item_type == "message":
                block_idx = self._next_block_index()
                if item_id:
                    self._item_id_to_block_index[item_id] = block_idx
                self._chunk_queue.append(
                    {
                        "type": "content_block_start",
                        "index": block_idx,
                        "content_block": {"type": "text", "text": ""},
                    }
                )
            elif item_type == "function_call":
                call_id = (
                    getattr(item, "call_id", None)
                    or (item.get("call_id") if isinstance(item, dict) else None)
                    or ""
                )
                name = (
                    getattr(item, "name", None)
                    or (item.get("name") if isinstance(item, dict) else None)
                    or ""
                )
                block_idx = self._next_block_index()
                if item_id:
                    self._item_id_to_block_index[item_id] = block_idx
                    self._pending_tool_ids[item_id] = call_id
                self._chunk_queue.append(
                    {
                        "type": "content_block_start",
                        "index": block_idx,
                        "content_block": {
                            "type": "tool_use",
                            "id": call_id,
                            "name": name,
                            "input": {},
                        },
                    }
                )
            elif item_type == "reasoning":
                block_idx = self._next_block_index()
                if item_id:
                    self._item_id_to_block_index[item_id] = block_idx
                self._chunk_queue.append(
                    {
                        "type": "content_block_start",
                        "index": block_idx,
                        "content_block": {"type": "thinking", "thinking": ""},
                    }
                )
            return
        # ---- text delta ----
        if event_type == "response.output_text.delta":
            item_id = getattr(event, "item_id", None) or (
                event.get("item_id") if isinstance(event, dict) else None
            )
            delta = getattr(event, "delta", "") or (
                event.get("delta", "") if isinstance(event, dict) else ""
            )
            block_idx = (
                self._item_id_to_block_index.get(item_id, self._current_block_index)
                if item_id
                else self._current_block_index
            )
            self._chunk_queue.append(
                {
                    "type": "content_block_delta",
                    "index": block_idx,
                    "delta": {"type": "text_delta", "text": delta},
                }
            )
            return
        # ---- reasoning summary text delta ----
        if event_type == "response.reasoning_summary_text.delta":
            item_id = getattr(event, "item_id", None) or (
                event.get("item_id") if isinstance(event, dict) else None
            )
            delta = getattr(event, "delta", "") or (
                event.get("delta", "") if isinstance(event, dict) else ""
            )
            block_idx = (
                self._item_id_to_block_index.get(item_id, self._current_block_index)
                if item_id
                else self._current_block_index
            )
            self._chunk_queue.append(
                {
                    "type": "content_block_delta",
                    "index": block_idx,
                    "delta": {"type": "thinking_delta", "thinking": delta},
                }
            )
            return
        # ---- function call arguments delta ----
        if event_type == "response.function_call_arguments.delta":
            item_id = getattr(event, "item_id", None) or (
                event.get("item_id") if isinstance(event, dict) else None
            )
            delta = getattr(event, "delta", "") or (
                event.get("delta", "") if isinstance(event, dict) else ""
            )
            block_idx = (
                self._item_id_to_block_index.get(item_id, self._current_block_index)
                if item_id
                else self._current_block_index
            )
            self._chunk_queue.append(
                {
                    "type": "content_block_delta",
                    "index": block_idx,
                    "delta": {"type": "input_json_delta", "partial_json": delta},
                }
            )
            return
        # ---- output item done -> content_block_stop ----
        if event_type == "response.output_item.done":
            item = getattr(event, "item", None) or (
                event.get("item") if isinstance(event, dict) else None
            )
            item_id = (
                getattr(item, "id", None)
                or (item.get("id") if isinstance(item, dict) else None)
                if item
                else None
            )
            block_idx = (
                self._item_id_to_block_index.get(item_id, self._current_block_index)
                if item_id
                else self._current_block_index
            )
            self._chunk_queue.append(
                {
                    "type": "content_block_stop",
                    "index": block_idx,
                }
            )
            return
        # ---- response completed -> message_delta + message_stop ----
        if event_type in (
            "response.completed",
            "response.failed",
            "response.incomplete",
        ):
            response_obj = getattr(event, "response", None) or (
                event.get("response") if isinstance(event, dict) else None
            )
            stop_reason = "end_turn"
            input_tokens = 0
            output_tokens = 0
            cache_creation_tokens = 0
            cache_read_tokens = 0
            if response_obj is not None:
                status = getattr(response_obj, "status", None)
                if status == "incomplete":
                    stop_reason = "max_tokens"
                usage = getattr(response_obj, "usage", None)
                if usage is not None:
                    input_tokens = getattr(usage, "input_tokens", 0) or 0
                    output_tokens = getattr(usage, "output_tokens", 0) or 0
                    # Read the direct cache fields; missing attrs default to 0.
                    # (Previously the *_tokens_details objects were read into
                    # these int variables first and immediately overwritten —
                    # dead, type-incorrect assignments now removed.)
                    cache_creation_tokens = int(
                        getattr(usage, "cache_creation_input_tokens", 0) or 0
                    )
                    cache_read_tokens = int(
                        getattr(usage, "cache_read_input_tokens", 0) or 0
                    )
            # Check if tool_use was in the output to override stop_reason
            if response_obj is not None:
                output = getattr(response_obj, "output", []) or []
                for out_item in output:
                    out_type = getattr(out_item, "type", None) or (
                        out_item.get("type") if isinstance(out_item, dict) else None
                    )
                    if out_type == "function_call":
                        stop_reason = "tool_use"
                        break
            usage_delta: Dict[str, Any] = {
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
            }
            # Cache counters are only surfaced when non-zero.
            if cache_creation_tokens:
                usage_delta["cache_creation_input_tokens"] = cache_creation_tokens
            if cache_read_tokens:
                usage_delta["cache_read_input_tokens"] = cache_read_tokens
            self._chunk_queue.append(
                {
                    "type": "message_delta",
                    "delta": {"stop_reason": stop_reason, "stop_sequence": None},
                    "usage": usage_delta,
                }
            )
            self._chunk_queue.append({"type": "message_stop"})
            self._sent_message_stop = True
            return

    def __aiter__(self) -> "AnthropicResponsesStreamWrapper":
        return self

    async def __anext__(self) -> Dict[str, Any]:
        """Return the next Anthropic chunk, pulling upstream events as needed."""
        # Return any queued chunks first
        if self._chunk_queue:
            return self._chunk_queue.popleft()
        # Emit message_start if not yet done (fallback if response.created wasn't fired)
        if not self._sent_message_start:
            self._sent_message_start = True
            self._chunk_queue.append(self._make_message_start())
            return self._chunk_queue.popleft()
        # Consume the upstream stream until at least one chunk is produced
        try:
            async for event in self.responses_stream:
                self._process_event(event)
                if self._chunk_queue:
                    return self._chunk_queue.popleft()
        except StopAsyncIteration:
            pass
        except Exception as e:
            verbose_logger.error(
                f"AnthropicResponsesStreamWrapper error: {e}\n{traceback.format_exc()}"
            )
        # Drain any remaining queued chunks
        if self._chunk_queue:
            return self._chunk_queue.popleft()
        raise StopAsyncIteration

    async def async_anthropic_sse_wrapper(self) -> AsyncIterator[bytes]:
        """Yield SSE-encoded bytes for each Anthropic event chunk."""
        async for chunk in self:
            if isinstance(chunk, dict):
                event_type: str = str(chunk.get("type", "message"))
                payload = f"event: {event_type}\ndata: {json.dumps(chunk)}\n\n"
                yield payload.encode()
            else:
                yield chunk

View File

@@ -0,0 +1,488 @@
"""
Transformation layer: Anthropic /v1/messages <-> OpenAI Responses API.
This module owns all format conversions for the direct v1/messages -> Responses API
path used for OpenAI and Azure models.
"""
import json
from typing import Any, Dict, List, Optional, Union, cast
from litellm.types.llms.anthropic import (
AllAnthropicToolsValues,
AnthopicMessagesAssistantMessageParam,
AnthropicFinishReason,
AnthropicMessagesRequest,
AnthropicMessagesToolChoice,
AnthropicMessagesUserMessageParam,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockThinking,
AnthropicResponseContentBlockToolUse,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
AnthropicUsage,
)
from litellm.types.llms.openai import ResponsesAPIResponse
class LiteLLMAnthropicToResponsesAPIAdapter:
    """
    Converts Anthropic /v1/messages requests to OpenAI Responses API format and
    converts Responses API responses back to Anthropic format.

    Stateless: every method is a pure transformation over plain dicts /
    TypedDicts, so a single module-level instance can be shared.
    """

    # ------------------------------------------------------------------ #
    # Request translation: Anthropic -> Responses API                    #
    # ------------------------------------------------------------------ #
    @staticmethod
    def _translate_anthropic_image_source_to_url(source: dict) -> Optional[str]:
        """Convert Anthropic image source to a URL string (data: URL for base64)."""
        source_type = source.get("type")
        if source_type == "base64":
            media_type = source.get("media_type", "image/jpeg")
            data = source.get("data", "")
            # Empty payload -> None so the caller can skip the image part.
            return f"data:{media_type};base64,{data}" if data else None
        elif source_type == "url":
            return source.get("url")
        return None

    def translate_messages_to_responses_input(  # noqa: PLR0915
        self,
        messages: List[
            Union[
                AnthropicMessagesUserMessageParam,
                AnthopicMessagesAssistantMessageParam,
            ]
        ],
    ) -> List[Dict[str, Any]]:
        """
        Convert Anthropic messages list to Responses API `input` items.

        Mapping:
            user text           -> message(role=user, input_text)
            user image          -> message(role=user, input_image)
            user tool_result    -> function_call_output (top-level item)
            assistant text      -> message(role=assistant, output_text)
            assistant tool_use  -> function_call (top-level item)
            assistant thinking  -> output_text (text only; signature is dropped)

        Non-dict content blocks are silently skipped.
        """
        input_items: List[Dict[str, Any]] = []
        for m in messages:
            role = m["role"]
            content = m.get("content")
            if role == "user":
                if isinstance(content, str):
                    # Plain-string content becomes a single input_text part.
                    input_items.append(
                        {
                            "type": "message",
                            "role": "user",
                            "content": [{"type": "input_text", "text": content}],
                        }
                    )
                elif isinstance(content, list):
                    user_parts: List[Dict[str, Any]] = []
                    for block in content:
                        if not isinstance(block, dict):
                            continue
                        btype = block.get("type")
                        if btype == "text":
                            user_parts.append(
                                {"type": "input_text", "text": block.get("text", "")}
                            )
                        elif btype == "image":
                            url = self._translate_anthropic_image_source_to_url(
                                block.get("source", {})
                            )
                            if url:
                                user_parts.append(
                                    {"type": "input_image", "image_url": url}
                                )
                        elif btype == "tool_result":
                            tool_use_id = block.get("tool_use_id", "")
                            inner = block.get("content")
                            # tool_result content may be None, a string, or a
                            # list of content blocks; flatten to one string.
                            if inner is None:
                                output_text = ""
                            elif isinstance(inner, str):
                                output_text = inner
                            elif isinstance(inner, list):
                                parts = [
                                    c.get("text", "")
                                    for c in inner
                                    if isinstance(c, dict) and c.get("type") == "text"
                                ]
                                output_text = "\n".join(parts)
                            else:
                                output_text = str(inner)
                            # tool_result is a top-level item, not inside the message
                            input_items.append(
                                {
                                    "type": "function_call_output",
                                    "call_id": tool_use_id,
                                    "output": output_text,
                                }
                            )
                    if user_parts:
                        input_items.append(
                            {
                                "type": "message",
                                "role": "user",
                                "content": user_parts,
                            }
                        )
            elif role == "assistant":
                if isinstance(content, str):
                    input_items.append(
                        {
                            "type": "message",
                            "role": "assistant",
                            "content": [{"type": "output_text", "text": content}],
                        }
                    )
                elif isinstance(content, list):
                    asst_parts: List[Dict[str, Any]] = []
                    for block in content:
                        if not isinstance(block, dict):
                            continue
                        btype = block.get("type")
                        if btype == "text":
                            asst_parts.append(
                                {"type": "output_text", "text": block.get("text", "")}
                            )
                        elif btype == "tool_use":
                            # tool_use becomes a top-level function_call item
                            input_items.append(
                                {
                                    "type": "function_call",
                                    "call_id": block.get("id", ""),
                                    "name": block.get("name", ""),
                                    "arguments": json.dumps(block.get("input", {})),
                                }
                            )
                        elif btype == "thinking":
                            # NOTE(review): thinking text is folded into plain
                            # output_text; the thinking signature (if any) is
                            # not preserved — confirm this is intended.
                            thinking_text = block.get("thinking", "")
                            if thinking_text:
                                asst_parts.append(
                                    {"type": "output_text", "text": thinking_text}
                                )
                    if asst_parts:
                        input_items.append(
                            {
                                "type": "message",
                                "role": "assistant",
                                "content": asst_parts,
                            }
                        )
        return input_items

    def translate_tools_to_responses_api(
        self,
        tools: List[AllAnthropicToolsValues],
    ) -> List[Dict[str, Any]]:
        """
        Convert Anthropic tool definitions to Responses API function tools.

        Anthropic web_search tools (type starting with "web_search", or named
        "web_search") map to the built-in "web_search_preview" tool; everything
        else becomes a function tool with name/description/parameters.
        """
        result: List[Dict[str, Any]] = []
        for tool in tools:
            tool_dict = cast(Dict[str, Any], tool)
            tool_type = tool_dict.get("type", "")
            tool_name = tool_dict.get("name", "")
            # web_search tool
            if (
                isinstance(tool_type, str) and tool_type.startswith("web_search")
            ) or tool_name == "web_search":
                result.append({"type": "web_search_preview"})
                continue
            func_tool: Dict[str, Any] = {"type": "function", "name": tool_name}
            if "description" in tool_dict:
                func_tool["description"] = tool_dict["description"]
            if "input_schema" in tool_dict:
                func_tool["parameters"] = tool_dict["input_schema"]
            result.append(func_tool)
        return result

    @staticmethod
    def translate_tool_choice_to_responses_api(
        tool_choice: AnthropicMessagesToolChoice,
    ) -> Dict[str, Any]:
        """
        Convert Anthropic tool_choice to Responses API tool_choice.

        "any" -> required; "tool" -> a specific named function; anything else
        (including "auto") falls back to auto.
        """
        tc_type = tool_choice.get("type")
        if tc_type == "any":
            return {"type": "required"}
        elif tc_type == "tool":
            return {"type": "function", "name": tool_choice.get("name", "")}
        return {"type": "auto"}

    @staticmethod
    def translate_context_management_to_responses_api(
        context_management: Dict[str, Any],
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Convert Anthropic context_management dict to OpenAI Responses API array format.

        Anthropic format: {"edits": [{"type": "compact_20260112", "trigger": {"type": "input_tokens", "value": 150000}}]}
        OpenAI format: [{"type": "compaction", "compact_threshold": 150000}]

        Returns None when the input is malformed or yields no compaction edits,
        so the caller can omit the key entirely.
        """
        if not isinstance(context_management, dict):
            return None
        edits = context_management.get("edits", [])
        if not isinstance(edits, list):
            return None
        result: List[Dict[str, Any]] = []
        for edit in edits:
            if not isinstance(edit, dict):
                continue
            edit_type = edit.get("type", "")
            if edit_type == "compact_20260112":
                entry: Dict[str, Any] = {"type": "compaction"}
                trigger = edit.get("trigger")
                if isinstance(trigger, dict) and trigger.get("value") is not None:
                    entry["compact_threshold"] = int(trigger["value"])
                result.append(entry)
        return result if result else None

    @staticmethod
    def translate_thinking_to_reasoning(
        thinking: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Convert Anthropic thinking param to Responses API reasoning param.

        thinking.budget_tokens maps to reasoning effort:
        >= 10000 -> high, >= 5000 -> medium, >= 2000 -> low, < 2000 -> minimal

        Returns None unless thinking is a dict with type == "enabled".
        """
        if not isinstance(thinking, dict) or thinking.get("type") != "enabled":
            return None
        budget = thinking.get("budget_tokens", 0)
        if budget >= 10000:
            effort = "high"
        elif budget >= 5000:
            effort = "medium"
        elif budget >= 2000:
            effort = "low"
        else:
            effort = "minimal"
        return {"effort": effort, "summary": "detailed"}

    def translate_request(
        self,
        anthropic_request: AnthropicMessagesRequest,
    ) -> Dict[str, Any]:
        """
        Translate a full Anthropic /v1/messages request dict to
        litellm.responses() / litellm.aresponses() kwargs.

        Keys not recognized here (e.g. stop_sequences, top_k) are dropped.
        """
        model: str = anthropic_request["model"]
        messages_list = cast(
            List[
                Union[
                    AnthropicMessagesUserMessageParam,
                    AnthopicMessagesAssistantMessageParam,
                ]
            ],
            anthropic_request["messages"],
        )
        responses_kwargs: Dict[str, Any] = {
            "model": model,
            "input": self.translate_messages_to_responses_input(messages_list),
        }
        # system -> instructions (list-of-blocks systems are joined on newlines)
        system = anthropic_request.get("system")
        if system:
            if isinstance(system, str):
                responses_kwargs["instructions"] = system
            elif isinstance(system, list):
                text_parts = [
                    b.get("text", "")
                    for b in system
                    if isinstance(b, dict) and b.get("type") == "text"
                ]
                responses_kwargs["instructions"] = "\n".join(filter(None, text_parts))
        # max_tokens -> max_output_tokens
        max_tokens = anthropic_request.get("max_tokens")
        if max_tokens:
            responses_kwargs["max_output_tokens"] = max_tokens
        # temperature / top_p passed through (key presence, not truthiness,
        # so explicit 0 values are forwarded)
        if "temperature" in anthropic_request:
            responses_kwargs["temperature"] = anthropic_request["temperature"]
        if "top_p" in anthropic_request:
            responses_kwargs["top_p"] = anthropic_request["top_p"]
        # tools
        tools = anthropic_request.get("tools")
        if tools:
            responses_kwargs["tools"] = self.translate_tools_to_responses_api(
                cast(List[AllAnthropicToolsValues], tools)
            )
        # tool_choice
        tool_choice = anthropic_request.get("tool_choice")
        if tool_choice:
            responses_kwargs[
                "tool_choice"
            ] = self.translate_tool_choice_to_responses_api(
                cast(AnthropicMessagesToolChoice, tool_choice)
            )
        # thinking -> reasoning
        thinking = anthropic_request.get("thinking")
        if isinstance(thinking, dict):
            reasoning = self.translate_thinking_to_reasoning(thinking)
            if reasoning:
                responses_kwargs["reasoning"] = reasoning
        # output_format / output_config.format -> text format
        # output_format: {"type": "json_schema", "schema": {...}}
        # output_config: {"format": {"type": "json_schema", "schema": {...}}}
        # output_format takes precedence when both are present.
        output_format: Any = anthropic_request.get("output_format")
        output_config = anthropic_request.get("output_config")
        if not isinstance(output_format, dict) and isinstance(output_config, dict):
            output_format = output_config.get("format")  # type: ignore[assignment]
        if (
            isinstance(output_format, dict)
            and output_format.get("type") == "json_schema"
        ):
            schema = output_format.get("schema")
            if schema:
                responses_kwargs["text"] = {
                    "format": {
                        "type": "json_schema",
                        "name": "structured_output",
                        "schema": schema,
                        "strict": True,
                    }
                }
        # context_management: Anthropic dict -> OpenAI array
        context_management = anthropic_request.get("context_management")
        if isinstance(context_management, dict):
            openai_cm = self.translate_context_management_to_responses_api(
                context_management
            )
            if openai_cm is not None:
                responses_kwargs["context_management"] = openai_cm
        # metadata user_id -> user (truncated to 64 chars)
        metadata = anthropic_request.get("metadata")
        if isinstance(metadata, dict) and "user_id" in metadata:
            responses_kwargs["user"] = str(metadata["user_id"])[:64]
        return responses_kwargs

    # ------------------------------------------------------------------ #
    # Response translation: Responses API -> Anthropic                   #
    # ------------------------------------------------------------------ #
    def translate_response(
        self,
        response: ResponsesAPIResponse,
    ) -> AnthropicMessagesResponse:
        """
        Translate an OpenAI ResponsesAPIResponse to AnthropicMessagesResponse.

        Output items may arrive as typed openai objects or plain dicts; both
        are handled. Any function_call in the output forces stop_reason to
        "tool_use"; an "incomplete" response status forces "max_tokens".
        """
        from openai.types.responses import (
            ResponseFunctionToolCall,
            ResponseOutputMessage,
            ResponseReasoningItem,
        )

        from litellm.types.llms.openai import ResponseAPIUsage

        content: List[Dict[str, Any]] = []
        stop_reason: AnthropicFinishReason = "end_turn"
        for item in response.output:
            if isinstance(item, ResponseReasoningItem):
                # Reasoning summaries become thinking blocks (no signature).
                for summary in item.summary:
                    text = getattr(summary, "text", "")
                    if text:
                        content.append(
                            AnthropicResponseContentBlockThinking(
                                type="thinking",
                                thinking=text,
                                signature=None,
                            ).model_dump()
                        )
            elif isinstance(item, ResponseOutputMessage):
                for part in item.content:
                    if getattr(part, "type", None) == "output_text":
                        content.append(
                            AnthropicResponseContentBlockText(
                                type="text", text=getattr(part, "text", "")
                            ).model_dump()
                        )
            elif isinstance(item, ResponseFunctionToolCall):
                # Malformed JSON arguments degrade to an empty input dict.
                try:
                    input_data = json.loads(item.arguments) if item.arguments else {}
                except (json.JSONDecodeError, TypeError):
                    input_data = {}
                content.append(
                    AnthropicResponseContentBlockToolUse(
                        type="tool_use",
                        id=item.call_id or item.id or "",
                        name=item.name,
                        input=input_data,
                    ).model_dump()
                )
                stop_reason = "tool_use"
            elif isinstance(item, dict):
                # Dict-shaped output items (untyped passthrough).
                item_type = item.get("type")
                if item_type == "message":
                    for part in item.get("content", []):
                        if isinstance(part, dict) and part.get("type") == "output_text":
                            content.append(
                                AnthropicResponseContentBlockText(
                                    type="text", text=part.get("text", "")
                                ).model_dump()
                            )
                elif item_type == "function_call":
                    try:
                        input_data = json.loads(item.get("arguments", "{}"))
                    except (json.JSONDecodeError, TypeError):
                        input_data = {}
                    content.append(
                        AnthropicResponseContentBlockToolUse(
                            type="tool_use",
                            id=item.get("call_id") or item.get("id", ""),
                            name=item.get("name", ""),
                            input=input_data,
                        ).model_dump()
                    )
                    stop_reason = "tool_use"
        # status -> stop_reason override
        if response.status == "incomplete":
            stop_reason = "max_tokens"
        # usage
        # NOTE(review): cache token details are not propagated here, unlike
        # the streaming path — confirm whether AnthropicUsage supports them.
        raw_usage: Optional[ResponseAPIUsage] = response.usage
        input_tokens = int(getattr(raw_usage, "input_tokens", 0) or 0)
        output_tokens = int(getattr(raw_usage, "output_tokens", 0) or 0)
        anthropic_usage = AnthropicUsage(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
        )
        return AnthropicMessagesResponse(
            id=response.id,
            type="message",
            role="assistant",
            model=response.model or "unknown-model",
            stop_sequence=None,
            usage=anthropic_usage,  # type: ignore
            content=content,  # type: ignore
            stop_reason=stop_reason,
        )

View File

@@ -0,0 +1,4 @@
from .handler import AnthropicFilesHandler
from .transformation import AnthropicFilesConfig
__all__ = ["AnthropicFilesHandler", "AnthropicFilesConfig"]

View File

@@ -0,0 +1,366 @@
import asyncio
import json
import time
from typing import Any, Coroutine, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm._uuid import uuid
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
)
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.llms.openai import (
FileContentRequest,
HttpxBinaryResponseContent,
OpenAIBatchResult,
OpenAIChatCompletionResponse,
OpenAIErrorBody,
)
from litellm.types.utils import CallTypes, LlmProviders, ModelResponse
from ..chat.transformation import AnthropicConfig
from ..common_utils import AnthropicModelInfo
# Map Anthropic error types to HTTP status codes.
# NOTE(review): presumably consumed when translating Anthropic batch-result
# error bodies into OpenAI-style error responses — confirm against the
# consumers in the rest of this file.
ANTHROPIC_ERROR_STATUS_CODE_MAP = {
    "invalid_request_error": 400,
    "authentication_error": 401,
    "permission_error": 403,
    "not_found_error": 404,
    "rate_limit_error": 429,
    "api_error": 500,
    "overloaded_error": 503,
    "timeout_error": 504,
}
class AnthropicFilesHandler:
    """
    Handles Anthropic Files API operations.

    Currently supports:
    - file_content() for retrieving Anthropic Message Batch results

    Batch results are downloaded from Anthropic's
    ``/v1/messages/batches/{batch_id}/results`` endpoint and rewritten into
    OpenAI batch-results JSONL, so callers can consume Anthropic batches
    through the OpenAI-shaped files interface.
    """

    def __init__(self):
        # Resolves the Anthropic api_base / api_key (falls back to env vars).
        self.anthropic_model_info = AnthropicModelInfo()

    async def afile_content(
        self,
        file_content_request: FileContentRequest,
        api_base: Optional[str] = None,
        api_key: Optional[str] = None,
        timeout: Union[float, httpx.Timeout] = 600.0,
        max_retries: Optional[int] = None,
    ) -> HttpxBinaryResponseContent:
        """
        Async: Retrieve file content from Anthropic.

        For batch results, the file_id should be the batch_id.
        This will call Anthropic's /v1/messages/batches/{batch_id}/results endpoint.

        Args:
            file_content_request: Contains file_id (batch_id for batch results)
            api_base: Anthropic API base URL
            api_key: Anthropic API key
            timeout: Request timeout. NOTE(review): currently not forwarded to
                the shared async client's GET call, so the client default
                applies — TODO wire through once supported.
            max_retries: Max retry attempts (unused for now)

        Returns:
            HttpxBinaryResponseContent: Binary content wrapped in compatible response format

        Raises:
            ValueError: if ``file_id`` is missing or no API key is available.
            httpx.HTTPStatusError: if Anthropic returns a non-2xx response.
        """
        file_id = file_content_request.get("file_id")
        if not file_id:
            raise ValueError("file_id is required in file_content_request")

        # Extract batch_id from file_id.
        # Handle both formats: "anthropic_batch_results:{batch_id}" or just "{batch_id}"
        if file_id.startswith("anthropic_batch_results:"):
            batch_id = file_id.replace("anthropic_batch_results:", "", 1)
        else:
            batch_id = file_id

        # Get Anthropic API credentials (explicit args win over env vars).
        api_base = self.anthropic_model_info.get_api_base(api_base)
        api_key = api_key or self.anthropic_model_info.get_api_key()
        if not api_key:
            raise ValueError("Missing Anthropic API Key")

        # Construct the Anthropic batch results URL.
        results_url = f"{api_base.rstrip('/')}/v1/messages/batches/{batch_id}/results"

        # Prepare headers.
        headers = {
            "accept": "application/json",
            "anthropic-version": "2023-06-01",
            "x-api-key": api_key,
        }

        # Make the request to Anthropic.
        async_client = get_async_httpx_client(llm_provider=LlmProviders.ANTHROPIC)
        anthropic_response = await async_client.get(url=results_url, headers=headers)
        anthropic_response.raise_for_status()

        # Transform Anthropic batch results JSONL to OpenAI batch-results JSONL.
        transformed_content = self._transform_anthropic_batch_results_to_openai_format(
            anthropic_response.content
        )

        # Re-wrap the transformed bytes in an httpx.Response so callers get the
        # same interface as an untransformed download.
        transformed_response = httpx.Response(
            status_code=anthropic_response.status_code,
            headers=anthropic_response.headers,
            content=transformed_content,
            request=anthropic_response.request,
        )

        return HttpxBinaryResponseContent(response=transformed_response)

    def file_content(
        self,
        _is_async: bool,
        file_content_request: FileContentRequest,
        api_base: Optional[str] = None,
        api_key: Optional[str] = None,
        timeout: Union[float, httpx.Timeout] = 600.0,
        max_retries: Optional[int] = None,
    ) -> Union[
        HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
    ]:
        """
        Retrieve file content from Anthropic.

        For batch results, the file_id should be the batch_id.
        This will call Anthropic's /v1/messages/batches/{batch_id}/results endpoint.

        Args:
            _is_async: Whether to run asynchronously
            file_content_request: Contains file_id (batch_id for batch results)
            api_base: Anthropic API base URL
            api_key: Anthropic API key
            timeout: Request timeout
            max_retries: Max retry attempts (unused for now)

        Returns:
            HttpxBinaryResponseContent or Coroutine: Binary content wrapped in compatible response format
        """
        if _is_async:
            return self.afile_content(
                file_content_request=file_content_request,
                api_base=api_base,
                api_key=api_key,
                # Bug fix: timeout was previously dropped on the async path
                # (only the sync branch forwarded it).
                timeout=timeout,
                max_retries=max_retries,
            )
        else:
            return asyncio.run(
                self.afile_content(
                    file_content_request=file_content_request,
                    api_base=api_base,
                    api_key=api_key,
                    timeout=timeout,
                    max_retries=max_retries,
                )
            )

    def _transform_anthropic_batch_results_to_openai_format(
        self, anthropic_content: bytes
    ) -> bytes:
        """
        Transform Anthropic batch results JSONL to OpenAI batch results JSONL format.

        Anthropic format:
        {
            "custom_id": "...",
            "result": {
                "type": "succeeded",
                "message": { ... }  // Anthropic message format
            }
        }

        OpenAI format:
        {
            "custom_id": "...",
            "response": {
                "status_code": 200,
                "request_id": "...",
                "body": { ... }  // OpenAI chat completion format
            }
        }

        On any transformation error the original bytes are returned unchanged
        (best-effort: callers still receive the raw Anthropic payload).
        """
        try:
            anthropic_config = AnthropicConfig()
            transformed_lines = []

            # Parse JSONL content line-by-line; blank lines are skipped.
            content_str = anthropic_content.decode("utf-8")
            for line in content_str.strip().split("\n"):
                if not line.strip():
                    continue
                anthropic_result = json.loads(line)
                custom_id = anthropic_result.get("custom_id", "")
                result = anthropic_result.get("result", {})
                result_type = result.get("type", "")

                if result_type == "succeeded":
                    # Transform Anthropic message to OpenAI chat-completion format.
                    anthropic_message = result.get("message", {})
                    if anthropic_message:
                        openai_response_body = (
                            self._transform_anthropic_message_to_openai_format(
                                anthropic_message=anthropic_message,
                                anthropic_config=anthropic_config,
                            )
                        )
                        openai_result: OpenAIBatchResult = {
                            "custom_id": custom_id,
                            "response": {
                                "status_code": 200,
                                "request_id": anthropic_message.get("id", ""),
                                "body": openai_response_body,
                            },
                        }
                        transformed_lines.append(json.dumps(openai_result))
                elif result_type == "errored":
                    # Map the Anthropic error-type string to an HTTP status
                    # (unknown types default to 500).
                    error = result.get("error", {})
                    error_obj = error.get("error", {})
                    error_message = error_obj.get("message", "Unknown error")
                    error_type = error_obj.get("type", "api_error")
                    status_code = ANTHROPIC_ERROR_STATUS_CODE_MAP.get(error_type, 500)
                    error_body_errored: OpenAIErrorBody = {
                        "error": {
                            "message": error_message,
                            "type": error_type,
                        }
                    }
                    openai_result_errored: OpenAIBatchResult = {
                        "custom_id": custom_id,
                        "response": {
                            "status_code": status_code,
                            "request_id": error.get("request_id", ""),
                            "body": error_body_errored,
                        },
                    }
                    transformed_lines.append(json.dumps(openai_result_errored))
                elif result_type in ["canceled", "expired"]:
                    # Canceled/expired requests are surfaced as 400-level errors.
                    error_body_canceled: OpenAIErrorBody = {
                        "error": {
                            "message": f"Batch request was {result_type}",
                            "type": "invalid_request_error",
                        }
                    }
                    openai_result_canceled: OpenAIBatchResult = {
                        "custom_id": custom_id,
                        "response": {
                            "status_code": 400,
                            "request_id": "",
                            "body": error_body_canceled,
                        },
                    }
                    transformed_lines.append(json.dumps(openai_result_canceled))

            # Join lines and encode back to bytes.
            transformed_content = "\n".join(transformed_lines)
            if transformed_lines:
                transformed_content += "\n"  # Add trailing newline for JSONL format
            return transformed_content.encode("utf-8")
        except Exception as e:
            verbose_logger.error(
                f"Error transforming Anthropic batch results to OpenAI format: {e}"
            )
            # Return original content if transformation fails.
            return anthropic_content

    def _transform_anthropic_message_to_openai_format(
        self, anthropic_message: dict, anthropic_config: AnthropicConfig
    ) -> OpenAIChatCompletionResponse:
        """
        Transform a single Anthropic message to OpenAI chat completion format.

        Falls back to a minimal error-shaped completion dict if the
        transformation raises, so one bad record never aborts the whole batch.
        """
        try:
            # Create a mock httpx.Response because transform_parsed_response
            # expects a raw response object.
            mock_response = httpx.Response(
                status_code=200,
                content=json.dumps(anthropic_message).encode("utf-8"),
            )

            # Create a ModelResponse seeded with one empty assistant choice;
            # transform_parsed_response populates the real content.
            model_response = ModelResponse()
            model_response.choices = [
                litellm.Choices(
                    finish_reason="stop",
                    index=0,
                    message=litellm.Message(content="", role="assistant"),
                )
            ]  # type: ignore

            # Minimal logging object required by the transformation API; the
            # placeholder message/ids are never sent anywhere.
            logging_obj = Logging(
                model=anthropic_message.get("model", "claude-3-5-sonnet-20241022"),
                messages=[{"role": "user", "content": "batch_request"}],
                stream=False,
                call_type=CallTypes.aretrieve_batch,
                start_time=time.time(),
                litellm_call_id="batch_" + str(uuid.uuid4()),
                function_id="batch_processing",
                litellm_trace_id=str(uuid.uuid4()),
                kwargs={"optional_params": {}},
            )
            logging_obj.optional_params = {}

            # Transform using AnthropicConfig.
            transformed_response = anthropic_config.transform_parsed_response(
                completion_response=anthropic_message,
                raw_response=mock_response,
                model_response=model_response,
                json_mode=False,
                prefix_prompt=None,
            )

            # ModelResponse is already OpenAI-shaped; dump it to a plain dict.
            openai_body: OpenAIChatCompletionResponse = transformed_response.model_dump(
                exclude_none=True
            )

            # Ensure id comes from anthropic_message if not set.
            if not openai_body.get("id"):
                openai_body["id"] = anthropic_message.get("id", "")
            return openai_body
        except Exception as e:
            verbose_logger.error(
                f"Error transforming Anthropic message to OpenAI format: {e}"
            )
            # Return a basic error response if transformation fails.
            error_response: OpenAIChatCompletionResponse = {
                "id": anthropic_message.get("id", ""),
                "object": "chat.completion",
                "created": int(time.time()),
                "model": anthropic_message.get("model", ""),
                "choices": [
                    {
                        "index": 0,
                        "message": {"role": "assistant", "content": ""},
                        "finish_reason": "error",
                    }
                ],
                "usage": {
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                },
            }
            return error_response

View File

@@ -0,0 +1,307 @@
"""
Anthropic Files API transformation config.
Implements BaseFilesConfig for Anthropic's Files API (beta).
Reference: https://docs.anthropic.com/en/docs/build-with-claude/files
Anthropic Files API endpoints:
- POST /v1/files - Upload a file
- GET /v1/files - List files
- GET /v1/files/{file_id} - Retrieve file metadata
- DELETE /v1/files/{file_id} - Delete a file
- GET /v1/files/{file_id}/content - Download file content
"""
import calendar
import time
from typing import Any, Dict, List, Optional, Union, cast
import httpx
from openai.types.file_deleted import FileDeleted
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.llms.base_llm.files.transformation import (
BaseFilesConfig,
LiteLLMLoggingObj,
)
from litellm.types.llms.openai import (
CreateFileRequest,
FileContentRequest,
HttpxBinaryResponseContent,
OpenAICreateFileRequestOptionalParams,
OpenAIFileObject,
)
from litellm.types.utils import LlmProviders
from ..common_utils import AnthropicError, AnthropicModelInfo
# Default public Anthropic API endpoint (no trailing slash).
ANTHROPIC_FILES_API_BASE = "https://api.anthropic.com"
# Beta feature flag for the Files API, sent via the "anthropic-beta" header.
ANTHROPIC_FILES_BETA_HEADER = "files-api-2025-04-14"
class AnthropicFilesConfig(BaseFilesConfig):
    """
    Transformation config for Anthropic Files API.

    Anthropic uses:
    - x-api-key header for authentication
    - anthropic-beta: files-api-2025-04-14 header
    - multipart/form-data for file uploads
    - purpose="messages" (Anthropic-specific, not for batches/fine-tuning)

    Each transform_* method maps one OpenAI-style files operation onto the
    corresponding Anthropic /v1/files endpoint and back.
    """

    def __init__(self):
        # No configurable state; all behavior is stateless transformations.
        pass

    @property
    def custom_llm_provider(self) -> LlmProviders:
        """Provider identifier used by litellm's config registry."""
        return LlmProviders.ANTHROPIC

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Return the /v1/files URL, preferring an explicit api_base."""
        api_base = AnthropicModelInfo.get_api_base(api_base) or ANTHROPIC_FILES_API_BASE
        return f"{api_base.rstrip('/')}/v1/files"

    def get_error_class(
        self,
        error_message: str,
        status_code: int,
        headers: Union[dict, httpx.Headers],
    ) -> BaseLLMException:
        """Wrap an HTTP failure in an AnthropicError."""
        # NOTE(review): typing.cast is a no-op at runtime, so a plain dict is
        # passed through unchanged here — confirm AnthropicError accepts dicts.
        return AnthropicError(
            status_code=status_code,
            message=error_message,
            headers=cast(httpx.Headers, headers) if isinstance(headers, dict) else headers,
        )

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: list,
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Inject auth, version and files-beta headers; raises if no API key."""
        api_key = AnthropicModelInfo.get_api_key(api_key)
        if not api_key:
            raise ValueError(
                "Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass api_key parameter."
            )
        headers.update(
            {
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "anthropic-beta": ANTHROPIC_FILES_BETA_HEADER,
            }
        )
        return headers

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAICreateFileRequestOptionalParams]:
        """Only 'purpose' is supported when creating an Anthropic file."""
        return ["purpose"]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """No parameter remapping needed; pass optional params through as-is."""
        return optional_params

    def transform_create_file_request(
        self,
        model: str,
        create_file_data: CreateFileRequest,
        optional_params: dict,
        litellm_params: dict,
    ) -> dict:
        """
        Transform to multipart form data for Anthropic file upload.

        Anthropic expects: POST /v1/files with multipart form-data
        - file: the file content
        - purpose: "messages" (defaults to "messages" if not provided)

        Raises:
            ValueError: if no file payload is present in the request.
        """
        file_data = create_file_data.get("file")
        if file_data is None:
            raise ValueError("File data is required")
        extracted = extract_file_data(file_data)
        # Fall back to a timestamped name when the upload carries no filename.
        filename = extracted["filename"] or f"file_{int(time.time())}"
        content = extracted["content"]
        content_type = extracted.get("content_type", "application/octet-stream")
        purpose = create_file_data.get("purpose", "messages")
        # (filename, content, content_type) / (None, value) follow the
        # requests/httpx multipart field tuple convention.
        return {
            "file": (filename, content, content_type),
            "purpose": (None, purpose),
        }

    def transform_create_file_response(
        self,
        model: Optional[str],
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        """
        Transform Anthropic file response to OpenAI format.

        Anthropic response:
        {
            "id": "file-xxx",
            "type": "file",
            "filename": "document.pdf",
            "mime_type": "application/pdf",
            "size_bytes": 12345,
            "created_at": "2025-01-01T00:00:00Z"
        }
        """
        response_json = raw_response.json()
        return self._parse_anthropic_file(response_json)

    def transform_retrieve_file_request(
        self,
        file_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Build (url, query_params) for GET /v1/files/{file_id}."""
        api_base = (
            AnthropicModelInfo.get_api_base(litellm_params.get("api_base"))
            or ANTHROPIC_FILES_API_BASE
        )
        return f"{api_base.rstrip('/')}/v1/files/{file_id}", {}

    def transform_retrieve_file_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        """Parse a single-file metadata response into OpenAI file format."""
        response_json = raw_response.json()
        return self._parse_anthropic_file(response_json)

    def transform_delete_file_request(
        self,
        file_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Build (url, query_params) for DELETE /v1/files/{file_id}."""
        api_base = (
            AnthropicModelInfo.get_api_base(litellm_params.get("api_base"))
            or ANTHROPIC_FILES_API_BASE
        )
        return f"{api_base.rstrip('/')}/v1/files/{file_id}", {}

    def transform_delete_file_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> FileDeleted:
        """Map the delete response to OpenAI's FileDeleted shape.

        Since the request completed without raising, deletion is reported
        as successful (deleted=True).
        """
        response_json = raw_response.json()
        file_id = response_json.get("id", "")
        return FileDeleted(
            id=file_id,
            deleted=True,
            object="file",
        )

    def transform_list_files_request(
        self,
        purpose: Optional[str],
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Build (url, query_params) for GET /v1/files, optionally filtered by purpose."""
        api_base = (
            AnthropicModelInfo.get_api_base(litellm_params.get("api_base"))
            or ANTHROPIC_FILES_API_BASE
        )
        url = f"{api_base.rstrip('/')}/v1/files"
        params: Dict[str, Any] = {}
        if purpose:
            params["purpose"] = purpose
        return url, params

    def transform_list_files_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> List[OpenAIFileObject]:
        """
        Anthropic list response:
        {
            "data": [...],
            "has_more": false,
            "first_id": "...",
            "last_id": "..."
        }

        Only "data" is consumed here; pagination fields are not surfaced.
        """
        response_json = raw_response.json()
        files_data = response_json.get("data", [])
        return [self._parse_anthropic_file(f) for f in files_data]

    def transform_file_content_request(
        self,
        file_content_request: FileContentRequest,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Build (url, query_params) for GET /v1/files/{file_id}/content."""
        file_id = file_content_request.get("file_id")
        api_base = (
            AnthropicModelInfo.get_api_base(litellm_params.get("api_base"))
            or ANTHROPIC_FILES_API_BASE
        )
        return f"{api_base.rstrip('/')}/v1/files/{file_id}/content", {}

    def transform_file_content_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> HttpxBinaryResponseContent:
        """File content is returned as-is, wrapped for binary access."""
        return HttpxBinaryResponseContent(response=raw_response)

    @staticmethod
    def _parse_anthropic_file(file_data: dict) -> OpenAIFileObject:
        """Parse Anthropic file object into OpenAI format.

        Converts the ISO-8601 "created_at" string to a UTC epoch timestamp;
        on a missing or unparseable value, the current time is used instead.
        """
        created_at_str = file_data.get("created_at", "")
        if created_at_str:
            try:
                # Trim to the first 19 chars ("YYYY-MM-DDTHH:MM:SS") so the
                # fixed strptime format matches; timegm interprets it as UTC.
                created_at = int(
                    calendar.timegm(
                        time.strptime(
                            created_at_str.replace("Z", "+00:00")[:19],
                            "%Y-%m-%dT%H:%M:%S",
                        )
                    )
                )
            except (ValueError, TypeError):
                created_at = int(time.time())
        else:
            created_at = int(time.time())
        return OpenAIFileObject(
            id=file_data.get("id", ""),
            # Anthropic reports "size_bytes"; "bytes" is a secondary fallback.
            bytes=file_data.get("size_bytes", file_data.get("bytes", 0)),
            created_at=created_at,
            filename=file_data.get("filename", ""),
            object="file",
            purpose=file_data.get("purpose", "messages"),
            status="uploaded",
            status_details=None,
        )

View File

@@ -0,0 +1,5 @@
"""Anthropic Skills API integration"""
from .transformation import AnthropicSkillsConfig
__all__ = ["AnthropicSkillsConfig"]

View File

@@ -0,0 +1,279 @@
# Anthropic Skills API Integration
This module provides comprehensive support for the Anthropic Skills API through LiteLLM.
## Features
The Skills API allows you to:
- **Create skills**: Define reusable AI capabilities
- **List skills**: Browse all available skills
- **Get skills**: Retrieve detailed information about a specific skill
- **Delete skills**: Remove skills that are no longer needed
## Quick Start
### Prerequisites
Set your Anthropic API key:
```python
import os
os.environ["ANTHROPIC_API_KEY"] = "your-api-key-here"
```
### Basic Usage
#### Create a Skill
```python
import litellm
# Create a skill with files
# Note: All files must be in the same top-level directory
# and must include a SKILL.md file at the root
skill = litellm.create_skill(
files=[
# List of file objects to upload
# Must include SKILL.md
],
display_title="Python Code Generator",
custom_llm_provider="anthropic"
)
print(f"Created skill: {skill.id}")
# Asynchronous version
skill = await litellm.acreate_skill(
files=[...], # Your files here
display_title="Python Code Generator",
custom_llm_provider="anthropic"
)
```
#### List Skills
```python
# List all skills
skills = litellm.list_skills(
custom_llm_provider="anthropic"
)
for skill in skills.data:
print(f"{skill.display_title}: {skill.id}")
# With pagination and filtering
skills = litellm.list_skills(
limit=20,
source="custom", # Filter by 'custom' or 'anthropic'
custom_llm_provider="anthropic"
)
# Get next page if available
if skills.has_more:
next_page = litellm.list_skills(
page=skills.next_page,
custom_llm_provider="anthropic"
)
```
#### Get a Skill
```python
skill = litellm.get_skill(
skill_id="skill_abc123",
custom_llm_provider="anthropic"
)
print(f"Skill: {skill.display_title}")
print(f"Created: {skill.created_at}")
print(f"Latest version: {skill.latest_version}")
print(f"Source: {skill.source}")
```
#### Delete a Skill
```python
result = litellm.delete_skill(
skill_id="skill_abc123",
custom_llm_provider="anthropic"
)
print(f"Deleted skill {result.id}, type: {result.type}")
```
## API Reference
### `create_skill()`
Create a new skill.
**Parameters:**
- `files` (List[Any], optional): Files to upload for the skill. All files must be in the same top-level directory and must include a SKILL.md file at the root.
- `display_title` (str, optional): Display title for the skill
- `custom_llm_provider` (str, optional): Provider name (default: "anthropic")
- `extra_headers` (dict, optional): Additional HTTP headers
- `timeout` (float, optional): Request timeout
**Returns:**
- `Skill`: The created skill object
**Async version:** `acreate_skill()`
### `list_skills()`
List all skills.
**Parameters:**
- `limit` (int, optional): Number of results to return per page (max 100, default 20)
- `page` (str, optional): Pagination token for fetching a specific page of results
- `source` (str, optional): Filter skills by source ('custom' or 'anthropic')
- `custom_llm_provider` (str, optional): Provider name (default: "anthropic")
- `extra_headers` (dict, optional): Additional HTTP headers
- `timeout` (float, optional): Request timeout
**Returns:**
- `ListSkillsResponse`: Object containing a list of skills and pagination info
**Async version:** `alist_skills()`
### `get_skill()`
Get a specific skill by ID.
**Parameters:**
- `skill_id` (str, required): The skill ID
- `custom_llm_provider` (str, optional): Provider name (default: "anthropic")
- `extra_headers` (dict, optional): Additional HTTP headers
- `timeout` (float, optional): Request timeout
**Returns:**
- `Skill`: The requested skill object
**Async version:** `aget_skill()`
### `delete_skill()`
Delete a skill.
**Parameters:**
- `skill_id` (str, required): The skill ID to delete
- `custom_llm_provider` (str, optional): Provider name (default: "anthropic")
- `extra_headers` (dict, optional): Additional HTTP headers
- `timeout` (float, optional): Request timeout
**Returns:**
- `DeleteSkillResponse`: Object with `id` and `type` fields
**Async version:** `adelete_skill()`
## Response Types
### `Skill`
Represents a skill from the Anthropic Skills API.
**Fields:**
- `id` (str): Unique identifier
- `created_at` (str): ISO 8601 timestamp
- `display_title` (str, optional): Display title
- `latest_version` (str, optional): Latest version identifier
- `source` (str): Source ("custom" or "anthropic")
- `type` (str): Object type (always "skill")
- `updated_at` (str): ISO 8601 timestamp
### `ListSkillsResponse`
Response from listing skills.
**Fields:**
- `data` (List[Skill]): List of skills
- `next_page` (str, optional): Pagination token for the next page
- `has_more` (bool): Whether more skills are available
### `DeleteSkillResponse`
Response from deleting a skill.
**Fields:**
- `id` (str): The deleted skill ID
- `type` (str): Deleted object type (always "skill_deleted")
## Architecture
The Skills API implementation follows LiteLLM's standard patterns:
1. **Type Definitions** (`litellm/types/llms/anthropic_skills.py`)
- Pydantic models for request/response types
- TypedDict definitions for request parameters
2. **Base Configuration** (`litellm/llms/base_llm/skills/transformation.py`)
- Abstract base class `BaseSkillsAPIConfig`
- Defines transformation interface for provider-specific implementations
3. **Provider Implementation** (`litellm/llms/anthropic/skills/transformation.py`)
- `AnthropicSkillsConfig` - Anthropic-specific transformations
- Handles API authentication, URL construction, and response mapping
4. **Main Handler** (`litellm/skills/main.py`)
- Public API functions (sync and async)
- Request validation and routing
- Error handling
5. **HTTP Handlers** (`litellm/llms/custom_httpx/llm_http_handler.py`)
- Low-level HTTP request/response handling
- Connection pooling and retry logic
## Beta API Support
The Skills API is in beta. The beta header (`skills-2025-10-02`) is automatically added by the Anthropic provider configuration. You can customize it if needed:
```python
skill = litellm.create_skill(
display_title="My Skill",
extra_headers={
"anthropic-beta": "skills-2025-10-02" # Or any other beta version
},
custom_llm_provider="anthropic"
)
```
The default beta version is configured in `litellm.constants.ANTHROPIC_SKILLS_API_BETA_VERSION`.
## Error Handling
All Skills API functions follow LiteLLM's standard error handling:
```python
import litellm
try:
skill = litellm.create_skill(
display_title="My Skill",
custom_llm_provider="anthropic"
)
except litellm.exceptions.AuthenticationError as e:
print(f"Authentication failed: {e}")
except litellm.exceptions.RateLimitError as e:
print(f"Rate limit exceeded: {e}")
except litellm.exceptions.APIError as e:
print(f"API error: {e}")
```
## Contributing
To add support for Skills API to a new provider:
1. Create provider-specific configuration class inheriting from `BaseSkillsAPIConfig`
2. Implement all abstract methods for request/response transformations
3. Register the config in `ProviderConfigManager.get_provider_skills_api_config()`
4. Add appropriate tests
## Related Documentation
- [Anthropic Skills API Documentation](https://platform.claude.com/docs/en/api/beta/skills/create)
- [LiteLLM Responses API](../../../responses/)
- [Provider Configuration System](../../base_llm/)
## Support
For issues or questions:
- GitHub Issues: https://github.com/BerriAI/litellm/issues
- Discord: https://discord.gg/wuPM9dRgDw

View File

@@ -0,0 +1,204 @@
"""
Anthropic Skills API configuration and transformations
"""
from typing import Any, Dict, Optional, Tuple
import httpx
from litellm._logging import verbose_logger
from litellm.llms.base_llm.skills.transformation import (
BaseSkillsAPIConfig,
LiteLLMLoggingObj,
)
from litellm.types.llms.anthropic_skills import (
CreateSkillRequest,
DeleteSkillResponse,
ListSkillsParams,
ListSkillsResponse,
Skill,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import LlmProviders
class AnthropicSkillsConfig(BaseSkillsAPIConfig):
    """Anthropic-specific Skills API configuration.

    Maps the generic Skills API operations (create/list/get/delete) onto
    Anthropic's beta /v1/skills endpoints, handling auth and beta headers.
    """

    @property
    def custom_llm_provider(self) -> LlmProviders:
        """Provider identifier used by litellm's config registry."""
        return LlmProviders.ANTHROPIC

    def validate_environment(
        self, headers: dict, litellm_params: Optional[GenericLiteLLMParams]
    ) -> dict:
        """Add Anthropic-specific headers (auth, API version, skills beta flag).

        Raises:
            ValueError: if no API key can be resolved from params or env.
        """
        from litellm.llms.anthropic.common_utils import AnthropicModelInfo

        # Get API key: explicit litellm_params first, then env via AnthropicModelInfo.
        api_key = None
        if litellm_params:
            api_key = litellm_params.api_key
        api_key = AnthropicModelInfo.get_api_key(api_key)
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY is required for Skills API")

        # Add required headers.
        headers["x-api-key"] = api_key
        headers["anthropic-version"] = "2023-06-01"

        # Add beta header for skills API, merging with any caller-supplied
        # "anthropic-beta" value without duplicating the skills flag.
        from litellm.constants import ANTHROPIC_SKILLS_API_BETA_VERSION
        if "anthropic-beta" not in headers:
            headers["anthropic-beta"] = ANTHROPIC_SKILLS_API_BETA_VERSION
        elif isinstance(headers["anthropic-beta"], list):
            if ANTHROPIC_SKILLS_API_BETA_VERSION not in headers["anthropic-beta"]:
                headers["anthropic-beta"].append(ANTHROPIC_SKILLS_API_BETA_VERSION)
        elif isinstance(headers["anthropic-beta"], str):
            # NOTE(review): this branch stores a *list* as the header value;
            # confirm the HTTP client joins list-valued headers correctly
            # (comma-joining into one string may be the safer form).
            if ANTHROPIC_SKILLS_API_BETA_VERSION not in headers["anthropic-beta"]:
                headers["anthropic-beta"] = [
                    headers["anthropic-beta"],
                    ANTHROPIC_SKILLS_API_BETA_VERSION,
                ]
        headers["content-type"] = "application/json"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        endpoint: str,
        skill_id: Optional[str] = None,
    ) -> str:
        """Get complete URL for Anthropic Skills API.

        When skill_id is given, targets /v1/skills/{skill_id} (the
        ``endpoint`` argument is ignored in that case).
        """
        from litellm.llms.anthropic.common_utils import AnthropicModelInfo
        if api_base is None:
            api_base = AnthropicModelInfo.get_api_base()
        if skill_id:
            return f"{api_base}/v1/skills/{skill_id}"
        return f"{api_base}/v1/{endpoint}"

    def transform_create_skill_request(
        self,
        create_request: CreateSkillRequest,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Dict:
        """Transform create skill request for Anthropic.

        Anthropic expects the request body directly; None-valued fields
        are stripped before sending.
        """
        verbose_logger.debug("Transforming create skill request: %s", create_request)
        request_body = {k: v for k, v in create_request.items() if v is not None}
        return request_body

    def transform_create_skill_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> Skill:
        """Transform Anthropic response to Skill object"""
        response_json = raw_response.json()
        verbose_logger.debug("Transforming create skill response: %s", response_json)
        return Skill(**response_json)

    def transform_list_skills_request(
        self,
        list_params: ListSkillsParams,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Tuple[str, Dict]:
        """Transform list skills request for Anthropic.

        Returns:
            (url, query_params) with only the truthy limit/page/source
            filters included.
        """
        from litellm.llms.anthropic.common_utils import AnthropicModelInfo
        api_base = AnthropicModelInfo.get_api_base(
            litellm_params.api_base if litellm_params else None
        )
        url = self.get_complete_url(api_base=api_base, endpoint="skills")

        # Build query parameters; falsy values (0, "", None) are omitted.
        query_params: Dict[str, Any] = {}
        if "limit" in list_params and list_params["limit"]:
            query_params["limit"] = list_params["limit"]
        if "page" in list_params and list_params["page"]:
            query_params["page"] = list_params["page"]
        if "source" in list_params and list_params["source"]:
            query_params["source"] = list_params["source"]
        verbose_logger.debug(
            "List skills request made to Anthropic Skills endpoint with params: %s",
            query_params,
        )
        return url, query_params

    def transform_list_skills_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> ListSkillsResponse:
        """Transform Anthropic response to ListSkillsResponse"""
        response_json = raw_response.json()
        verbose_logger.debug("Transforming list skills response: %s", response_json)
        return ListSkillsResponse(**response_json)

    def transform_get_skill_request(
        self,
        skill_id: str,
        api_base: str,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Tuple[str, Dict]:
        """Transform get skill request for Anthropic; returns (url, headers)."""
        url = self.get_complete_url(
            api_base=api_base, endpoint="skills", skill_id=skill_id
        )
        verbose_logger.debug("Get skill request - URL: %s", url)
        return url, headers

    def transform_get_skill_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> Skill:
        """Transform Anthropic response to Skill object"""
        response_json = raw_response.json()
        verbose_logger.debug("Transforming get skill response: %s", response_json)
        return Skill(**response_json)

    def transform_delete_skill_request(
        self,
        skill_id: str,
        api_base: str,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Tuple[str, Dict]:
        """Transform delete skill request for Anthropic; returns (url, headers)."""
        url = self.get_complete_url(
            api_base=api_base, endpoint="skills", skill_id=skill_id
        )
        verbose_logger.debug("Delete skill request - URL: %s", url)
        return url, headers

    def transform_delete_skill_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> DeleteSkillResponse:
        """Transform Anthropic response to DeleteSkillResponse"""
        response_json = raw_response.json()
        verbose_logger.debug("Transforming delete skill response: %s", response_json)
        return DeleteSkillResponse(**response_json)