Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/anthropic/chat/transformation.py

1922 lines
78 KiB
Python
Raw Normal View History

import json
import re
import time
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
import httpx
import litellm
from litellm.constants import (
ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES,
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS,
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET,
RESPONSE_FORMAT_TOOL_NAME,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.base_llm.base_utils import type_to_response_format_param
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.anthropic import (
ANTHROPIC_BETA_HEADER_VALUES,
ANTHROPIC_HOSTED_TOOLS,
AllAnthropicMessageValues,
AllAnthropicToolsValues,
AnthropicCodeExecutionTool,
AnthropicComputerTool,
AnthropicHostedTools,
AnthropicInputSchema,
AnthropicMcpServerTool,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicOutputSchema,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
AnthropicWebSearchTool,
AnthropicWebSearchUserLocation,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionRedactedThinkingBlock,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolParam,
OpenAIChatCompletionFinishReason,
OpenAIMcpServerTool,
OpenAIWebSearchOptions,
)
from litellm.types.utils import (
CacheCreationTokenDetails,
CompletionTokensDetailsWrapper,
)
from litellm.types.utils import Message as LitellmMessage
from litellm.types.utils import PromptTokensDetailsWrapper, ServerToolUse
from litellm.utils import (
ModelResponse,
Usage,
add_dummy_tool,
any_assistant_message_has_thinking_blocks,
get_max_tokens,
has_tool_call_blocks,
last_assistant_with_tool_calls_has_no_thinking_blocks,
supports_reasoning,
token_counter,
)
from ..common_utils import AnthropicError, AnthropicModelInfo, process_anthropic_headers
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
LoggingClass = LiteLLMLoggingObj
else:
LoggingClass = Any
class AnthropicConfig(AnthropicModelInfo, BaseConfig):
    """
    Reference: https://docs.anthropic.com/claude/reference/messages_post

    to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
    """

    # Provider-level defaults for Anthropic's /messages API. Values passed to
    # __init__ are written onto the class (not the instance), so they act as
    # process-wide defaults for subsequent requests.
    max_tokens: Optional[int] = None
    stop_sequences: Optional[list] = None
    temperature: Optional[int] = None  # NOTE(review): annotated int, but floats are accepted at runtime — confirm
    top_p: Optional[int] = None  # NOTE(review): annotated int, but floats are accepted at runtime — confirm
    top_k: Optional[int] = None
    metadata: Optional[dict] = None
    system: Optional[str] = None
def __init__(
    self,
    max_tokens: Optional[int] = None,
    stop_sequences: Optional[list] = None,
    temperature: Optional[int] = None,
    top_p: Optional[int] = None,
    top_k: Optional[int] = None,
    metadata: Optional[dict] = None,
    system: Optional[str] = None,
) -> None:
    """Store every explicitly-provided setting as a class-level default.

    Mirrors the other litellm provider configs: each non-None keyword
    argument is written onto the class itself, making it a process-wide
    default rather than a per-instance setting.
    """
    provided = locals().copy()
    for name, value in provided.items():
        if name == "self" or value is None:
            continue
        setattr(type(self), name, value)
@property
def custom_llm_provider(self) -> Optional[str]:
    """Name of the provider this config maps to (always "anthropic")."""
    return "anthropic"
@classmethod
def get_config(cls, *, model: Optional[str] = None):
    """Return the provider config dict, guaranteeing a max_tokens default.

    Anthropic's /messages endpoint requires max_tokens, so when neither the
    caller nor the class-level defaults supplied one, fall back to the
    model-specific maximum (or the global default).
    """
    config = super().get_config()
    # anthropic requires a default value for max_tokens
    if config.get("max_tokens") is None:
        config["max_tokens"] = cls.get_max_tokens_for_model(model)
    return config
@staticmethod
def get_max_tokens_for_model(model: Optional[str] = None) -> int:
    """
    Get the max output tokens for a given model.

    Falls back to DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS (configurable via env
    var) when no model is given, the model is unknown, or lookup fails.
    """
    if model is None:
        return DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS
    try:
        resolved = get_max_tokens(model)
    except Exception:
        # Unknown model / registry lookup failure -> use the global default.
        return DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS
    return DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS if resolved is None else resolved
@staticmethod
def convert_tool_use_to_openai_format(
    anthropic_tool_content: Dict[str, Any],
    index: int,
) -> ChatCompletionToolCallChunk:
    """
    Convert an Anthropic tool_use content block into OpenAI's tool-call format.

    Args:
        anthropic_tool_content: Anthropic tool_use content block shaped like
            {"type": "tool_use", "id": "...", "name": "...", "input": {...}}
        index: Position of this tool call in the tool-call list.

    Returns:
        ChatCompletionToolCallChunk in OpenAI format.
    """
    function_chunk = ChatCompletionToolCallFunctionChunk(
        name=anthropic_tool_content["name"],
        arguments=json.dumps(anthropic_tool_content["input"]),
    )
    openai_tool_call = ChatCompletionToolCallChunk(
        id=anthropic_tool_content["id"],
        type="function",
        function=function_chunk,
        index=index,
    )
    # Preserve caller metadata when present (programmatic tool calling).
    if "caller" in anthropic_tool_content:
        openai_tool_call["caller"] = cast(  # type: ignore[typeddict-item]
            Dict[str, Any], anthropic_tool_content["caller"]
        )
    return openai_tool_call
@staticmethod
def _is_opus_4_6_model(model: str) -> bool:
"""Check if the model is specifically Claude Opus 4.6."""
model_lower = model.lower()
return any(
v in model_lower for v in ("opus-4-6", "opus_4_6", "opus-4.6", "opus_4.6")
)
def get_supported_openai_params(self, model: str):
    """Return the OpenAI-compatible params litellm can map for this model.

    Thinking/reasoning params are only advertised for models known to
    support extended thinking (claude-3-7-sonnet, the Claude 4.6 family,
    or anything the model registry marks as reasoning-capable).
    """
    supported = [
        "stream",
        "stop",
        "temperature",
        "top_p",
        "max_tokens",
        "max_completion_tokens",
        "tools",
        "tool_choice",
        "extra_headers",
        "parallel_tool_calls",
        "response_format",
        "user",
        "web_search_options",
        "speed",
        "context_management",
        "cache_control",
    ]
    model_supports_thinking = (
        "claude-3-7-sonnet" in model
        or AnthropicConfig._is_claude_4_6_model(model)
        or supports_reasoning(
            model=model,
            custom_llm_provider=self.custom_llm_provider,
        )
    )
    if model_supports_thinking:
        supported.extend(["thinking", "reasoning_effort"])
    return supported
@staticmethod
def filter_anthropic_output_schema(schema: Dict[str, Any]) -> Dict[str, Any]:
"""
Filter out unsupported fields from JSON schema for Anthropic's output_format API.
Anthropic's output_format doesn't support certain JSON schema properties:
- maxItems/minItems: Not supported for array types
- minimum/maximum: Not supported for numeric types
- minLength/maxLength: Not supported for string types
This mirrors the transformation done by the Anthropic Python SDK.
See: https://platform.claude.com/docs/en/build-with-claude/structured-outputs#how-sdk-transformation-works
The SDK approach:
1. Remove unsupported constraints from schema
2. Add constraint info to description (e.g., "Must be at least 100")
3. Validate responses against original schema
Args:
schema: The JSON schema dictionary to filter
Returns:
A new dictionary with unsupported fields removed and descriptions updated
Related issues:
- https://github.com/BerriAI/litellm/issues/19444
"""
if not isinstance(schema, dict):
return schema
# All numeric/string/array constraints not supported by Anthropic
unsupported_fields = {
"maxItems",
"minItems", # array constraints
"minimum",
"maximum", # numeric constraints
"exclusiveMinimum",
"exclusiveMaximum", # numeric constraints
"minLength",
"maxLength", # string constraints
}
# Build description additions from removed constraints
constraint_descriptions: list = []
constraint_labels = {
"minItems": "minimum number of items: {}",
"maxItems": "maximum number of items: {}",
"minimum": "minimum value: {}",
"maximum": "maximum value: {}",
"exclusiveMinimum": "exclusive minimum value: {}",
"exclusiveMaximum": "exclusive maximum value: {}",
"minLength": "minimum length: {}",
"maxLength": "maximum length: {}",
}
for field in unsupported_fields:
if field in schema:
constraint_descriptions.append(
constraint_labels[field].format(schema[field])
)
result: Dict[str, Any] = {}
# Update description with removed constraint info
if constraint_descriptions:
existing_desc = schema.get("description", "")
constraint_note = "Note: " + ", ".join(constraint_descriptions) + "."
if existing_desc:
result["description"] = existing_desc + " " + constraint_note
else:
result["description"] = constraint_note
for key, value in schema.items():
if key in unsupported_fields:
continue
if key == "description" and "description" in result:
# Already handled above
continue
if key == "properties" and isinstance(value, dict):
result[key] = {
k: AnthropicConfig.filter_anthropic_output_schema(v)
for k, v in value.items()
}
elif key == "items" and isinstance(value, dict):
result[key] = AnthropicConfig.filter_anthropic_output_schema(value)
elif key == "$defs" and isinstance(value, dict):
result[key] = {
k: AnthropicConfig.filter_anthropic_output_schema(v)
for k, v in value.items()
}
elif key == "anyOf" and isinstance(value, list):
result[key] = [
AnthropicConfig.filter_anthropic_output_schema(item)
for item in value
]
elif key == "allOf" and isinstance(value, list):
result[key] = [
AnthropicConfig.filter_anthropic_output_schema(item)
for item in value
]
elif key == "oneOf" and isinstance(value, list):
result[key] = [
AnthropicConfig.filter_anthropic_output_schema(item)
for item in value
]
else:
result[key] = value
# Anthropic requires additionalProperties=false for object schemas
# See: https://docs.anthropic.com/en/docs/build-with-claude/structured-outputs
if result.get("type") == "object" and "additionalProperties" not in result:
result["additionalProperties"] = False
return result
def get_json_schema_from_pydantic_object(
    self, response_format: Union[Any, Dict, None]
) -> Optional[dict]:
    """Convert a pydantic model / dict response_format into a JSON-schema dict.

    Uses a local ref_template so nested model references resolve inside the
    schema itself rather than pointing at external definitions.
    """
    return type_to_response_format_param(
        response_format, ref_template="/$defs/{model}"
    )  # Relevant issue: https://github.com/BerriAI/litellm/issues/7755
def get_cache_control_headers(self) -> dict:
    """Headers to send when cache_control is used.

    Anthropic no longer requires the prompt-caching beta header; prompt
    caching is applied automatically whenever cache_control appears in
    messages, so only the API version header is needed.
    Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
    """
    headers = {"anthropic-version": "2023-06-01"}
    return headers
def _map_tool_choice(
    self,
    tool_choice: Optional[Union[str, dict]],
    parallel_tool_use: Optional[bool],
) -> Optional[AnthropicMessagesToolChoice]:
    """Map OpenAI tool_choice / parallel_tool_calls to Anthropic's tool_choice.

    Args:
        tool_choice: "auto" | "required" | "none", or a dict — either
            {"type": ...} or {"function": {"name": ...}} to force one tool.
            (Annotation fixed: previously declared Optional[str] even though
            the body explicitly handles dict values.)
        parallel_tool_use: OpenAI parallel_tool_calls flag; Anthropic
            expresses this inversely via disable_parallel_tool_use.

    Returns:
        Anthropic tool_choice dict, or None when nothing needs to be sent.
    """
    _tool_choice: Optional[AnthropicMessagesToolChoice] = None
    if tool_choice == "auto":
        _tool_choice = AnthropicMessagesToolChoice(
            type="auto",
        )
    elif tool_choice == "required":
        _tool_choice = AnthropicMessagesToolChoice(type="any")
    elif tool_choice == "none":
        _tool_choice = AnthropicMessagesToolChoice(type="none")
    elif isinstance(tool_choice, dict):
        if "type" in tool_choice and "function" not in tool_choice:
            tool_type = tool_choice.get("type")
            if tool_type == "auto":
                _tool_choice = AnthropicMessagesToolChoice(type="auto")
            elif tool_type == "required" or tool_type == "any":
                _tool_choice = AnthropicMessagesToolChoice(type="any")
            elif tool_type == "none":
                _tool_choice = AnthropicMessagesToolChoice(type="none")
        else:
            _tool_name = tool_choice.get("function", {}).get("name")
            if _tool_name is not None:
                _tool_choice = AnthropicMessagesToolChoice(type="tool")
                _tool_choice["name"] = _tool_name
    if parallel_tool_use is not None:
        # Anthropic uses 'disable_parallel_tool_use' flag to determine if parallel tool use is allowed
        # this is the inverse of the openai flag.
        if tool_choice == "none":
            # parallel-tool settings are meaningless when tool use is disabled
            pass
        elif _tool_choice is not None:
            _tool_choice["disable_parallel_tool_use"] = not parallel_tool_use
        else:  # use anthropic defaults and make sure to send the disable_parallel_tool_use flag
            _tool_choice = AnthropicMessagesToolChoice(
                type="auto",
                disable_parallel_tool_use=not parallel_tool_use,
            )
    return _tool_choice
def _map_tool_helper(  # noqa: PLR0915
    self, tool: ChatCompletionToolParam
) -> Tuple[Optional[AllAnthropicToolsValues], Optional[AnthropicMcpServerTool]]:
    """Convert a single OpenAI-style tool definition into Anthropic's format.

    At most one of the returned values is non-None: a regular Anthropic
    tool for function/custom/computer/hosted/tool-search tools, or an MCP
    server entry for "url"/"mcp" tools. After the type-specific mapping,
    shared optional fields (cache_control, defer_loading, allowed_callers,
    input_examples) are copied onto the mapped tool when the tool type
    supports them.

    Raises:
        ValueError: for missing required fields or an unsupported tool type.
    """
    returned_tool: Optional[AllAnthropicToolsValues] = None
    mcp_server: Optional[AnthropicMcpServerTool] = None
    if tool["type"] == "function" or tool["type"] == "custom":
        _input_schema: dict = tool["function"].get(
            "parameters",
            {
                "type": "object",
                "properties": {},
            },
        )
        # Anthropic requires input_schema.type to be "object". Normalize
        # schemas from external sources (MCP servers, OpenAI callers) that
        # may omit the type field or use a non-object type.
        if _input_schema.get("type") != "object":
            litellm.verbose_logger.debug(
                "_map_tool_helper: coercing input_schema type from %r to "
                "'object' for Anthropic compatibility (tool: %s)",
                _input_schema.get("type"),
                tool["function"].get("name"),
            )
            _input_schema = dict(_input_schema)  # avoid mutating caller's dict
            _input_schema["type"] = "object"
            if "properties" not in _input_schema:
                _input_schema["properties"] = {}
        # Drop any schema keys AnthropicInputSchema does not declare.
        _allowed_properties = set(AnthropicInputSchema.__annotations__.keys())
        input_schema_filtered = {
            k: v for k, v in _input_schema.items() if k in _allowed_properties
        }
        input_anthropic_schema: AnthropicInputSchema = AnthropicInputSchema(
            **input_schema_filtered
        )
        _tool = AnthropicMessagesTool(
            name=tool["function"]["name"],
            input_schema=input_anthropic_schema,
        )
        _description = tool["function"].get("description")
        if _description is not None:
            _tool["description"] = _description
        returned_tool = _tool
    elif tool["type"].startswith("computer_"):
        ## check if all required 'display_' params are given
        if "parameters" not in tool["function"]:
            raise ValueError("Missing required parameter: parameters")
        _display_width_px: Optional[int] = tool["function"]["parameters"].get(
            "display_width_px"
        )
        _display_height_px: Optional[int] = tool["function"]["parameters"].get(
            "display_height_px"
        )
        if _display_width_px is None or _display_height_px is None:
            raise ValueError(
                "Missing required parameter: display_width_px or display_height_px"
            )
        _computer_tool = AnthropicComputerTool(
            type=tool["type"],
            name=tool["function"].get("name", "computer"),
            display_width_px=_display_width_px,
            display_height_px=_display_height_px,
        )
        _display_number = tool["function"]["parameters"].get("display_number")
        if _display_number is not None:
            _computer_tool["display_number"] = _display_number
        returned_tool = _computer_tool
    elif any(tool["type"].startswith(t) for t in ANTHROPIC_HOSTED_TOOLS):
        # Anthropic-hosted tools — the name may live at the top level or
        # nested under "function"; all other keys pass through unchanged.
        function_name_obj = tool.get("name", tool.get("function", {}).get("name"))
        if function_name_obj is None or not isinstance(function_name_obj, str):
            raise ValueError("Missing required parameter: name")
        function_name = function_name_obj
        additional_tool_params = {}
        for k, v in tool.items():
            if k != "type" and k != "name":
                additional_tool_params[k] = v
        returned_tool = AnthropicHostedTools(
            type=tool["type"], name=function_name, **additional_tool_params  # type: ignore
        )
    elif tool["type"] == "url":  # mcp server tool
        mcp_server = AnthropicMcpServerTool(**tool)  # type: ignore
    elif tool["type"] == "mcp":
        mcp_server = self._map_openai_mcp_server_tool(
            cast(OpenAIMcpServerTool, tool)
        )
    elif tool["type"] == "tool_search_tool_regex_20251119":
        # Tool search tool using regex
        from litellm.types.llms.anthropic import AnthropicToolSearchToolRegex

        tool_name_obj = tool.get("name", "tool_search_tool_regex")
        if not isinstance(tool_name_obj, str):
            raise ValueError("Tool search tool must have a valid name")
        tool_name = tool_name_obj
        returned_tool = AnthropicToolSearchToolRegex(
            type="tool_search_tool_regex_20251119",
            name=tool_name,
        )
    elif tool["type"] == "tool_search_tool_bm25_20251119":
        # Tool search tool using BM25
        from litellm.types.llms.anthropic import AnthropicToolSearchToolBM25

        tool_name_obj = tool.get("name", "tool_search_tool_bm25")
        if not isinstance(tool_name_obj, str):
            raise ValueError("Tool search tool must have a valid name")
        tool_name = tool_name_obj
        returned_tool = AnthropicToolSearchToolBM25(
            type="tool_search_tool_bm25_20251119",
            name=tool_name,
        )
    if returned_tool is None and mcp_server is None:
        raise ValueError(f"Unsupported tool type: {tool['type']}")
    ## check if cache_control is set in the tool
    _cache_control = tool.get("cache_control", None)
    _cache_control_function = tool.get("function", {}).get("cache_control", None)
    if returned_tool is not None:
        # Only set cache_control on tools that support it (not tool search tools)
        tool_type = returned_tool.get("type", "")
        if tool_type not in (
            "tool_search_tool_regex_20251119",
            "tool_search_tool_bm25_20251119",
        ):
            if _cache_control is not None:
                returned_tool["cache_control"] = _cache_control  # type: ignore[typeddict-item]
            elif _cache_control_function is not None and isinstance(
                _cache_control_function, dict
            ):
                returned_tool["cache_control"] = ChatCompletionCachedContent(  # type: ignore[typeddict-item]
                    **_cache_control_function  # type: ignore
                )
    ## check if defer_loading is set in the tool
    _defer_loading = tool.get("defer_loading", None)
    _defer_loading_function = tool.get("function", {}).get("defer_loading", None)
    if returned_tool is not None:
        # Only set defer_loading on tools that support it (not tool search tools or computer tools)
        tool_type = returned_tool.get("type", "")
        if tool_type not in (
            "tool_search_tool_regex_20251119",
            "tool_search_tool_bm25_20251119",
            "computer_20241022",
            "computer_20250124",
        ):
            if _defer_loading is not None:
                if not isinstance(_defer_loading, bool):
                    raise ValueError("defer_loading must be a boolean")
                returned_tool["defer_loading"] = _defer_loading  # type: ignore[typeddict-item]
            elif _defer_loading_function is not None:
                if not isinstance(_defer_loading_function, bool):
                    raise ValueError("defer_loading must be a boolean")
                returned_tool["defer_loading"] = _defer_loading_function  # type: ignore[typeddict-item]
    ## check if allowed_callers is set in the tool
    _allowed_callers = tool.get("allowed_callers", None)
    _allowed_callers_function = tool.get("function", {}).get(
        "allowed_callers", None
    )
    if returned_tool is not None:
        # Only set allowed_callers on tools that support it (not tool search tools or computer tools)
        tool_type = returned_tool.get("type", "")
        if tool_type not in (
            "tool_search_tool_regex_20251119",
            "tool_search_tool_bm25_20251119",
            "computer_20241022",
            "computer_20250124",
        ):
            if _allowed_callers is not None:
                if not isinstance(_allowed_callers, list) or not all(
                    isinstance(item, str) for item in _allowed_callers
                ):
                    raise ValueError("allowed_callers must be a list of strings")
                returned_tool["allowed_callers"] = _allowed_callers  # type: ignore[typeddict-item]
            elif _allowed_callers_function is not None:
                if not isinstance(_allowed_callers_function, list) or not all(
                    isinstance(item, str) for item in _allowed_callers_function
                ):
                    raise ValueError("allowed_callers must be a list of strings")
                returned_tool["allowed_callers"] = _allowed_callers_function  # type: ignore[typeddict-item]
    ## check if input_examples is set in the tool
    _input_examples = tool.get("input_examples", None)
    _input_examples_function = tool.get("function", {}).get("input_examples", None)
    if returned_tool is not None:
        # Only set input_examples on user-defined tools (type "custom" or no type)
        tool_type = returned_tool.get("type", "")
        if tool_type == "custom" or (tool_type == "" and "name" in returned_tool):
            if _input_examples is not None and isinstance(_input_examples, list):
                returned_tool["input_examples"] = _input_examples  # type: ignore[typeddict-item]
            elif _input_examples_function is not None and isinstance(
                _input_examples_function, list
            ):
                returned_tool["input_examples"] = _input_examples_function  # type: ignore[typeddict-item]
    return returned_tool, mcp_server
def _map_openai_mcp_server_tool(
    self, tool: OpenAIMcpServerTool
) -> AnthropicMcpServerTool:
    """Translate an OpenAI-style MCP server tool into Anthropic's format.

    Carries over the server URL/label, an optional allowed_tools allow-list,
    and a bare authorization token extracted from any Authorization header.
    """
    from litellm.types.llms.anthropic import AnthropicMcpServerToolConfiguration

    mapped_tool = AnthropicMcpServerTool(
        type="url",
        url=tool["server_url"],
        name=tool["server_label"],
    )

    # Restrict callable tools when the caller provided an allow-list.
    if tool.get("allowed_tools", None) is not None:
        mapped_tool["tool_configuration"] = AnthropicMcpServerToolConfiguration(
            allowed_tools=tool.get("allowed_tools", None),
        )

    # Anthropic takes a bare token rather than an Authorization header.
    request_headers = tool.get("headers", {})
    if request_headers is not None:
        auth_header = request_headers.get("Authorization", None)
        if auth_header is not None:
            mapped_tool["authorization_token"] = auth_header.replace("Bearer ", "")

    return mapped_tool
def _map_tools(
    self, tools: List
) -> Tuple[List[AllAnthropicToolsValues], List[AnthropicMcpServerTool]]:
    """Split incoming tools into Anthropic tool defs and MCP server entries.

    Tools already carrying an input_schema are assumed to be in Anthropic
    format and passed through untouched; everything else is treated as an
    OpenAI tool and converted via _map_tool_helper.
    """
    converted_tools: list = []
    converted_mcp_servers: list = []
    for candidate in tools:
        if "input_schema" in candidate:  # assume in anthropic format
            converted_tools.append(candidate)
            continue
        # assume openai tool call
        mapped_tool, mapped_mcp_server = self._map_tool_helper(candidate)
        if mapped_tool is not None:
            converted_tools.append(mapped_tool)
        if mapped_mcp_server is not None:
            converted_mcp_servers.append(mapped_mcp_server)
    return converted_tools, converted_mcp_servers
def _detect_tool_search_tools(self, tools: Optional[List]) -> bool:
"""Check if tool search tools are present in the tools list."""
if not tools:
return False
for tool in tools:
tool_type = tool.get("type", "")
if tool_type in [
"tool_search_tool_regex_20251119",
"tool_search_tool_bm25_20251119",
]:
return True
return False
def _separate_deferred_tools(self, tools: List) -> Tuple[List, List]:
"""
Separate tools into deferred and non-deferred lists.
Returns:
Tuple of (non_deferred_tools, deferred_tools)
"""
non_deferred = []
deferred = []
for tool in tools:
if tool.get("defer_loading", False):
deferred.append(tool)
else:
non_deferred.append(tool)
return non_deferred, deferred
def _expand_tool_references(
self,
content: List,
deferred_tools: List,
) -> List:
"""
Expand tool_reference blocks to full tool definitions.
When Anthropic's tool search returns results, it includes tool_reference blocks
that reference tools by name. This method expands those references to full
tool definitions from the deferred_tools catalog.
Args:
content: Response content that may contain tool_reference blocks
deferred_tools: List of deferred tools that can be referenced
Returns:
Content with tool_reference blocks expanded to full tool definitions
"""
if not deferred_tools:
return content
# Create a mapping of tool names to tool definitions
tool_map = {}
for tool in deferred_tools:
tool_name = tool.get("name") or tool.get("function", {}).get("name")
if tool_name:
tool_map[tool_name] = tool
# Expand tool references in content
expanded_content = []
for item in content:
if isinstance(item, dict) and item.get("type") == "tool_reference":
tool_name = item.get("tool_name")
if tool_name and tool_name in tool_map:
# Replace reference with full tool definition
expanded_content.append(tool_map[tool_name])
else:
# Keep the reference if we can't find the tool
expanded_content.append(item)
else:
expanded_content.append(item)
return expanded_content
def _map_stop_sequences(
    self, stop: Optional[Union[str, List[str]]]
) -> Optional[List[str]]:
    """Normalize an OpenAI stop param into Anthropic's stop_sequences list.

    Anthropic rejects whitespace-only stop sequences, so those are dropped
    (when litellm.drop_params is enabled) rather than forwarded. Returns
    None when nothing survives the filtering.
    """
    if isinstance(stop, str):
        if stop.isspace() and litellm.drop_params is True:
            # anthropic doesn't allow whitespace characters as stop-sequences
            return None
        return [stop]
    if isinstance(stop, list):
        kept = [
            sequence
            for sequence in stop
            # anthropic doesn't allow whitespace characters as stop-sequences
            if not (sequence.isspace() and litellm.drop_params is True)
        ]
        if kept:
            return kept
    return None
@staticmethod
def _map_reasoning_effort(
    reasoning_effort: Optional[Union[REASONING_EFFORT, str]],
    model: str,
) -> Optional[AnthropicThinkingParam]:
    """Translate OpenAI reasoning_effort into Anthropic's thinking param.

    Claude 4.6 models always get adaptive thinking (any effort level);
    other models get an explicit token budget per effort level.
    None / "none" disables thinking entirely.

    Raises:
        ValueError: for an unrecognized effort level on non-4.6 models.
    """
    if reasoning_effort is None or reasoning_effort == "none":
        return None
    if AnthropicConfig._is_claude_4_6_model(model):
        return AnthropicThinkingParam(
            type="adaptive",
        )
    budget_by_effort = {
        "low": DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
        "medium": DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
        "high": DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
        "minimal": DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET,
    }
    if reasoning_effort not in budget_by_effort:
        raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
    return AnthropicThinkingParam(
        type="enabled",
        budget_tokens=budget_by_effort[reasoning_effort],
    )
def _extract_json_schema_from_response_format(
self, value: Optional[dict]
) -> Optional[dict]:
if value is None:
return None
json_schema: Optional[dict] = None
if "response_schema" in value:
json_schema = value["response_schema"]
elif "json_schema" in value:
json_schema = value["json_schema"]["schema"]
return json_schema
def map_response_format_to_anthropic_output_format(
    self, value: Optional[dict]
) -> Optional[AnthropicOutputSchema]:
    """Build Anthropic's structured-output `output_format` from response_format.

    Resolves $ref/$defs locally (Anthropic doesn't support external schema
    references) and strips schema constraints the API rejects.

    Returns:
        AnthropicOutputSchema, or None when no JSON schema is present.
    """
    json_schema: Optional[dict] = self._extract_json_schema_from_response_format(
        value
    )
    if json_schema is None:
        return None
    # Resolve $ref/$defs before filtering — Anthropic doesn't support
    # external schema references (e.g., /$defs/CalendarEvent).
    import copy

    from litellm.litellm_core_utils.prompt_templates.common_utils import (
        unpack_defs,
    )

    json_schema = copy.deepcopy(json_schema)
    # BUG FIX: the previous `pop("$defs", json_schema.pop("definitions", {}))`
    # evaluated both pops eagerly, so when a schema carried BOTH keys the
    # legacy "definitions" entries were removed but silently discarded.
    # Pop each key separately and merge, with "$defs" winning on collisions.
    defs = json_schema.pop("$defs", {})
    legacy_defs = json_schema.pop("definitions", {})
    if legacy_defs:
        defs = {**legacy_defs, **defs}
    if defs:
        unpack_defs(json_schema, defs)
    # Filter out unsupported fields for Anthropic's output_format API
    filtered_schema = self.filter_anthropic_output_schema(json_schema)
    return AnthropicOutputSchema(
        type="json_schema",
        schema=filtered_schema,
    )
def map_response_format_to_anthropic_tool(
    self, value: Optional[dict], optional_params: dict, is_thinking_enabled: bool
) -> Optional[AnthropicMessagesTool]:
    """Convert response_format into a forced-tool-call JSON-mode tool.

    When using tools in this way — https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode:
    - You usually want to provide a single tool
    - You should set tool_choice (see Forcing tool use) to instruct the model
      to explicitly use that tool
    - Remember that the model will pass the input to the tool, so the name of
      the tool and description should be from the models perspective.

    Returns None when value is missing, is a no-op type ("text"), or no
    schema can be extracted.
    """
    if value is None:
        return None
    ignore_response_format_types = ["text"]
    if value["type"] in ignore_response_format_types:  # value is a no-op
        return None
    extracted_schema = self._extract_json_schema_from_response_format(value)
    if extracted_schema is None:
        return None
    return self._create_json_tool_call_for_response_format(
        json_schema=extracted_schema,
    )
def map_web_search_tool(
    self,
    value: OpenAIWebSearchOptions,
) -> AnthropicWebSearchTool:
    """Translate OpenAI web_search_options into Anthropic's hosted web-search tool.

    Copies over recognized approximate user_location fields and converts
    search_context_size into Anthropic's max_uses budget.
    """
    options = cast(OpenAIWebSearchOptions, value)
    search_tool = AnthropicWebSearchTool(
        type="web_search_20250305",
        name="web_search",
    )

    location = options.get("user_location")
    if location is not None:
        mapped_location = AnthropicWebSearchUserLocation(type="approximate")
        allowed_keys = AnthropicWebSearchUserLocation.__annotations__.keys()
        approximate = location.get("approximate")
        if approximate is not None:
            # Copy only fields the Anthropic type declares; "type" is fixed.
            for field, field_value in approximate.items():
                if field in allowed_keys and field != "type":
                    mapped_location[field] = field_value  # type: ignore
        search_tool["user_location"] = mapped_location

    ## MAP SEARCH CONTEXT SIZE
    context_size = options.get("search_context_size")
    if context_size is not None:
        search_tool["max_uses"] = ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES[context_size]
    return search_tool
@staticmethod
def map_openai_context_management_to_anthropic(
context_management: Union[List[Dict[str, Any]], Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
"""
OpenAI format: [{"type": "compaction", "compact_threshold": 200000}]
Anthropic format: {
"edits": [
{
"type": "compact_20260112",
"trigger": {"type": "input_tokens", "value": 150000}
}
]
}
Args:
context_management: OpenAI or Anthropic context_management parameter
Returns:
Anthropic-formatted context_management dict, or None if invalid
"""
# If already in Anthropic format (dict with 'edits'), pass through
if isinstance(context_management, dict) and "edits" in context_management:
return context_management
# If in OpenAI format (list), transform to Anthropic format
if isinstance(context_management, list):
anthropic_edits = []
for entry in context_management:
if not isinstance(entry, dict):
continue
entry_type = entry.get("type")
if entry_type == "compaction":
anthropic_edit: Dict[str, Any] = {"type": "compact_20260112"}
compact_threshold = entry.get("compact_threshold")
# Rewrite to 'trigger' with correct nesting if threshold exists
if compact_threshold is not None and isinstance(
compact_threshold, (int, float)
):
anthropic_edit["trigger"] = {
"type": "input_tokens",
"value": int(compact_threshold),
}
# Map any other keys by passthrough except handled ones
for k in entry:
if k not in {
"type",
"compact_threshold",
}: # only passthrough other keys
anthropic_edit[k] = entry[k]
anthropic_edits.append(anthropic_edit)
if anthropic_edits:
return {"edits": anthropic_edits}
return None
def map_openai_params(  # noqa: PLR0915
    self,
    non_default_params: dict,
    optional_params: dict,
    model: str,
    drop_params: bool,
) -> dict:
    """Translate OpenAI request params into Anthropic /messages params.

    Mutates and returns `optional_params`. Params without a matching branch
    are left untouched; thinking-token adjustments are applied at the end.
    """
    is_thinking_enabled = self.is_thinking_enabled(
        non_default_params=non_default_params
    )
    for param, value in non_default_params.items():
        if param == "max_tokens":
            # Coerce non-int values (e.g. floats) to a positive int.
            optional_params["max_tokens"] = (
                value if isinstance(value, int) else max(1, int(round(value)))
            )
        elif param == "max_completion_tokens":
            optional_params["max_tokens"] = (
                value if isinstance(value, int) else max(1, int(round(value)))
            )
        elif param == "tools":
            # check if optional params already has tools
            anthropic_tools, mcp_servers = self._map_tools(value)
            optional_params = self._add_tools_to_optional_params(
                optional_params=optional_params, tools=anthropic_tools
            )
            if mcp_servers:
                optional_params["mcp_servers"] = mcp_servers
        elif param == "tool_choice" or param == "parallel_tool_calls":
            # Both params feed a single Anthropic tool_choice, so either
            # key triggers the combined mapping.
            _tool_choice: Optional[
                AnthropicMessagesToolChoice
            ] = self._map_tool_choice(
                tool_choice=non_default_params.get("tool_choice"),
                parallel_tool_use=non_default_params.get("parallel_tool_calls"),
            )
            if _tool_choice is not None:
                optional_params["tool_choice"] = _tool_choice
        elif param == "stream" and value is True:
            optional_params["stream"] = value
        elif param == "stop" and (
            isinstance(value, str) or isinstance(value, list)
        ):
            _value = self._map_stop_sequences(value)
            if _value is not None:
                optional_params["stop_sequences"] = _value
        elif param == "temperature":
            optional_params["temperature"] = value
        elif param == "top_p":
            optional_params["top_p"] = value
        elif param == "response_format" and isinstance(value, dict):
            # Newer models use Anthropic's native output_format; older
            # models emulate JSON mode via a forced tool call.
            if any(
                substring in model
                for substring in {
                    "sonnet-4.5",
                    "sonnet-4-5",
                    "opus-4.1",
                    "opus-4-1",
                    "opus-4.5",
                    "opus-4-5",
                    "opus-4.6",
                    "opus-4-6",
                    "sonnet-4.6",
                    "sonnet-4-6",
                    "sonnet_4.6",
                    "sonnet_4_6",
                }
            ):
                _output_format = (
                    self.map_response_format_to_anthropic_output_format(value)
                )
                if _output_format is not None:
                    optional_params["output_format"] = _output_format
            else:
                _tool = self.map_response_format_to_anthropic_tool(
                    value, optional_params, is_thinking_enabled
                )
                if _tool is None:
                    continue
                if not is_thinking_enabled:
                    # Force the JSON tool only when thinking is off.
                    _tool_choice = {
                        "name": RESPONSE_FORMAT_TOOL_NAME,
                        "type": "tool",
                    }
                    optional_params["tool_choice"] = _tool_choice
                optional_params = self._add_tools_to_optional_params(
                    optional_params=optional_params, tools=[_tool]
                )
                optional_params["json_mode"] = True
        elif (
            param == "user"
            and value is not None
            and isinstance(value, str)
            and _valid_user_id(value)  # anthropic fails on emails
        ):
            optional_params["metadata"] = {"user_id": value}
        elif param == "thinking":
            optional_params["thinking"] = value
        elif param == "reasoning_effort" and isinstance(value, str):
            optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
                reasoning_effort=value, model=model
            )
            # For Claude 4.6 models, effort is controlled via output_config,
            # not thinking budget_tokens. Map reasoning_effort to output_config.
            if AnthropicConfig._is_claude_4_6_model(model):
                effort_map = {
                    "low": "low",
                    "minimal": "low",
                    "medium": "medium",
                    "high": "high",
                    "max": "max",
                }
                mapped_effort = effort_map.get(value, value)
                optional_params["output_config"] = {"effort": mapped_effort}
        elif param == "web_search_options" and isinstance(value, dict):
            hosted_web_search_tool = self.map_web_search_tool(
                cast(OpenAIWebSearchOptions, value)
            )
            self._add_tools_to_optional_params(
                optional_params=optional_params, tools=[hosted_web_search_tool]
            )
        elif param == "extra_headers":
            optional_params["extra_headers"] = value
        elif param == "context_management":
            # Supports both OpenAI list format and Anthropic dict format
            if isinstance(value, (list, dict)):
                anthropic_context_management = (
                    self.map_openai_context_management_to_anthropic(value)
                )
                if anthropic_context_management is not None:
                    optional_params[
                        "context_management"
                    ] = anthropic_context_management
        elif param == "speed" and isinstance(value, str):
            # Pass through Anthropic-specific speed parameter for fast mode
            optional_params["speed"] = value
        elif param == "cache_control" and isinstance(value, dict):
            # Pass through top-level cache_control for automatic prompt caching
            optional_params["cache_control"] = value
    ## handle thinking tokens
    self.update_optional_params_with_thinking_tokens(
        non_default_params=non_default_params, optional_params=optional_params
    )
    return optional_params
def _create_json_tool_call_for_response_format(
self,
json_schema: Optional[dict] = None,
) -> AnthropicMessagesTool:
"""
Handles creating a tool call for getting responses in JSON format.
Args:
json_schema (Optional[dict]): The JSON schema the response should be in
Returns:
AnthropicMessagesTool: The tool call to send to Anthropic API to get responses in JSON format
"""
_input_schema: AnthropicInputSchema = AnthropicInputSchema(
type="object",
)
if json_schema is None:
# Anthropic raises a 400 BadRequest error if properties is passed as None
# see usage with additionalProperties (Example 5) https://github.com/anthropics/anthropic-cookbook/blob/main/tool_use/extracting_structured_json.ipynb
_input_schema["additionalProperties"] = True
_input_schema["properties"] = {}
else:
_input_schema.update(cast(AnthropicInputSchema, json_schema))
_tool = AnthropicMessagesTool(
name=RESPONSE_FORMAT_TOOL_NAME, input_schema=_input_schema
)
return _tool
    def translate_system_message(
        self, messages: List[AllMessageValues]
    ) -> List[AnthropicSystemMessageContent]:
        """
        Translate system messages to the Anthropic `system` format.

        Removes system messages from the original list and returns a new list of
        anthropic system message content blocks.

        Filters out system messages containing x-anthropic-billing-header metadata.

        NOTE: mutates `messages` in place - every system message is popped so the
        remaining list can be sent as the Anthropic `messages` payload.
        """
        system_prompt_indices = []
        anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
        for idx, message in enumerate(messages):
            if message["role"] == "system":
                # Record index even for skipped blocks - the message is still
                # removed from `messages` below.
                system_prompt_indices.append(idx)
                system_message_block = ChatCompletionSystemMessage(**message)
                if isinstance(system_message_block["content"], str):
                    # Skip empty text blocks - Anthropic API raises errors for empty text
                    if not system_message_block["content"]:
                        continue
                    # Skip system messages containing x-anthropic-billing-header metadata
                    if system_message_block["content"].startswith(
                        "x-anthropic-billing-header:"
                    ):
                        continue
                    anthropic_system_message_content = AnthropicSystemMessageContent(
                        type="text",
                        text=system_message_block["content"],
                    )
                    # Preserve prompt-caching markers on the system block
                    if "cache_control" in system_message_block:
                        anthropic_system_message_content[
                            "cache_control"
                        ] = system_message_block["cache_control"]
                    anthropic_system_message_list.append(
                        anthropic_system_message_content
                    )
                elif isinstance(message["content"], list):
                    # Multi-part system message: translate each content block
                    for _content in message["content"]:
                        # Skip empty text blocks - Anthropic API raises errors for empty text
                        text_value = _content.get("text")
                        if _content.get("type") == "text" and not text_value:
                            continue
                        # Skip system messages containing x-anthropic-billing-header metadata
                        if (
                            _content.get("type") == "text"
                            and text_value
                            and text_value.startswith("x-anthropic-billing-header:")
                        ):
                            continue
                        anthropic_system_message_content = (
                            AnthropicSystemMessageContent(
                                type=_content.get("type"),
                                text=text_value,
                            )
                        )
                        # Preserve prompt-caching markers on each content block
                        if "cache_control" in _content:
                            anthropic_system_message_content[
                                "cache_control"
                            ] = _content["cache_control"]
                        anthropic_system_message_list.append(
                            anthropic_system_message_content
                        )
        if len(system_prompt_indices) > 0:
            # Pop in reverse so earlier indices remain valid during removal
            for idx in reversed(system_prompt_indices):
                messages.pop(idx)
        return anthropic_system_message_list
def add_code_execution_tool(
self,
messages: List[AllAnthropicMessageValues],
tools: List[Union[AllAnthropicToolsValues, Dict]],
) -> List[Union[AllAnthropicToolsValues, Dict]]:
"""if 'container_upload' in messages, add code_execution tool"""
add_code_execution_tool = False
for message in messages:
message_content = message.get("content", None)
if message_content and isinstance(message_content, list):
for content in message_content:
content_type = content.get("type", None)
if content_type == "container_upload":
add_code_execution_tool = True
break
if add_code_execution_tool:
## check if code_execution tool is already in tools
for tool in tools:
tool_type = tool.get("type", None)
if (
tool_type
and isinstance(tool_type, str)
and tool_type.startswith("code_execution")
):
return tools
tools.append(
AnthropicCodeExecutionTool(
name="code_execution",
type="code_execution_20250522",
)
)
return tools
def _ensure_beta_header(self, headers: dict, beta_value: str) -> None:
"""
Ensure a beta header value is present in the anthropic-beta header.
Merges with existing values instead of overriding them.
Args:
headers: Dictionary of headers to update
beta_value: The beta header value to add
"""
existing_beta = headers.get("anthropic-beta")
if existing_beta is None:
headers["anthropic-beta"] = beta_value
return
existing_values = [beta.strip() for beta in existing_beta.split(",")]
if beta_value not in existing_values:
headers["anthropic-beta"] = f"{existing_beta}, {beta_value}"
def _ensure_context_management_beta_header(
self, headers: dict, context_management: object
) -> None:
"""
Add appropriate beta headers based on context_management edits.
"""
edits = []
# If anthropic format (dict with "edits" key)
if isinstance(context_management, dict) and "edits" in context_management:
edits = context_management.get("edits", [])
# If OpenAI format: list of context management entries
elif isinstance(context_management, list):
edits = context_management
# Defensive: ignore/fallback if context_management not valid
else:
return
has_compact = False
has_other = False
for edit in edits:
edit_type = edit.get("type", "")
if edit_type == "compact_20260112" or edit_type == "compaction":
has_compact = True
else:
has_other = True
# Add compact header if any compact edits/entries exist
if has_compact:
self._ensure_beta_header(
headers, ANTHROPIC_BETA_HEADER_VALUES.COMPACT_2026_01_12.value
)
# Add context management header if any other edits/entries exist
if has_other:
self._ensure_beta_header(
headers,
ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value,
)
def update_headers_with_optional_anthropic_beta(
self, headers: dict, optional_params: dict
) -> dict:
"""Update headers with optional anthropic beta."""
# Skip adding beta headers for Vertex requests
# Vertex AI handles these headers differently
is_vertex_request = optional_params.get("is_vertex_request", False)
if is_vertex_request:
return headers
_tools = optional_params.get("tools", [])
for tool in _tools:
if tool.get("type", None) and tool.get("type").startswith(
ANTHROPIC_HOSTED_TOOLS.WEB_FETCH.value
):
self._ensure_beta_header(
headers, ANTHROPIC_BETA_HEADER_VALUES.WEB_FETCH_2025_09_10.value
)
elif tool.get("type", None) and tool.get("type").startswith(
ANTHROPIC_HOSTED_TOOLS.MEMORY.value
):
self._ensure_beta_header(
headers,
ANTHROPIC_BETA_HEADER_VALUES.CONTEXT_MANAGEMENT_2025_06_27.value,
)
if optional_params.get("context_management") is not None:
self._ensure_context_management_beta_header(
headers, optional_params["context_management"]
)
if optional_params.get("output_format") is not None:
self._ensure_beta_header(
headers, ANTHROPIC_BETA_HEADER_VALUES.STRUCTURED_OUTPUT_2025_09_25.value
)
if optional_params.get("speed") == "fast":
self._ensure_beta_header(
headers, ANTHROPIC_BETA_HEADER_VALUES.FAST_MODE_2026_02_01.value
)
return headers
def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
"""
Translate messages to anthropic format.
"""
## VALIDATE REQUEST
"""
Anthropic doesn't support tool calling without `tools=` param specified.
"""
from litellm.litellm_core_utils.prompt_templates.factory import (
anthropic_messages_pt,
)
if (
"tools" not in optional_params
and messages is not None
and has_tool_call_blocks(messages)
):
if litellm.modify_params:
optional_params["tools"], _ = self._map_tools(
add_dummy_tool(custom_llm_provider="anthropic")
)
else:
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
model="",
llm_provider="anthropic",
)
# Drop thinking param if thinking is enabled but thinking_blocks are missing
# This prevents the error: "Expected thinking or redacted_thinking, but found tool_use"
#
# IMPORTANT: Only drop thinking if NO assistant messages have thinking_blocks.
# If any message has thinking_blocks, we must keep thinking enabled, otherwise
# Anthropic errors with: "When thinking is disabled, an assistant message cannot contain thinking"
# Related issue: https://github.com/BerriAI/litellm/issues/18926
if (
optional_params.get("thinking") is not None
and messages is not None
and last_assistant_with_tool_calls_has_no_thinking_blocks(messages)
and not any_assistant_message_has_thinking_blocks(messages)
):
if litellm.modify_params:
optional_params.pop("thinking", None)
litellm.verbose_logger.warning(
"Dropping 'thinking' param because the last assistant message with tool_calls "
"has no thinking_blocks. The model won't use extended thinking for this turn."
)
headers = self.update_headers_with_optional_anthropic_beta(
headers=headers, optional_params=optional_params
)
# Separate system prompt from rest of message
anthropic_system_message_list = self.translate_system_message(messages=messages)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
anthropic_messages = anthropic_messages_pt(
model=model,
messages=messages,
llm_provider=self.custom_llm_provider or "anthropic",
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Add code_execution tool if container_upload is in messages
_tools = (
cast(
Optional[List[Union[AllAnthropicToolsValues, Dict]]],
optional_params.get("tools"),
)
or []
)
tools = self.add_code_execution_tool(messages=anthropic_messages, tools=_tools)
if len(tools) > 1:
optional_params["tools"] = tools
## Load Config
config = litellm.AnthropicConfig.get_config(model=model)
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle user_id in metadata
_litellm_metadata = litellm_params.get("metadata", None)
if (
_litellm_metadata
and isinstance(_litellm_metadata, dict)
and "user_id" in _litellm_metadata
and _litellm_metadata["user_id"] is not None
and _valid_user_id(_litellm_metadata["user_id"])
):
optional_params["metadata"] = {"user_id": _litellm_metadata["user_id"]}
# Remove internal LiteLLM parameters that should not be sent to Anthropic API
optional_params.pop("is_vertex_request", None)
data = {
"model": model,
"messages": anthropic_messages,
**optional_params,
}
## Handle output_config (Anthropic-specific parameter)
if "output_config" in optional_params:
output_config = optional_params.get("output_config")
if output_config and isinstance(output_config, dict):
effort = output_config.get("effort")
if effort and effort not in ["high", "medium", "low", "max"]:
raise ValueError(
f"Invalid effort value: {effort}. Must be one of: 'high', 'medium', 'low', 'max'"
)
if effort == "max" and not self._is_opus_4_6_model(model):
raise ValueError(
f"effort='max' is only supported by Claude Opus 4.6. Got model: {model}"
)
data["output_config"] = output_config
return data
def _transform_response_for_json_mode(
self,
json_mode: Optional[bool],
tool_calls: List[ChatCompletionToolCallChunk],
) -> Optional[LitellmMessage]:
_message: Optional[LitellmMessage] = None
if json_mode is True and len(tool_calls) == 1:
# check if tool name is the default tool name
json_mode_content_str: Optional[str] = None
if (
"name" in tool_calls[0]["function"]
and tool_calls[0]["function"]["name"] == RESPONSE_FORMAT_TOOL_NAME
):
json_mode_content_str = tool_calls[0]["function"].get("arguments")
if json_mode_content_str is not None:
_message = AnthropicConfig._convert_tool_response_to_message(
tool_calls=tool_calls,
)
return _message
    def extract_response_content(
        self, completion_response: dict
    ) -> Tuple[
        str,
        Optional[List[Any]],
        Optional[
            List[
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ],
        Optional[str],
        List[ChatCompletionToolCallChunk],
        Optional[List[Any]],
        Optional[List[Any]],
        Optional[List[Any]],
    ]:
        """
        Walk the Anthropic `content` array and bucket each block by kind.

        Returns:
            Tuple of (text_content, citations, thinking_blocks,
            reasoning_content, tool_calls, web_search_results, tool_results,
            compaction_blocks). The Optional entries are None when no block of
            that kind was present.
        """
        text_content = ""
        citations: Optional[List[Any]] = None
        thinking_blocks: Optional[
            List[
                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
            ]
        ] = None
        reasoning_content: Optional[str] = None
        tool_calls: List[ChatCompletionToolCallChunk] = []
        web_search_results: Optional[List[Any]] = None
        tool_results: Optional[List[Any]] = None
        compaction_blocks: Optional[List[Any]] = None
        for idx, content in enumerate(completion_response["content"]):
            if content["type"] == "text":
                text_content += content["text"]
            ## TOOL CALLING
            elif content["type"] == "tool_use" or content["type"] == "server_tool_use":
                tool_call = AnthropicConfig.convert_tool_use_to_openai_format(
                    anthropic_tool_content=content,
                    index=idx,
                )
                tool_calls.append(tool_call)
            ## TOOL RESULTS - handle all tool result types (code execution, etc.)
            elif content["type"].endswith("_tool_result"):
                # Skip tool_search_tool_result as it's internal metadata
                # (the `continue` also skips the citations check below for it)
                if content["type"] == "tool_search_tool_result":
                    continue
                # Handle web_search_tool_result separately for backwards compatibility
                if content["type"] == "web_search_tool_result":
                    if web_search_results is None:
                        web_search_results = []
                    web_search_results.append(content)
                elif content["type"] == "web_fetch_tool_result":
                    if web_search_results is None:
                        web_search_results = []
                    web_search_results.append(content)
                else:
                    # All other tool results (bash_code_execution_tool_result, text_editor_code_execution_tool_result, etc.)
                    if tool_results is None:
                        tool_results = []
                    tool_results.append(content)
            # Detected by key, not by type: any block carrying a non-None
            # "thinking" field counts as a thinking block. This branch must stay
            # BEFORE the redacted_thinking branch below - redacted blocks
            # presumably have no "thinking" key and fall through (confirm
            # against Anthropic's extended-thinking docs).
            elif content.get("thinking", None) is not None:
                if thinking_blocks is None:
                    thinking_blocks = []
                thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
            elif content["type"] == "redacted_thinking":
                if thinking_blocks is None:
                    thinking_blocks = []
                thinking_blocks.append(
                    cast(ChatCompletionRedactedThinkingBlock, content)
                )
            ## COMPACTION
            elif content["type"] == "compaction":
                if compaction_blocks is None:
                    compaction_blocks = []
                compaction_blocks.append(content)
            ## CITATIONS
            # Intentionally NOT part of the elif chain: citations can ride along
            # on any (non-skipped) content block.
            if content.get("citations") is not None:
                if citations is None:
                    citations = []
                citations.append(
                    [
                        {
                            **citation,
                            "supported_text": content.get("text", ""),
                        }
                        for citation in content["citations"]
                    ]
                )
        # Concatenate the visible thinking text into one reasoning string
        if thinking_blocks is not None:
            reasoning_content = ""
            for block in thinking_blocks:
                thinking_content = cast(Optional[str], block.get("thinking"))
                if thinking_content is not None:
                    reasoning_content += thinking_content
        return (
            text_content,
            citations,
            thinking_blocks,
            reasoning_content,
            tool_calls,
            web_search_results,
            tool_results,
            compaction_blocks,
        )
    def calculate_usage(
        self,
        usage_object: dict,
        reasoning_content: Optional[str],
        completion_response: Optional[dict] = None,
        speed: Optional[str] = None,
    ) -> Usage:
        """
        Convert Anthropic's `usage` payload into a LiteLLM `Usage` object.

        Cache-creation and cache-read tokens are ADDED to `prompt_tokens`, so
        the returned prompt count reflects total input-side tokens.

        Args:
            usage_object: Raw `usage` dict from the Anthropic response.
            reasoning_content: Concatenated thinking text; used to estimate the
                reasoning-token count (not reported directly by the API).
            completion_response: Full parsed response; fallback source for
                counting tool_search server-tool invocations from content blocks.
            speed: Optional "speed" mode echoed into the usage object.
        """
        # NOTE: Sometimes the usage object has None set explicitly for token counts, meaning .get() & key access returns None, and we need to account for this
        prompt_tokens = usage_object.get("input_tokens", 0) or 0
        completion_tokens = usage_object.get("output_tokens", 0) or 0
        _usage = usage_object
        cache_creation_input_tokens: int = 0
        cache_read_input_tokens: int = 0
        cache_creation_token_details: Optional[CacheCreationTokenDetails] = None
        web_search_requests: Optional[int] = None
        tool_search_requests: Optional[int] = None
        inference_geo: Optional[str] = None
        if "inference_geo" in _usage and _usage["inference_geo"] is not None:
            inference_geo = _usage["inference_geo"]
        # Cache tokens are folded into the prompt token total
        if (
            "cache_creation_input_tokens" in _usage
            and _usage["cache_creation_input_tokens"] is not None
        ):
            cache_creation_input_tokens = _usage["cache_creation_input_tokens"]
            prompt_tokens += cache_creation_input_tokens
        if (
            "cache_read_input_tokens" in _usage
            and _usage["cache_read_input_tokens"] is not None
        ):
            cache_read_input_tokens = _usage["cache_read_input_tokens"]
            prompt_tokens += cache_read_input_tokens
        if "server_tool_use" in _usage and _usage["server_tool_use"] is not None:
            if (
                "web_search_requests" in _usage["server_tool_use"]
                and _usage["server_tool_use"]["web_search_requests"] is not None
            ):
                web_search_requests = cast(
                    int, _usage["server_tool_use"]["web_search_requests"]
                )
            if (
                "tool_search_requests" in _usage["server_tool_use"]
                and _usage["server_tool_use"]["tool_search_requests"] is not None
            ):
                tool_search_requests = cast(
                    int, _usage["server_tool_use"]["tool_search_requests"]
                )
        # Count tool_search_requests from content blocks if not in usage
        # Anthropic doesn't always include tool_search_requests in the usage object
        if tool_search_requests is None and completion_response is not None:
            tool_search_count = 0
            for content in completion_response.get("content", []):
                if content.get("type") == "server_tool_use":
                    tool_name = content.get("name", "")
                    if "tool_search" in tool_name:
                        tool_search_count += 1
            if tool_search_count > 0:
                tool_search_requests = tool_search_count
        if "cache_creation" in _usage and _usage["cache_creation"] is not None:
            # Breakdown of cache-creation tokens by TTL (5m vs 1h entries)
            cache_creation_token_details = CacheCreationTokenDetails(
                ephemeral_5m_input_tokens=_usage["cache_creation"].get(
                    "ephemeral_5m_input_tokens"
                ),
                ephemeral_1h_input_tokens=_usage["cache_creation"].get(
                    "ephemeral_1h_input_tokens"
                ),
            )
        prompt_tokens_details = PromptTokensDetailsWrapper(
            cached_tokens=cache_read_input_tokens,
            cache_creation_tokens=cache_creation_input_tokens,
            cache_creation_token_details=cache_creation_token_details,
        )
        # Always populate completion_token_details, not just when there's reasoning_content
        # Reasoning tokens are ESTIMATED by re-tokenizing the thinking text
        reasoning_tokens = (
            token_counter(text=reasoning_content, count_response_tokens=True)
            if reasoning_content
            else 0
        )
        completion_token_details = CompletionTokensDetailsWrapper(
            reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else 0,
            text_tokens=(
                completion_tokens - reasoning_tokens
                if reasoning_tokens > 0
                else completion_tokens
            ),
        )
        total_tokens = prompt_tokens + completion_tokens
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            prompt_tokens_details=prompt_tokens_details,
            cache_creation_input_tokens=cache_creation_input_tokens,
            cache_read_input_tokens=cache_read_input_tokens,
            completion_tokens_details=completion_token_details,
            server_tool_use=(
                ServerToolUse(
                    web_search_requests=web_search_requests,
                    tool_search_requests=tool_search_requests,
                )
                if (web_search_requests is not None or tool_search_requests is not None)
                else None
            ),
            inference_geo=inference_geo,
            speed=speed,
        )
        return usage
    def transform_parsed_response(
        self,
        completion_response: dict,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        json_mode: Optional[bool] = None,
        prefix_prompt: Optional[str] = None,
        speed: Optional[str] = None,
    ):
        """
        Populate `model_response` from a parsed Anthropic completion payload.

        Args:
            completion_response: Parsed JSON body from the Anthropic API.
            raw_response: Original httpx response (headers/status code).
            model_response: ModelResponse to fill in and return.
            json_mode: When True, a lone response-format tool call is converted
                back into a plain JSON message.
            prefix_prompt: Assistant prefill text to re-prepend to the output.
            speed: Optional Anthropic "speed" mode, forwarded into usage.

        Raises:
            AnthropicError: when the payload contains an "error" key.
        """
        _hidden_params: Dict = {}
        _hidden_params["additional_headers"] = process_anthropic_headers(
            dict(raw_response.headers)
        )
        if "error" in completion_response:
            response_headers = getattr(raw_response, "headers", None)
            raise AnthropicError(
                message=str(completion_response["error"]),
                status_code=raw_response.status_code,
                headers=response_headers,
            )
        else:
            text_content = ""
            citations: Optional[List[Any]] = None
            thinking_blocks: Optional[
                List[
                    Union[
                        ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
                    ]
                ]
            ] = None
            reasoning_content: Optional[str] = None
            tool_calls: List[ChatCompletionToolCallChunk] = []
            # Bucket the Anthropic content blocks into OpenAI-equivalent parts
            (
                text_content,
                citations,
                thinking_blocks,
                reasoning_content,
                tool_calls,
                web_search_results,
                tool_results,
                compaction_blocks,
            ) = self.extract_response_content(completion_response=completion_response)
            # Re-prepend the assistant prefill (Anthropic omits it from output)
            if (
                prefix_prompt is not None
                and not text_content.startswith(prefix_prompt)
                and not litellm.disable_add_prefix_to_prompt
            ):
                text_content = prefix_prompt + text_content
            context_management: Optional[Dict] = completion_response.get(
                "context_management"
            )
            container: Optional[Dict] = completion_response.get("container")
            # Anthropic-specific extras surfaced via provider_specific_fields
            provider_specific_fields: Dict[str, Any] = {
                "citations": citations,
                "thinking_blocks": thinking_blocks,
            }
            if context_management is not None:
                provider_specific_fields["context_management"] = context_management
            if web_search_results is not None:
                provider_specific_fields["web_search_results"] = web_search_results
            if tool_results is not None:
                provider_specific_fields["tool_results"] = tool_results
            if container is not None:
                provider_specific_fields["container"] = container
            if compaction_blocks is not None:
                provider_specific_fields["compaction_blocks"] = compaction_blocks
            _message = litellm.Message(
                tool_calls=tool_calls,
                content=text_content or None,
                provider_specific_fields=provider_specific_fields,
                thinking_blocks=thinking_blocks,
                reasoning_content=reasoning_content,
            )
            _message.provider_specific_fields = provider_specific_fields
            ## HANDLE JSON MODE - anthropic returns single function call
            json_mode_message = self._transform_response_for_json_mode(
                json_mode=json_mode,
                tool_calls=tool_calls,
            )
            if json_mode_message is not None:
                # JSON mode succeeded: report a normal stop, not tool_use
                completion_response["stop_reason"] = "stop"
                _message = json_mode_message
            model_response.choices[0].message = _message  # type: ignore
            model_response._hidden_params["original_response"] = completion_response[
                "content"
            ]  # allow user to access raw anthropic tool calling response
            model_response.choices[0].finish_reason = cast(
                OpenAIChatCompletionFinishReason,
                map_finish_reason(completion_response["stop_reason"]),
            )
            ## CALCULATING USAGE
            usage = self.calculate_usage(
                usage_object=completion_response["usage"],
                reasoning_content=reasoning_content,
                completion_response=completion_response,
                speed=speed,
            )
            setattr(model_response, "usage", usage)  # type: ignore
        model_response.created = int(time.time())
        model_response.model = completion_response["model"]
        # NOTE(review): this replaces _hidden_params wholesale, which appears to
        # drop the "original_response" key set above (unless ModelResponse merges
        # internally) - confirm intended.
        model_response._hidden_params = _hidden_params
        return model_response
def get_prefix_prompt(self, messages: List[AllMessageValues]) -> Optional[str]:
"""
Get the prefix prompt from the messages.
Check last message
- if it's assistant message, with 'prefix': true, return the content
E.g. : {"role": "assistant", "content": "Argentina", "prefix": True}
"""
if len(messages) == 0:
return None
message = messages[-1]
message_content = message.get("content")
if (
message["role"] == "assistant"
and message.get("prefix", False)
and isinstance(message_content, str)
):
return message_content
return None
def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LoggingClass,
request_data: Dict,
messages: List[AllMessageValues],
optional_params: Dict,
litellm_params: dict,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
## LOGGING
logging_obj.post_call(
input=messages,
api_key=api_key,
original_response=raw_response.text,
additional_args={"complete_input_dict": request_data},
)
## RESPONSE OBJECT
try:
completion_response = raw_response.json()
except Exception as e:
response_headers = getattr(raw_response, "headers", None)
raise AnthropicError(
message="Unable to get json response - {}, Original Response: {}".format(
str(e), raw_response.text
),
status_code=raw_response.status_code,
headers=response_headers,
)
prefix_prompt = self.get_prefix_prompt(messages=messages)
speed = optional_params.get("speed")
model_response = self.transform_parsed_response(
completion_response=completion_response,
raw_response=raw_response,
model_response=model_response,
json_mode=json_mode,
prefix_prompt=prefix_prompt,
speed=speed,
)
return model_response
@staticmethod
def _convert_tool_response_to_message(
tool_calls: List[ChatCompletionToolCallChunk],
) -> Optional[LitellmMessage]:
"""
In JSON mode, Anthropic API returns JSON schema as a tool call, we need to convert it to a message to follow the OpenAI format
"""
## HANDLE JSON MODE - anthropic returns single function call
json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
"arguments"
)
try:
if json_mode_content_str is not None:
args = json.loads(json_mode_content_str)
if (
isinstance(args, dict)
and (values := args.get("values")) is not None
):
_message = litellm.Message(content=json.dumps(values))
return _message
else:
# a lot of the times the `values` key is not present in the tool response
# relevant issue: https://github.com/BerriAI/litellm/issues/6741
_message = litellm.Message(content=json.dumps(args))
return _message
except json.JSONDecodeError:
# json decode error does occur, return the original tool response str
return litellm.Message(content=json_mode_content_str)
return None
def get_error_class(
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
) -> BaseLLMException:
return AnthropicError(
status_code=status_code,
message=error_message,
headers=cast(httpx.Headers, headers),
)
def _valid_user_id(user_id: str) -> bool:
"""
Validate that user_id is not an email or phone number.
Returns: bool: True if valid (not email or phone), False otherwise
"""
email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
phone_pattern = r"^\+?[\d\s\(\)-]{7,}$"
if re.match(email_pattern, user_id):
return False
if re.match(phone_pattern, user_id):
return False
return True