### Hide pydantic namespace conflict warnings globally ###
from __future__ import annotations

import warnings

warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
# Suppress Pydantic 2.11+ deprecation warning about accessing model_fields on instances
# This warning can accumulate during streaming and cause memory leaks
warnings.filterwarnings(
    "ignore", message=".*Accessing the.*attribute on the instance is deprecated.*"
)

### INIT VARIABLES #########################
import threading
import os

# Load .env before any other litellm imports so env vars (e.g. LITELLM_UI_SESSION_DURATION) are available
import dotenv as _dotenv

if os.getenv("LITELLM_MODE", "DEV") == "DEV":
    _dotenv.load_dotenv()

from typing import (
    Callable,
    List,
    Optional,
    Dict,
    Union,
    Any,
    Literal,
    get_args,
    TYPE_CHECKING,
    Tuple,
    overload,
    Type,
)

from litellm.types.integrations.datadog import DatadogInitParams
from litellm._logging import (
    set_verbose,
    _turn_on_debug,
    verbose_logger,
    json_logs,
    _turn_on_json,
    log_level,
)
import re
from litellm.constants import (
    DEFAULT_BATCH_SIZE,
    DEFAULT_FLUSH_INTERVAL_SECONDS,
    ROUTER_MAX_FALLBACKS,
    DEFAULT_MAX_RETRIES,
    DEFAULT_REPLICATE_POLLING_RETRIES,
    DEFAULT_REPLICATE_POLLING_DELAY_SECONDS,
    LITELLM_CHAT_PROVIDERS,
    HUMANLOOP_PROMPT_CACHE_TTL_SECONDS,
    OPENAI_CHAT_COMPLETION_PARAMS,
    OPENAI_CHAT_COMPLETION_PARAMS as _openai_completion_params,  # backwards compatibility
    OPENAI_FINISH_REASONS,
    OPENAI_FINISH_REASONS as _openai_finish_reasons,  # backwards compatibility
    openai_compatible_endpoints,
    openai_compatible_providers,
    openai_text_completion_compatible_providers,
    _openai_like_providers,
    replicate_models,
    clarifai_models,
    huggingface_models,
    empower_models,
    together_ai_models,
    baseten_models,
    WANDB_MODELS,
    REPEATED_STREAMING_CHUNK_LIMIT,
    request_timeout,
    open_ai_embedding_models,
    cohere_embedding_models,
    bedrock_embedding_models,
    known_tokenizer_config,
    BEDROCK_INVOKE_PROVIDERS_LITERAL,
    BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
    BEDROCK_CONVERSE_MODELS,
    DEFAULT_MAX_TOKENS,
    DEFAULT_SOFT_BUDGET,
    DEFAULT_ALLOWED_FAILS,
)
import httpx

# register_async_client_cleanup is lazy-loaded and called on first access

litellm_mode = os.getenv("LITELLM_MODE", "DEV")  # "PRODUCTION", "DEV"
####################################################
if set_verbose:
    _turn_on_debug()
####################################################
### Callbacks / Logging / Success / Failure Handlers #####
CALLBACK_TYPES = Union[str, Callable, "CustomLogger"]  # CustomLogger is lazy-loaded
input_callback: List[CALLBACK_TYPES] = []
success_callback: List[CALLBACK_TYPES] = []
failure_callback: List[CALLBACK_TYPES] = []
service_callback: List[CALLBACK_TYPES] = []
# logging_callback_manager is lazy-loaded via __getattr__
_custom_logger_compatible_callbacks_literal = Literal[
    "lago", "openmeter", "logfire", "literalai", "litellm_agent",
    "dynamic_rate_limiter", "dynamic_rate_limiter_v3", "langsmith", "prometheus", "otel",
    "datadog", "datadog_metrics", "datadog_llm_observability", "galileo", "braintrust",
    "arize", "arize_phoenix", "langtrace", "gcs_bucket", "azure_storage",
    "opik", "argilla", "mlflow", "langfuse", "langfuse_otel",
    "weave_otel", "pagerduty", "humanloop", "azure_sentinel", "gcs_pubsub",
    "agentops", "anthropic_cache_control_hook", "generic_api", "resend_email", "sendgrid_email",
    "smtp_email", "deepeval", "s3_v2", "aws_sqs", "vector_store_pre_call_hook",
    "dotprompt", "bitbucket", "gitlab", "cloudzero", "focus",
    "posthog", "levo",
]
cold_storage_custom_logger: Optional[_custom_logger_compatible_callbacks_literal] = None
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None
_known_custom_logger_compatible_callbacks: List = list(
    get_args(_custom_logger_compatible_callbacks_literal)
)
callbacks: List[
    Union[Callable, _custom_logger_compatible_callbacks_literal, "CustomLogger"]
] = []  # CustomLogger is lazy-loaded
callback_settings: Dict[str, Dict[str, Any]] = {}
initialized_langfuse_clients: int = 0
langfuse_default_tags: Optional[List[str]] = None
langsmith_batch_size: Optional[int] = None
prometheus_initialize_budget_metrics: Optional[bool] = False
require_auth_for_metrics_endpoint: Optional[bool] = False
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False  # if you want to use v1 datadog logged payload
gcs_pub_sub_use_v1: Optional[bool] = False  # if you want to use v1 gcs pubsub logged payload
generic_api_use_v1: Optional[bool] = False  # if you want to use v1 generic api logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None
# internal variables - async custom callbacks are routed here. CustomLogger is lazy-loaded.
_async_input_callback: List[Union[str, Callable, "CustomLogger"]] = []
_async_success_callback: List[Union[str, Callable, "CustomLogger"]] = []
_async_failure_callback: List[Union[str, Callable, "CustomLogger"]] = []
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
standard_logging_payload_excluded_fields: Optional[
    List[str]
] = None  # Fields to exclude from StandardLoggingPayload before callbacks receive it
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[
    bool
] = None  # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False  # Enterprise feature, allow users to see audit logs
### end of callbacks #############
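# Illustrative sketch of wiring the callback lists above. String entries name
# built-in integrations from _custom_logger_compatible_callbacks_literal;
# `track_cost` is a hypothetical user-defined handler, not part of litellm:
#
#   import litellm
#
#   def track_cost(kwargs, completion_response, start_time, end_time):
#       print(kwargs.get("response_cost"))  # runs after each successful call
#
#   litellm.success_callback = ["langfuse", track_cost]
#   litellm.failure_callback = ["langfuse"]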
email: Optional[str] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[str] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens: int = DEFAULT_MAX_TOKENS  # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
modify_params = bool(os.getenv("LITELLM_MODIFY_PARAMS", False))
use_chat_completions_url_for_anthropic_messages: bool = bool(
    os.getenv("LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES", False)
)  # When True, routes OpenAI /v1/messages requests to chat/completions instead of the Responses API
retry = True
### AUTH ###
api_key: Optional[str] = None
openai_key: Optional[str] = None
groq_key: Optional[str] = None
gigachat_key: Optional[str] = None
databricks_key: Optional[str] = None
openai_like_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
bytez_key: Optional[str] = None
cohere_key: Optional[str] = None
infinity_key: Optional[str] = None
clarifai_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
ollama_key: Optional[str] = None
openrouter_key: Optional[str] = None
datarobot_key: Optional[str] = None
predibase_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
predibase_tenant_id: Optional[str] = None
togetherai_api_key: Optional[str] = None
cloudflare_api_key: Optional[str] = None
vercel_ai_gateway_key: Optional[str] = None
baseten_key: Optional[str] = None
llama_api_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None
novita_api_key: Optional[str] = None
snowflake_key: Optional[str] = None
gradient_ai_api_key: Optional[str] = None
nebius_key: Optional[str] = None
wandb_key: Optional[str] = None
heroku_key: Optional[str] = None
cometapi_key: Optional[str] = None
ovhcloud_key: Optional[str] = None
lemonade_key: Optional[str] = None
sap_service_key: Optional[str] = None
amazon_nova_api_key: Optional[str] = None
common_cloud_provider_auth_params: dict = {
    "params": ["project", "region_name", "token"],
    "providers": ["vertex_ai", "bedrock", "watsonx", "azure", "vertex_ai_beta"],
}
use_litellm_proxy: bool = False  # when True, requests will be sent to the specified litellm proxy endpoint
use_client: bool = False
ssl_verify: Union[str, bool] = True
ssl_security_level: Optional[str] = None
ssl_certificate: Optional[str] = None
ssl_ecdh_curve: Optional[str] = None  # Set to 'X25519' to disable PQC and improve performance
disable_streaming_logging: bool = False
disable_token_counter: bool = False
disable_add_transform_inline_image_block: bool = False
disable_add_user_agent_to_request_tags: bool = False
disable_anthropic_gemini_context_caching_transform: bool = False
extra_spend_tag_headers: Optional[List[str]] = None
in_memory_llm_clients_cache: "LLMClientCache"
safe_memory_mode: bool = False
enable_azure_ad_token_refresh: Optional[bool] = False
# Proxy Authentication - auto-obtain/refresh OAuth2/JWT tokens for LiteLLM Proxy
proxy_auth: Optional[Any] = None
### DEFAULT AZURE API VERSION ###
AZURE_DEFAULT_API_VERSION = "2025-02-01-preview"  # this is updated to the latest
### DEFAULT WATSONX API VERSION ###
WATSONX_DEFAULT_API_VERSION = "2024-03-13"
### COHERE EMBEDDINGS DEFAULT TYPE ###
COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: "COHERE_EMBEDDING_INPUT_TYPES" = "search_document"
### CREDENTIALS ###
credential_list: List["CredentialItem"] = []
### GUARDRAILS ###
llamaguard_model_name: Optional[str] = None
openai_moderations_model_name: Optional[str] = None
presidio_ad_hoc_recognizers: Optional[str] = None
google_moderation_confidence_threshold: Optional[float] = None
llamaguard_unsafe_content_categories: Optional[str] = None
blocked_user_list: Optional[Union[str, List]] = None
banned_keywords_list: Optional[Union[str, List]] = None
llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
guardrail_name_config_map: Dict[str, GuardrailItem] = {}
include_cost_in_streaming_usage: bool = False
reasoning_auto_summary: bool = False
### PROMPTS ####
from litellm.types.prompts.init_prompts import PromptSpec

prompt_name_config_map: Dict[str, PromptSpec] = {}
##################
### PREVIEW FEATURES ###
enable_preview_features: bool = False
return_response_headers: bool = False  # get response headers from LLM API providers - example x-remaining-requests
enable_json_schema_validation: bool = False
enable_key_alias_format_validation: bool = False  # opt-in validation of key_alias format on /key/generate and /key/update
####################
logging: bool = True
enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = False  # feature-flag for caching on optional params - e.g. 'top_k'
caching: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional["Cache"] = None  # cache object <- use this - https://docs.litellm.ai/docs/caching
default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None
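# Illustrative sketch of enabling response caching via the `cache` object above
# (see https://docs.litellm.ai/docs/caching). Cache() is assumed to default to
# local in-memory caching; the TTL value is an example:
#
#   import litellm
#   from litellm.caching.caching import Cache
#
#   litellm.cache = Cache()  # repeated identical completion() calls now hit the cache
#   litellm.default_in_memory_ttl = 60.0  # seconds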
model_alias_map: Dict[str, str] = {}
model_group_settings: Optional["ModelGroupSettings"] = None
max_budget: float = 0.0  # set the max budget across all providers
budget_duration: Optional[
    str
] = None  # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = (
    DEFAULT_SOFT_BUDGET  # by default all litellm proxy keys have a soft budget of 50.0
)
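# Illustrative sketch of the budget knobs above; the values are examples, not
# defaults. Calls past max_budget raise BudgetExceededError (imported later in
# this module):
#
#   import litellm
#   litellm.max_budget = 10.0        # USD, tracked across all providers
#   litellm.budget_duration = "30d"  # proxy only - reset the budget every 30 days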
forward_traceparent_to_llm_provider: bool = False
_current_cost = 0.0  # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = False  # if function calling not supported by api, append function call details to system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None  # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = os.getenv(
    "LITELLM_MODEL_COST_MAP_URL",
    "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
)
blog_posts_url: str = os.getenv(
    "LITELLM_BLOG_POSTS_URL",
    "https://raw.githubusercontent.com/BerriAI/litellm/main/litellm/blog_posts.json",
)
anthropic_beta_headers_url: str = os.getenv(
    "LITELLM_ANTHROPIC_BETA_HEADERS_URL",
    "https://raw.githubusercontent.com/BerriAI/litellm/main/litellm/anthropic_beta_headers_config.json",
)
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
datadog_llm_observability_params: Optional[Union[DatadogLLMObsInitParams, Dict]] = None
datadog_params: Optional[Union[DatadogInitParams, Dict]] = None
aws_sqs_callback_params: Optional[Dict] = None
generic_logger_headers: Optional[Dict] = None
default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
key_generation_settings: Optional["StandardKeyGenerationConfig"] = None
default_internal_user_params: Optional[Dict] = None
default_team_params: Optional[Union[DefaultTeamSSOParams, Dict]] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
default_max_internal_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
max_ui_session_budget: Optional[float] = 0.25  # $0.25 USD budgets for UI Chat sessions
internal_user_budget_duration: Optional[str] = None
tag_budget_config: Optional[Dict[str, "BudgetConfig"]] = None
max_end_user_budget: Optional[float] = None
max_end_user_budget_id: Optional[str] = None
disable_end_user_cost_tracking: Optional[bool] = None
disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
enable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = []
custom_prometheus_tags: List[str] = []
prometheus_metrics_config: Optional[List] = None
prometheus_emit_stream_label: bool = False
disable_add_prefix_to_prompt: bool = False  # used by anthropic, to disable adding prefix to prompt
disable_copilot_system_to_assistant: bool = False  # If false (default), converts all 'system' role messages to 'assistant' for GitHub Copilot compatibility. Set to true to disable this behavior.
public_mcp_servers: Optional[List[str]] = None
public_model_groups: Optional[List[str]] = None
public_agent_groups: Optional[List[str]] = None
# Supports both old format (Dict[str, str]) and new format (Dict[str, Dict[str, Any]])
# New format: { "displayName": { "url": "...", "index": 0 } }
# Old format: { "displayName": "url" } (for backward compatibility)
public_model_groups_links: Dict[str, Union[str, Dict[str, Any]]] = {}
#### REQUEST PRIORITIZATION #######
priority_reservation: Optional[Dict[str, Union[float, "PriorityReservationDict"]]] = None
# priority_reservation_settings is lazy-loaded via __getattr__
# Only declare for type checking - at runtime __getattr__ handles it
if TYPE_CHECKING:
    priority_reservation_settings: Optional["PriorityReservationSettings"] = None

######## Networking Settings ########
use_aiohttp_transport: bool = True  # Older variable, aiohttp is now the default. Use disable_aiohttp_transport instead.
aiohttp_trust_env: bool = False  # set to true to use HTTP_PROXY/HTTPS_PROXY settings
disable_aiohttp_transport: bool = False  # Set this to true to use httpx instead
disable_aiohttp_trust_env: bool = False  # When False, aiohttp will respect HTTP(S)_PROXY env vars
force_ipv4: bool = False  # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
network_mock: bool = False  # When True, use mock transport - no real network calls
####### STOP SEQUENCE LIMIT #######
disable_stop_sequence_limit: bool = False  # when True, stop sequence limit is disabled
#### RETRIES ####
num_retries: Optional[int] = None  # per model endpoint
max_fallbacks: Optional[int] = None
default_fallbacks: Optional[List] = None
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3
allow_dynamic_callback_disabling: bool = True
num_retries_per_request: Optional[int] = None  # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = None  # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional["KeyManagementSystem"] = None
# Note: KeyManagementSettings must be eagerly imported because _key_management_settings
# is accessed during import time in secret_managers/main.py
# We'll import it after the lazy import system is set up
# We can't define it here because KeyManagementSettings is lazy-loaded
#### PII MASKING ####
output_parse_pii: bool = False
#############################################
from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map

model_cost = get_model_cost_map(url=model_cost_map_url)
cost_discount_config: Dict[str, float] = {}  # Provider-specific cost discounts {"vertex_ai": 0.05} = 5% discount
cost_margin_config: Dict[
    str, Union[float, Dict[str, float]]
] = {}  # Provider-specific or global cost margins. Examples:
# Percentage: {"openai": 0.10} = 10% margin
# Fixed: {"openai": {"fixed_amount": 0.001}} = $0.001 per request
# Global: {"global": 0.05} = 5% global margin on all providers
# Combined: {"vertex_ai": {"percentage": 0.08, "fixed_amount": 0.0005}}
custom_prompt_dict: Dict[str, dict] = {}
check_provider_endpoint = False


####### THREAD-SPECIFIC DATA ####################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"


_thread_context = MyLocal()


def identify(event_details):
    # Store user in thread local data
    if "user" in event_details:
        _thread_context.user = event_details["user"]
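# Illustrative usage of identify(): tag calls made on the current thread with a
# user id, stored in the thread-local context above:
#
#   import litellm
#   litellm.identify({"user": "user_123"})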
####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base: Optional[str] = None
headers = None
api_version: Optional[str] = None
organization = None
project = None
config_path = None
vertex_ai_safety_settings: Optional[dict] = None
####### COMPLETION MODELS ###################
from typing import Set

open_ai_chat_completion_models: Set = set()
open_ai_text_completion_models: Set = set()
cohere_models: Set = set()
cohere_chat_models: Set = set()
mistral_chat_models: Set = set()
text_completion_codestral_models: Set = set()
anthropic_models: Set = set()
openrouter_models: Set = set()
datarobot_models: Set = set()
vertex_language_models: Set = set()
vertex_vision_models: Set = set()
vertex_chat_models: Set = set()
vertex_code_chat_models: Set = set()
vertex_ai_image_models: Set = set()
vertex_ai_video_models: Set = set()
vertex_text_models: Set = set()
vertex_code_text_models: Set = set()
vertex_embedding_models: Set = set()
vertex_anthropic_models: Set = set()
vertex_llama3_models: Set = set()
vertex_deepseek_models: Set = set()
vertex_ai_ai21_models: Set = set()
vertex_mistral_models: Set = set()
vertex_openai_models: Set = set()
vertex_minimax_models: Set = set()
vertex_moonshot_models: Set = set()
vertex_zai_models: Set = set()
ai21_models: Set = set()
ai21_chat_models: Set = set()
nlp_cloud_models: Set = set()
aleph_alpha_models: Set = set()
bedrock_models: Set = set()
bedrock_converse_models: Set = set(BEDROCK_CONVERSE_MODELS)
fal_ai_models: Set = set()
fireworks_ai_models: Set = set()
fireworks_ai_embedding_models: Set = set()
deepinfra_models: Set = set()
perplexity_models: Set = set()
watsonx_models: Set = set()
gemini_models: Set = set()
xai_models: Set = set()
zai_models: Set = set()
deepseek_models: Set = set()
runwayml_models: Set = set()
azure_ai_models: Set = set()
jina_ai_models: Set = set()
voyage_models: Set = set()
infinity_models: Set = set()
heroku_models: Set = set()
databricks_models: Set = set()
cloudflare_models: Set = set()
codestral_models: Set = set()
friendliai_models: Set = set()
featherless_ai_models: Set = set()
palm_models: Set = set()
groq_models: Set = set()
azure_models: Set = set()
azure_anthropic_models: Set = set()
azure_text_models: Set = set()
anyscale_models: Set = set()
cerebras_models: Set = set()
galadriel_models: Set = set()
nvidia_nim_models: Set = set()
sambanova_models: Set = set()
sambanova_embedding_models: Set = set()
novita_models: Set = set()
assemblyai_models: Set = set()
snowflake_models: Set = set()
gradient_ai_models: Set = set()
llama_models: Set = set()
nscale_models: Set = set()
nebius_models: Set = set()
nebius_embedding_models: Set = set()
aiml_models: Set = set()
deepgram_models: Set = set()
elevenlabs_models: Set = set()
dashscope_models: Set = set()
moonshot_models: Set = set()
publicai_models: Set = set()
v0_models: Set = set()
morph_models: Set = set()
lambda_ai_models: Set = set()
hyperbolic_models: Set = set()
black_forest_labs_models: Set = set()
recraft_models: Set = set()
cometapi_models: Set = set()
oci_models: Set = set()
vercel_ai_gateway_models: Set = set()
volcengine_models: Set = set()
wandb_models: Set = set(WANDB_MODELS)
ovhcloud_models: Set = set()
ovhcloud_embedding_models: Set = set()
lemonade_models: Set = set()
docker_model_runner_models: Set = set()
amazon_nova_models: Set = set()
stability_models: Set = set()
github_copilot_models: Set = set()
chatgpt_models: Set = set()
minimax_models: Set = set()
aws_polly_models: Set = set()
gigachat_models: Set = set()
llamagate_models: Set = set()
bedrock_mantle_models: Set = set()


def is_bedrock_pricing_only_model(key: str) -> bool:
    """
    Excludes keys with the pattern 'bedrock/<region>/<model>'.
    These are in the model_prices_and_context_window.json file for pricing purposes only.

    Args:
        key (str): A key to filter.

    Returns:
        bool: True if the key matches the Bedrock pattern, False otherwise.
    """
    # Regex to match 'bedrock/<region>/<model>'
    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")

    if "month-commitment" in key:
        return True

    is_match = bedrock_pattern.match(key)
    return is_match is not None


def is_openai_finetune_model(key: str) -> bool:
    """
    Excludes model cost keys with the pattern 'ft:'.
    These are in the model_prices_and_context_window.json file for pricing purposes only.

    Args:
        key (str): A key to filter.

    Returns:
        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
    """
    return key.startswith("ft:") and not key.count(":") > 1


def add_known_models(model_cost_map: Optional[Dict] = None):
    _map = model_cost_map if model_cost_map is not None else model_cost
    for key, value in _map.items():
        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(key):
            open_ai_chat_completion_models.add(key)
        elif value.get("litellm_provider") == "text-completion-openai":
            open_ai_text_completion_models.add(key)
        elif value.get("litellm_provider") == "azure_text":
            azure_text_models.add(key)
        elif value.get("litellm_provider") == "cohere":
            cohere_models.add(key)
        elif value.get("litellm_provider") == "cohere_chat":
            cohere_chat_models.add(key)
        elif value.get("litellm_provider") == "mistral":
            mistral_chat_models.add(key)
        elif value.get("litellm_provider") == "anthropic":
            anthropic_models.add(key)
        elif value.get("litellm_provider") == "empower":
            empower_models.add(key)
        elif value.get("litellm_provider") == "openrouter":
            openrouter_models.add(key)
        elif value.get("litellm_provider") == "vercel_ai_gateway":
            vercel_ai_gateway_models.add(key)
        elif value.get("litellm_provider") == "datarobot":
            datarobot_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-text-models":
            vertex_text_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-code-text-models":
            vertex_code_text_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-language-models":
            vertex_language_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-vision-models":
            vertex_vision_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-chat-models":
            vertex_chat_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-code-chat-models":
            vertex_code_chat_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-embedding-models":
            vertex_embedding_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-anthropic_models":
            key = key.replace("vertex_ai/", "")
            vertex_anthropic_models.add(key)
"vertex_ai-llama_models": key = key.replace("vertex_ai/", "") vertex_llama3_models.add(key) elif value.get("litellm_provider") == "vertex_ai-deepseek_models": key = key.replace("vertex_ai/", "") vertex_deepseek_models.add(key) elif value.get("litellm_provider") == "vertex_ai-mistral_models": key = key.replace("vertex_ai/", "") vertex_mistral_models.add(key) elif value.get("litellm_provider") == "vertex_ai-ai21_models": key = key.replace("vertex_ai/", "") vertex_ai_ai21_models.add(key) elif value.get("litellm_provider") == "vertex_ai-image-models": key = key.replace("vertex_ai/", "") vertex_ai_image_models.add(key) elif value.get("litellm_provider") == "vertex_ai-video-models": key = key.replace("vertex_ai/", "") vertex_ai_video_models.add(key) elif value.get("litellm_provider") == "vertex_ai-openai_models": key = key.replace("vertex_ai/", "") vertex_openai_models.add(key) elif value.get("litellm_provider") == "vertex_ai-minimax_models": key = key.replace("vertex_ai/", "") vertex_minimax_models.add(key) elif value.get("litellm_provider") == "vertex_ai-moonshot_models": key = key.replace("vertex_ai/", "") vertex_moonshot_models.add(key) elif value.get("litellm_provider") == "vertex_ai-zai_models": key = key.replace("vertex_ai/", "") vertex_zai_models.add(key) elif value.get("litellm_provider") == "ai21": if value.get("mode") == "chat": ai21_chat_models.add(key) else: ai21_models.add(key) elif value.get("litellm_provider") == "nlp_cloud": nlp_cloud_models.add(key) elif value.get("litellm_provider") == "aleph_alpha": aleph_alpha_models.add(key) elif value.get( "litellm_provider" ) == "bedrock" and not is_bedrock_pricing_only_model(key): bedrock_models.add(key) elif value.get("litellm_provider") == "bedrock_converse": bedrock_converse_models.add(key) elif value.get("litellm_provider") == "deepinfra": deepinfra_models.add(key) elif value.get("litellm_provider") == "perplexity": perplexity_models.add(key) elif value.get("litellm_provider") == "watsonx": watsonx_models.add(key) elif value.get("litellm_provider") == "gemini": gemini_models.add(key) elif value.get("litellm_provider") == "fireworks_ai": # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params. if "-to-" not in key and "fireworks-ai-default" not in key: fireworks_ai_models.add(key) elif value.get("litellm_provider") == "fireworks_ai-embedding-models": # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params. 
if "-to-" not in key: fireworks_ai_embedding_models.add(key) elif value.get("litellm_provider") == "text-completion-codestral": text_completion_codestral_models.add(key) elif value.get("litellm_provider") == "xai": xai_models.add(key) elif value.get("litellm_provider") == "zai": zai_models.add(key) elif value.get("litellm_provider") == "fal_ai": fal_ai_models.add(key) elif value.get("litellm_provider") == "deepseek": deepseek_models.add(key) elif value.get("litellm_provider") == "runwayml": runwayml_models.add(key) elif value.get("litellm_provider") == "meta_llama": llama_models.add(key) elif value.get("litellm_provider") == "nscale": nscale_models.add(key) elif value.get("litellm_provider") == "azure_ai": azure_ai_models.add(key) elif value.get("litellm_provider") == "voyage": voyage_models.add(key) elif value.get("litellm_provider") == "infinity": infinity_models.add(key) elif value.get("litellm_provider") == "databricks": databricks_models.add(key) elif value.get("litellm_provider") == "cloudflare": cloudflare_models.add(key) elif value.get("litellm_provider") == "codestral": codestral_models.add(key) elif value.get("litellm_provider") == "friendliai": friendliai_models.add(key) elif value.get("litellm_provider") == "palm": palm_models.add(key) elif value.get("litellm_provider") == "groq": groq_models.add(key) elif value.get("litellm_provider") == "azure": azure_models.add(key) elif value.get("litellm_provider") == "azure_anthropic": azure_anthropic_models.add(key) elif value.get("litellm_provider") == "anyscale": anyscale_models.add(key) elif value.get("litellm_provider") == "cerebras": cerebras_models.add(key) elif value.get("litellm_provider") == "galadriel": galadriel_models.add(key) elif value.get("litellm_provider") == "nvidia_nim": nvidia_nim_models.add(key) elif value.get("litellm_provider") == "sambanova": sambanova_models.add(key) elif value.get("litellm_provider") == "sambanova-embedding-models": sambanova_embedding_models.add(key) elif value.get("litellm_provider") == "novita": novita_models.add(key) elif value.get("litellm_provider") == "nebius-chat-models": nebius_models.add(key) elif value.get("litellm_provider") == "nebius-embedding-models": nebius_embedding_models.add(key) elif value.get("litellm_provider") == "aiml": aiml_models.add(key) elif value.get("litellm_provider") == "assemblyai": assemblyai_models.add(key) elif value.get("litellm_provider") == "jina_ai": jina_ai_models.add(key) elif value.get("litellm_provider") == "snowflake": snowflake_models.add(key) elif value.get("litellm_provider") == "gradient_ai": gradient_ai_models.add(key) elif value.get("litellm_provider") == "featherless_ai": featherless_ai_models.add(key) elif value.get("litellm_provider") == "deepgram": deepgram_models.add(key) elif value.get("litellm_provider") == "elevenlabs": elevenlabs_models.add(key) elif value.get("litellm_provider") == "heroku": heroku_models.add(key) elif value.get("litellm_provider") == "dashscope": dashscope_models.add(key) elif value.get("litellm_provider") == "moonshot": moonshot_models.add(key) elif value.get("litellm_provider") == "publicai": publicai_models.add(key) elif value.get("litellm_provider") == "v0": v0_models.add(key) elif value.get("litellm_provider") == "morph": morph_models.add(key) elif value.get("litellm_provider") == "lambda_ai": lambda_ai_models.add(key) elif value.get("litellm_provider") == "hyperbolic": hyperbolic_models.add(key) elif value.get("litellm_provider") == "black_forest_labs": black_forest_labs_models.add(key) elif 
value.get("litellm_provider") == "recraft": recraft_models.add(key) elif value.get("litellm_provider") == "cometapi": cometapi_models.add(key) elif value.get("litellm_provider") == "oci": oci_models.add(key) elif value.get("litellm_provider") == "volcengine": volcengine_models.add(key) elif value.get("litellm_provider") == "wandb": wandb_models.add(key) elif value.get("litellm_provider") == "ovhcloud": ovhcloud_models.add(key) elif value.get("litellm_provider") == "ovhcloud-embedding-models": ovhcloud_embedding_models.add(key) elif value.get("litellm_provider") == "lemonade": lemonade_models.add(key) elif value.get("litellm_provider") == "docker_model_runner": docker_model_runner_models.add(key) elif value.get("litellm_provider") == "amazon_nova": amazon_nova_models.add(key) elif value.get("litellm_provider") == "stability": stability_models.add(key) elif value.get("litellm_provider") == "github_copilot": github_copilot_models.add(key) elif value.get("litellm_provider") == "chatgpt": chatgpt_models.add(key) elif value.get("litellm_provider") == "minimax": minimax_models.add(key) elif value.get("litellm_provider") == "aws_polly": aws_polly_models.add(key) elif value.get("litellm_provider") == "gigachat": gigachat_models.add(key) elif value.get("litellm_provider") == "llamagate": llamagate_models.add(key) elif value.get("litellm_provider") == "bedrock_mantle": bedrock_mantle_models.add(key) add_known_models() # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary # this is maintained for Exception Mapping # used for Cost Tracking & Token counting # https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/ # Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting azure_llms = { "gpt-35-turbo": "azure/gpt-35-turbo", "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k", "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct", "azure/gpt-41": "gpt-4.1", "azure/gpt-41-mini": "gpt-4.1-mini", "azure/gpt-41-nano": "gpt-4.1-nano", } azure_embedding_models = { "ada": "azure/ada", } petals_models = [ "petals-team/StableBeluga2", ] ollama_models = ["llama2"] maritalk_models = ["maritalk"] model_list = list( open_ai_chat_completion_models | open_ai_text_completion_models | cohere_models | cohere_chat_models | anthropic_models | set(replicate_models) | openrouter_models | datarobot_models | set(huggingface_models) | vertex_chat_models | vertex_text_models | ai21_models | ai21_chat_models | set(together_ai_models) | set(baseten_models) | aleph_alpha_models | nlp_cloud_models | set(ollama_models) | bedrock_models | deepinfra_models | perplexity_models | set(maritalk_models) | runwayml_models | vertex_language_models | watsonx_models | gemini_models | text_completion_codestral_models | xai_models | zai_models | fal_ai_models | deepseek_models | azure_ai_models | voyage_models | infinity_models | databricks_models | cloudflare_models | codestral_models | friendliai_models | palm_models | groq_models | azure_models | azure_anthropic_models | anyscale_models | cerebras_models | galadriel_models | nvidia_nim_models | sambanova_models | azure_text_models | novita_models | assemblyai_models | jina_ai_models | snowflake_models | gradient_ai_models | llama_models | featherless_ai_models | nscale_models | deepgram_models | elevenlabs_models | dashscope_models | moonshot_models | publicai_models | v0_models | morph_models | lambda_ai_models | black_forest_labs_models | 
# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
# this is maintained for Exception Mapping
# used for Cost Tracking & Token counting
# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
# Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting
azure_llms = {
    "gpt-35-turbo": "azure/gpt-35-turbo",
    "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
    "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
    "azure/gpt-41": "gpt-4.1",
    "azure/gpt-41-mini": "gpt-4.1-mini",
    "azure/gpt-41-nano": "gpt-4.1-nano",
}
azure_embedding_models = {
    "ada": "azure/ada",
}
petals_models = [
    "petals-team/StableBeluga2",
]
ollama_models = ["llama2"]
maritalk_models = ["maritalk"]

model_list = list(
    open_ai_chat_completion_models
    | open_ai_text_completion_models
    | cohere_models
    | cohere_chat_models
    | anthropic_models
    | set(replicate_models)
    | openrouter_models
    | datarobot_models
    | set(huggingface_models)
    | vertex_chat_models
    | vertex_text_models
    | ai21_models
    | ai21_chat_models
    | set(together_ai_models)
    | set(baseten_models)
    | aleph_alpha_models
    | nlp_cloud_models
    | set(ollama_models)
    | bedrock_models
    | deepinfra_models
    | perplexity_models
    | set(maritalk_models)
    | runwayml_models
    | vertex_language_models
    | watsonx_models
    | gemini_models
    | text_completion_codestral_models
    | xai_models
    | zai_models
    | fal_ai_models
    | deepseek_models
    | azure_ai_models
    | voyage_models
    | infinity_models
    | databricks_models
    | cloudflare_models
    | codestral_models
    | friendliai_models
    | palm_models
    | groq_models
    | azure_models
    | azure_anthropic_models
    | anyscale_models
    | cerebras_models
    | galadriel_models
    | nvidia_nim_models
    | sambanova_models
    | azure_text_models
    | novita_models
    | assemblyai_models
    | jina_ai_models
    | snowflake_models
    | gradient_ai_models
    | llama_models
    | featherless_ai_models
    | nscale_models
    | deepgram_models
    | elevenlabs_models
    | dashscope_models
    | moonshot_models
    | publicai_models
    | v0_models
    | morph_models
    | lambda_ai_models
    | black_forest_labs_models
    | recraft_models
    | cometapi_models
    | oci_models
    | heroku_models
    | vercel_ai_gateway_models
    | volcengine_models
    | wandb_models
    | ovhcloud_models
    | lemonade_models
    | docker_model_runner_models
    | bedrock_mantle_models
    | set(clarifai_models)
)
model_list_set = set(model_list)

# provider_list is lazy-loaded via __getattr__ to avoid importing LlmProviders at import time

models_by_provider: dict = {
    "openai": open_ai_chat_completion_models | open_ai_text_completion_models,
    "text-completion-openai": open_ai_text_completion_models,
    "cohere": cohere_models | cohere_chat_models,
    "cohere_chat": cohere_chat_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
    "huggingface": huggingface_models,
    "together_ai": together_ai_models,
    "baseten": baseten_models,
    "openrouter": openrouter_models,
    "vercel_ai_gateway": vercel_ai_gateway_models,
    "datarobot": datarobot_models,
    "vertex_ai": vertex_chat_models
    | vertex_text_models
    | vertex_anthropic_models
    | vertex_vision_models
    | vertex_language_models
    | vertex_deepseek_models
    | vertex_minimax_models
    | vertex_moonshot_models
    | vertex_zai_models,
    "ai21": ai21_models,
    "bedrock": bedrock_models | bedrock_converse_models,
    "petals": petals_models,
    "ollama": ollama_models,
    "ollama_chat": ollama_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
    "maritalk": maritalk_models,
    "watsonx": watsonx_models,
    "gemini": gemini_models,
    "fireworks_ai": fireworks_ai_models | fireworks_ai_embedding_models,
    "aleph_alpha": aleph_alpha_models,
    "text-completion-codestral": text_completion_codestral_models,
    "xai": xai_models,
    "zai": zai_models,
    "fal_ai": fal_ai_models,
    "deepseek": deepseek_models,
    "runwayml": runwayml_models,
    "mistral": mistral_chat_models,
    "azure_ai": azure_ai_models,
    "voyage": voyage_models,
    "infinity": infinity_models,
    "databricks": databricks_models,
    "cloudflare": cloudflare_models,
    "codestral": codestral_models,
    "nlp_cloud": nlp_cloud_models,
    "friendliai": friendliai_models,
    "palm": palm_models,
    "groq": groq_models,
    "azure": azure_models | azure_text_models,
    "azure_anthropic": azure_anthropic_models,
    "azure_text": azure_text_models,
    "anyscale": anyscale_models,
    "cerebras": cerebras_models,
    "galadriel": galadriel_models,
    "nvidia_nim": nvidia_nim_models,
    "sambanova": sambanova_models | sambanova_embedding_models,
    "novita": novita_models,
    "nebius": nebius_models | nebius_embedding_models,
    "aiml": aiml_models,
    "assemblyai": assemblyai_models,
    "jina_ai": jina_ai_models,
    "snowflake": snowflake_models,
    "gradient_ai": gradient_ai_models,
    "meta_llama": llama_models,
    "nscale": nscale_models,
    "featherless_ai": featherless_ai_models,
    "deepgram": deepgram_models,
    "elevenlabs": elevenlabs_models,
    "heroku": heroku_models,
    "dashscope": dashscope_models,
    "moonshot": moonshot_models,
    "publicai": publicai_models,
    "v0": v0_models,
    "morph": morph_models,
    "lambda_ai": lambda_ai_models,
    "hyperbolic": hyperbolic_models,
    "black_forest_labs": black_forest_labs_models,
    "recraft": recraft_models,
    "cometapi": cometapi_models,
    "oci": oci_models,
    "volcengine": volcengine_models,
    "wandb": wandb_models,
    "ovhcloud": ovhcloud_models | ovhcloud_embedding_models,
    "lemonade": lemonade_models,
    "clarifai": clarifai_models,
    "amazon_nova": amazon_nova_models,
    "stability": stability_models,
    "github_copilot": github_copilot_models,
    "chatgpt": chatgpt_models,
    "minimax": minimax_models,
    "aws_polly": aws_polly_models,
    "gigachat": gigachat_models,
    "llamagate": llamagate_models,
    "bedrock_mantle": bedrock_mantle_models,
}
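# Illustrative lookups against the mappings above (results depend on the
# loaded cost map):
#
#   "command-r" in litellm.models_by_provider["cohere"]
#   "gpt-4" in litellm.model_list_set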
# mapping for those models which have larger equivalents
longer_context_model_fallback_dict: dict = {
    # openai chat completion models
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
    "gpt-4": "gpt-4-32k",
    "gpt-4-0314": "gpt-4-32k-0314",
    "gpt-4-0613": "gpt-4-32k-0613",
    # anthropic
    "claude-instant-1": "claude-2",
    "claude-instant-1.2": "claude-2",
    # vertexai
    "chat-bison": "chat-bison-32k",
    "chat-bison@001": "chat-bison-32k",
    "codechat-bison": "codechat-bison-32k",
    "codechat-bison@001": "codechat-bison-32k",
    # openrouter
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}

####### EMBEDDING MODELS ###################
all_embedding_models = (
    open_ai_embedding_models
    | set(cohere_embedding_models)
    | set(bedrock_embedding_models)
    | vertex_embedding_models
    | fireworks_ai_embedding_models
    | nebius_embedding_models
    | sambanova_embedding_models
    | ovhcloud_embedding_models
)
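# Illustrative membership test against the union above (result depends on the
# loaded cost map and constants):
#
#   "text-embedding-3-small" in litellm.all_embedding_models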
####### IMAGE GENERATION MODELS ###################
openai_image_generation_models = ["dall-e-2", "dall-e-3"]

####### VIDEO GENERATION MODELS ###################
openai_video_generation_models = ["sora-2"]

# timeout is lazy-loaded via __getattr__
# get_llm_provider is lazy-loaded via __getattr__
# remove_index_from_tool_calls is lazy-loaded via __getattr__

# Import KeyManagementSettings here (before utils import) because _key_management_settings
# is accessed during import time in secret_managers/main.py (via dd_tracing -> datadog -> _service_logger -> utils)
from litellm.types.secret_managers.main import KeyManagementSettings

_key_management_settings: KeyManagementSettings = KeyManagementSettings()

# client must be imported immediately as it's used as a decorator at function definition time
from .utils import client

# Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py
# (which imports tiktoken) at import time
from .llms.custom_llm import CustomLLM
from .llms.anthropic.common_utils import AnthropicModelInfo
from .llms.ai21.chat.transformation import AI21ChatConfig, AI21ChatConfig as AI21Config
from .llms.deprecated_providers.palm import PalmConfig  # here to prevent breaking changes
from .llms.deprecated_providers.aleph_alpha import AlephAlphaConfig
from .llms.gemini.common_utils import GeminiModelInfo
from .llms.vertex_ai.vertex_embeddings.transformation import VertexAITextEmbeddingConfig

vertexAITextEmbeddingConfig = VertexAITextEmbeddingConfig()

from .llms.bedrock.embed.amazon_titan_v2_transformation import AmazonTitanV2Config
from .llms.topaz.common_utils import TopazModelInfo

# OpenAIOSeriesConfig is lazy loaded - openaiOSeriesConfig will be created on first access
# OpenAIGPTConfig, OpenAIGPT5Config, etc. are lazy loaded - instances will be created on first access
from .llms.xai.common_utils import XAIModelInfo

# PublicAI now uses JSON-based configuration (see litellm/llms/openai_like/providers.json)
# All remaining configs are now lazy loaded - see _lazy_imports_registry.py

# Import LlmProviders here (before main import) because it's imported during import time
# in multiple places including openai.py (via main import)
from litellm.types.utils import LlmProviders

## Lazy loading this is not straightforward, will leave it here for now.
from .main import *  # type: ignore

# Skills API
from .skills.main import (
    create_skill,
    acreate_skill,
    list_skills,
    alist_skills,
    get_skill,
    aget_skill,
    delete_skill,
    adelete_skill,
)
from .evals.main import (
    create_eval,
    acreate_eval,
    list_evals,
    alist_evals,
    get_eval,
    aget_eval,
    delete_eval,
    adelete_eval,
    cancel_eval,
    acancel_eval,
    create_run,
    acreate_run,
    list_runs,
    alist_runs,
    get_run,
    aget_run,
    delete_run,
    adelete_run,
    cancel_run,
    acancel_run,
)
from .integrations import *
from .llms.custom_httpx.async_client_cleanup import close_litellm_async_clients
from .exceptions import (
    AuthenticationError,
    InvalidRequestError,
    BadRequestError,
    ImageFetchError,
    NotFoundError,
    PermissionDeniedError,
    RateLimitError,
    ServiceUnavailableError,
    BadGatewayError,
    OpenAIError,
    ContextWindowExceededError,
    ContentPolicyViolationError,
    BudgetExceededError,
    APIError,
    Timeout,
    APIConnectionError,
    UnsupportedParamsError,
    APIResponseValidationError,
    UnprocessableEntityError,
    InternalServerError,
    JSONSchemaValidationError,
    LITELLM_EXCEPTION_TYPES,
    MockException,
)
from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router
from .assistants.main import *
from .batches.main import *
from .images.main import *
from .videos.main import *
from .batch_completion.main import *  # type: ignore
from .rerank_api.main import *
from .llms.anthropic.experimental_pass_through.messages.handler import *
from .responses.main import *

# Interactions API is available as litellm.interactions module
# Usage: litellm.interactions.create(), litellm.interactions.get(), etc.
from . import interactions
from .containers.main import *
from .ocr.main import *
from .rag.main import *
from .search.main import *
from .realtime_api.main import (
    _arealtime,
    acreate_realtime_client_secret,
    arealtime_calls,
)
from .responses.main import _aresponses_websocket
from .fine_tuning.main import *
from .files.main import *
from .vector_store_files.main import (
    acreate as avector_store_file_create,
    adelete as avector_store_file_delete,
    alist as avector_store_file_list,
    aretrieve as avector_store_file_retrieve,
    aretrieve_content as avector_store_file_content,
    aupdate as avector_store_file_update,
    create as vector_store_file_create,
    delete as vector_store_file_delete,
    list as vector_store_file_list,
    retrieve as vector_store_file_retrieve,
    retrieve_content as vector_store_file_content,
    update as vector_store_file_update,
)
from .scheduler import *

### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic

adapters: List[AdapterItem] = []

### Vector Store Registry ###
from .vector_stores.vector_store_registry import (
    VectorStoreRegistry,
    VectorStoreIndexRegistry,
)

vector_store_registry: Optional[VectorStoreRegistry] = None
vector_store_index_registry: Optional[VectorStoreIndexRegistry] = None

### RAG ###
from . import rag

### CUSTOM LLMs ###
from .types.llms.custom_llm import CustomLLMItem

custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[str] = []  # internal helper util, used to track names of custom providers
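# Illustrative sketch of registering a custom provider via custom_provider_map,
# using the CustomLLM base class imported above. `MyLLM` is a hypothetical
# subclass:
#
#   import litellm
#   from litellm import CustomLLM
#
#   class MyLLM(CustomLLM):
#       def completion(self, *args, **kwargs):
#           ...
#
#   litellm.custom_provider_map = [
#       {"provider": "my-llm", "custom_handler": MyLLM()}
#   ]
#   # litellm.completion(model="my-llm/any-name", ...) now routes to MyLLM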
disable_hf_tokenizer_download: Optional[bool] = None  # disable huggingface tokenizer download. Defaults to openai cl100k_base
global_disable_no_log_param: bool = False

### CLI UTILITIES ###
from litellm.litellm_core_utils.cli_token_utils import get_litellm_gateway_api_key

### PASSTHROUGH ###
from .passthrough import allm_passthrough_route, llm_passthrough_route
from .google_genai import agenerate_content

### GLOBAL CONFIG ###
global_bitbucket_config: Optional[Dict[str, Any]] = None


def set_global_bitbucket_config(config: Dict[str, Any]) -> None:
    """Set global BitBucket configuration for prompt management."""
    global global_bitbucket_config
    global_bitbucket_config = config


### GLOBAL CONFIG ###
global_gitlab_config: Optional[Dict[str, Any]] = None


def set_global_gitlab_config(config: Dict[str, Any]) -> None:
    """Set global GitLab configuration for prompt management."""
    global global_gitlab_config
    global_gitlab_config = config


# Lazy loading system for heavy modules to reduce initial import time and memory usage
if TYPE_CHECKING:
    from litellm.types.utils import ModelInfo as _ModelInfoType
    from litellm.types.utils import PriorityReservationSettings
    from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
    from litellm.caching.caching import Cache

    # Type stubs for lazy-loaded configs to help mypy
    from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig as AmazonConverseConfig
    from .llms.openai_like.chat.handler import OpenAILikeChatConfig as OpenAILikeChatConfig
    from .llms.galadriel.chat.transformation import GaladrielChatConfig as GaladrielChatConfig
    from .llms.github.chat.transformation import GithubChatConfig as GithubChatConfig
    from .llms.azure_ai.anthropic.transformation import AzureAnthropicConfig as AzureAnthropicConfig
    from .llms.bytez.chat.transformation import BytezChatConfig as BytezChatConfig
    from .llms.compactifai.chat.transformation import CompactifAIChatConfig as CompactifAIChatConfig
    from .llms.empower.chat.transformation import EmpowerChatConfig as EmpowerChatConfig
    from .llms.minimax.chat.transformation import MinimaxChatConfig as MinimaxChatConfig
    from .llms.aiohttp_openai.chat.transformation import AiohttpOpenAIChatConfig as AiohttpOpenAIChatConfig
    from .llms.huggingface.chat.transformation import HuggingFaceChatConfig as HuggingFaceChatConfig
    from .llms.huggingface.embedding.transformation import HuggingFaceEmbeddingConfig as HuggingFaceEmbeddingConfig
    from .llms.oobabooga.chat.transformation import OobaboogaConfig as OobaboogaConfig
    from .llms.maritalk import MaritalkConfig as MaritalkConfig
    from .llms.openrouter.chat.transformation import OpenrouterConfig as OpenrouterConfig
    from .llms.datarobot.chat.transformation import DataRobotConfig as DataRobotConfig
    from .llms.anthropic.chat.transformation import AnthropicConfig as AnthropicConfig
    from .llms.anthropic.completion.transformation import AnthropicTextConfig as AnthropicTextConfig
    from .llms.groq.stt.transformation import GroqSTTConfig as GroqSTTConfig
    from .llms.triton.completion.transformation import TritonConfig as TritonConfig
    from .llms.triton.completion.transformation import TritonGenerateConfig as TritonGenerateConfig
    from .llms.triton.completion.transformation import TritonInferConfig as TritonInferConfig
    from .llms.triton.embedding.transformation import TritonEmbeddingConfig as TritonEmbeddingConfig
    from .llms.huggingface.rerank.transformation import HuggingFaceRerankConfig as HuggingFaceRerankConfig
    from .llms.databricks.chat.transformation import DatabricksConfig as DatabricksConfig
    from .llms.databricks.embed.transformation import DatabricksEmbeddingConfig as DatabricksEmbeddingConfig
    from .llms.predibase.chat.transformation import PredibaseConfig as PredibaseConfig
    from .llms.replicate.chat.transformation import ReplicateConfig as ReplicateConfig
    from .llms.snowflake.chat.transformation import SnowflakeConfig as SnowflakeConfig
    from .llms.cohere.rerank.transformation import CohereRerankConfig as CohereRerankConfig
    from .llms.cohere.rerank_v2.transformation import CohereRerankV2Config as CohereRerankV2Config
    from .llms.azure_ai.rerank.transformation import AzureAIRerankConfig as AzureAIRerankConfig
    from .llms.infinity.rerank.transformation import InfinityRerankConfig as InfinityRerankConfig
    from .llms.jina_ai.rerank.transformation import JinaAIRerankConfig as JinaAIRerankConfig
    from .llms.deepinfra.rerank.transformation import DeepinfraRerankConfig as DeepinfraRerankConfig
    from .llms.hosted_vllm.rerank.transformation import HostedVLLMRerankConfig as HostedVLLMRerankConfig
    from .llms.nvidia_nim.rerank.transformation import NvidiaNimRerankConfig as NvidiaNimRerankConfig
    from .llms.nvidia_nim.rerank.ranking_transformation import NvidiaNimRankingConfig as NvidiaNimRankingConfig
    from .llms.vertex_ai.rerank.transformation import VertexAIRerankConfig as VertexAIRerankConfig
    from .llms.fireworks_ai.rerank.transformation import FireworksAIRerankConfig as FireworksAIRerankConfig
    from .llms.voyage.rerank.transformation import VoyageRerankConfig as VoyageRerankConfig
    from .llms.watsonx.rerank.transformation import IBMWatsonXRerankConfig as IBMWatsonXRerankConfig
    from .llms.clarifai.chat.transformation import ClarifaiConfig as ClarifaiConfig
    from .llms.ai21.chat.transformation import AI21ChatConfig as AI21ChatConfig
    from .llms.meta_llama.chat.transformation import LlamaAPIConfig as LlamaAPIConfig
    from .llms.together_ai.completion.transformation import TogetherAITextCompletionConfig as TogetherAITextCompletionConfig
    from .llms.cloudflare.chat.transformation import CloudflareChatConfig as CloudflareChatConfig
    from .llms.novita.chat.transformation import NovitaConfig as NovitaConfig
    from .llms.petals.completion.transformation import PetalsConfig as PetalsConfig
    from .llms.ollama.chat.transformation import OllamaChatConfig as OllamaChatConfig
    from .llms.ollama.completion.transformation import OllamaConfig as OllamaConfig
    from .llms.sagemaker.completion.transformation import SagemakerConfig as SagemakerConfig
    from .llms.sagemaker.chat.transformation import SagemakerChatConfig as SagemakerChatConfig
    from .llms.cohere.chat.transformation import CohereChatConfig as CohereChatConfig
    from .llms.anthropic.experimental_pass_through.messages.transformation import (
        AnthropicMessagesConfig as AnthropicMessagesConfig,
    )
    from .llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation import (
        AmazonAnthropicClaudeMessagesConfig as AmazonAnthropicClaudeMessagesConfig,
    )
    from .llms.together_ai.chat import TogetherAIConfig as TogetherAIConfig
    from .llms.nlp_cloud.chat.handler import NLPCloudConfig as NLPCloudConfig
    from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig as VertexGeminiConfig,
    )
    from .llms.gemini.chat.transformation import GoogleAIStudioGeminiConfig as GoogleAIStudioGeminiConfig
    from .llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import (
        VertexAIAnthropicConfig as VertexAIAnthropicConfig,
    )
    from .llms.vertex_ai.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config as VertexAILlama3Config,
    )
    from .llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config as VertexAIAi21Config,
    )
    from .llms.bedrock.chat.invoke_handler import AmazonCohereChatConfig as AmazonCohereChatConfig
    from .llms.bedrock.common_utils import AmazonBedrockGlobalConfig as AmazonBedrockGlobalConfig
    from .llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation import (
        AmazonAI21Config as AmazonAI21Config,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_nova_transformation import (
        AmazonInvokeNovaConfig as AmazonInvokeNovaConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation import (
        AmazonQwen2Config as AmazonQwen2Config,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation import (
        AmazonQwen3Config as AmazonQwen3Config,
    )
    from .llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation import (
        AmazonAnthropicConfig as AmazonAnthropicConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
        AmazonAnthropicClaudeConfig as AmazonAnthropicClaudeConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation import (
        AmazonCohereConfig as AmazonCohereConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_llama_transformation import (
        AmazonLlamaConfig as AmazonLlamaConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation import (
        AmazonDeepSeekR1Config as AmazonDeepSeekR1Config,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation import (
        AmazonMistralConfig as AmazonMistralConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation import (
        AmazonMoonshotConfig as AmazonMoonshotConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_titan_transformation import (
        AmazonTitanConfig as AmazonTitanConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation import (
        AmazonTwelveLabsPegasusConfig as AmazonTwelveLabsPegasusConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
        AmazonInvokeConfig as AmazonInvokeConfig,
    )
    from .llms.bedrock.chat.invoke_transformations.amazon_openai_transformation import (
        AmazonBedrockOpenAIConfig as AmazonBedrockOpenAIConfig,
    )
    from .llms.bedrock.image_generation.amazon_stability1_transformation import (
        AmazonStabilityConfig as AmazonStabilityConfig,
    )
    from .llms.bedrock.image_generation.amazon_stability3_transformation import (
        AmazonStability3Config as AmazonStability3Config,
    )
    from .llms.bedrock.image_generation.amazon_nova_canvas_transformation import (
        AmazonNovaCanvasConfig as AmazonNovaCanvasConfig,
    )
    from .llms.bedrock.embed.amazon_titan_g1_transformation import (
        AmazonTitanG1Config as AmazonTitanG1Config,
    )
    from .llms.bedrock.embed.amazon_titan_multimodal_transformation import (
        AmazonTitanMultimodalEmbeddingG1Config as AmazonTitanMultimodalEmbeddingG1Config,
    )
    from .llms.cohere.chat.v2_transformation import CohereV2ChatConfig as CohereV2ChatConfig
    from .llms.bedrock.embed.cohere_transformation import (
        BedrockCohereEmbeddingConfig as BedrockCohereEmbeddingConfig,
    )
    from .llms.bedrock.embed.twelvelabs_marengo_transformation import (
        TwelveLabsMarengoEmbeddingConfig as TwelveLabsMarengoEmbeddingConfig,
    )
    from .llms.bedrock.embed.amazon_nova_transformation import (
        AmazonNovaEmbeddingConfig as AmazonNovaEmbeddingConfig,
    )
    from .llms.openai.openai import (
        OpenAIConfig as OpenAIConfig,
        MistralEmbeddingConfig as MistralEmbeddingConfig,
    )
    from .llms.openai.image_variations.transformation import (
        OpenAIImageVariationConfig as OpenAIImageVariationConfig,
    )
    from .llms.deepgram.audio_transcription.transformation import (
        DeepgramAudioTranscriptionConfig as DeepgramAudioTranscriptionConfig,
    )
    from .llms.topaz.image_variations.transformation import (
        TopazImageVariationConfig as TopazImageVariationConfig,
    )
    from litellm.llms.openai.completion.transformation import (
        OpenAITextCompletionConfig as OpenAITextCompletionConfig,
    )
    from .llms.groq.chat.transformation import GroqChatConfig as GroqChatConfig
    from .llms.bedrock_mantle.chat.transformation import (
        BedrockMantleChatConfig as BedrockMantleChatConfig,
    )
    from .llms.a2a.chat.transformation import A2AConfig as A2AConfig
    from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig as VoyageEmbeddingConfig
    from .llms.voyage.embedding.transformation_contextual import (
        VoyageContextualEmbeddingConfig as VoyageContextualEmbeddingConfig,
    )
    from .llms.infinity.embedding.transformation import (
        InfinityEmbeddingConfig as InfinityEmbeddingConfig,
    )
    from .llms.perplexity.embedding.transformation import (
        PerplexityEmbeddingConfig as PerplexityEmbeddingConfig,
    )
    from .llms.azure_ai.chat.transformation import AzureAIStudioConfig as AzureAIStudioConfig
    from .llms.mistral.chat.transformation import MistralConfig as MistralConfig
    from .llms.openai.responses.transformation import (
        OpenAIResponsesAPIConfig as OpenAIResponsesAPIConfig,
    )
    from .llms.azure.responses.transformation import (
        AzureOpenAIResponsesAPIConfig as AzureOpenAIResponsesAPIConfig,
    )
    from .llms.azure.responses.o_series_transformation import (
        AzureOpenAIOSeriesResponsesAPIConfig as AzureOpenAIOSeriesResponsesAPIConfig,
    )
    from .llms.xai.responses.transformation import (
        XAIResponsesAPIConfig as XAIResponsesAPIConfig,
    )
    from .llms.litellm_proxy.responses.transformation import (
        LiteLLMProxyResponsesAPIConfig as LiteLLMProxyResponsesAPIConfig,
    )
    from .llms.volcengine.responses.transformation import (
        VolcEngineResponsesAPIConfig as VolcEngineResponsesAPIConfig,
    )
    from .llms.manus.responses.transformation import (
        ManusResponsesAPIConfig as ManusResponsesAPIConfig,
    )
    from .llms.perplexity.responses.transformation import (
        PerplexityResponsesConfig as PerplexityResponsesConfig,
    )
    from .llms.databricks.responses.transformation import (
        DatabricksResponsesAPIConfig as DatabricksResponsesAPIConfig,
    )
    from .llms.openrouter.responses.transformation import (
        OpenRouterResponsesAPIConfig as OpenRouterResponsesAPIConfig,
    )
    from .llms.gemini.interactions.transformation import (
        GoogleAIStudioInteractionsConfig as GoogleAIStudioInteractionsConfig,
    )
    from .llms.openai.chat.o_series_transformation import (
        OpenAIOSeriesConfig as OpenAIOSeriesConfig,
        OpenAIOSeriesConfig as OpenAIO1Config,
    )
    from .llms.anthropic.skills.transformation import (
        AnthropicSkillsConfig as AnthropicSkillsConfig,
    )
    from .llms.base_llm.skills.transformation import (
        BaseSkillsAPIConfig as BaseSkillsAPIConfig,
    )
    from .llms.gradient_ai.chat.transformation import GradientAIConfig as GradientAIConfig
    from .llms.openai.chat.gpt_transformation import OpenAIGPTConfig as OpenAIGPTConfig
    from .llms.openai.chat.gpt_5_transformation import OpenAIGPT5Config as OpenAIGPT5Config
as OpenAIWhisperAudioTranscriptionConfig, ) from .llms.openai.transcriptions.gpt_transformation import ( OpenAIGPTAudioTranscriptionConfig as OpenAIGPTAudioTranscriptionConfig, ) from .llms.openai.chat.gpt_audio_transformation import ( OpenAIGPTAudioConfig as OpenAIGPTAudioConfig, ) from .llms.nvidia_nim.chat.transformation import NvidiaNimConfig as NvidiaNimConfig from .llms.nvidia_nim.embed import ( NvidiaNimEmbeddingConfig as NvidiaNimEmbeddingConfig, ) # Type stubs for lazy-loaded config instances openaiOSeriesConfig: OpenAIOSeriesConfig openAIGPTConfig: OpenAIGPTConfig openAIGPTAudioConfig: OpenAIGPTAudioConfig openAIGPT5Config: OpenAIGPT5Config nvidiaNimConfig: NvidiaNimConfig nvidiaNimEmbeddingConfig: NvidiaNimEmbeddingConfig # Import config classes that need type stubs (for mypy) - import with _ prefix to avoid circular reference from .llms.vllm.completion.transformation import VLLMConfig as _VLLMConfig from .llms.deepseek.chat.transformation import ( DeepSeekChatConfig as _DeepSeekChatConfig, ) from .llms.sap.chat.transformation import ( GenAIHubOrchestrationConfig as _GenAIHubOrchestrationConfig, ) from .llms.sap.embed.transformation import ( GenAIHubEmbeddingConfig as _GenAIHubEmbeddingConfig, ) from .llms.azure.chat.o_series_transformation import ( AzureOpenAIO1Config as _AzureOpenAIO1Config, ) from .llms.perplexity.chat.transformation import ( PerplexityChatConfig as _PerplexityChatConfig, ) from .llms.nscale.chat.transformation import NscaleConfig as _NscaleConfig from .llms.watsonx.chat.transformation import ( IBMWatsonXChatConfig as _IBMWatsonXChatConfig, ) from .llms.watsonx.completion.transformation import ( IBMWatsonXAIConfig as _IBMWatsonXAIConfig, ) from .llms.litellm_proxy.chat.transformation import ( LiteLLMProxyChatConfig as _LiteLLMProxyChatConfig, ) from .llms.deepinfra.chat.transformation import DeepInfraConfig as _DeepInfraConfig from .llms.llamafile.chat.transformation import ( LlamafileChatConfig as _LlamafileChatConfig, ) from .llms.lm_studio.chat.transformation import ( LMStudioChatConfig as _LMStudioChatConfig, ) from .llms.lm_studio.embed.transformation import ( LmStudioEmbeddingConfig as _LmStudioEmbeddingConfig, ) from .llms.watsonx.embed.transformation import ( IBMWatsonXEmbeddingConfig as _IBMWatsonXEmbeddingConfig, ) from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( VertexGeminiConfig as _VertexGeminiConfig, ) # Type stubs for lazy-loaded config classes (to help mypy understand types) VLLMConfig: Type[_VLLMConfig] DeepSeekChatConfig: Type[_DeepSeekChatConfig] GenAIHubOrchestrationConfig: Type[_GenAIHubOrchestrationConfig] GenAIHubEmbeddingConfig: Type[_GenAIHubEmbeddingConfig] AzureOpenAIO1Config: Type[_AzureOpenAIO1Config] PerplexityChatConfig: Type[_PerplexityChatConfig] NscaleConfig: Type[_NscaleConfig] IBMWatsonXChatConfig: Type[_IBMWatsonXChatConfig] IBMWatsonXAIConfig: Type[_IBMWatsonXAIConfig] LiteLLMProxyChatConfig: Type[_LiteLLMProxyChatConfig] DeepInfraConfig: Type[_DeepInfraConfig] LlamafileChatConfig: Type[_LlamafileChatConfig] LMStudioChatConfig: Type[_LMStudioChatConfig] LmStudioEmbeddingConfig: Type[_LmStudioEmbeddingConfig] IBMWatsonXEmbeddingConfig: Type[_IBMWatsonXEmbeddingConfig] VertexAIConfig: Type[_VertexGeminiConfig] # Alias for VertexGeminiConfig from .llms.featherless_ai.chat.transformation import ( FeatherlessAIConfig as FeatherlessAIConfig, ) from .llms.cerebras.chat import CerebrasConfig as CerebrasConfig from .llms.baseten.chat import BasetenConfig as BasetenConfig from .llms.sambanova.chat import 
SambanovaConfig as SambanovaConfig from .llms.sambanova.embedding.transformation import ( SambaNovaEmbeddingConfig as SambaNovaEmbeddingConfig, ) from .llms.fireworks_ai.chat.transformation import ( FireworksAIConfig as FireworksAIConfig, ) from .llms.fireworks_ai.completion.transformation import ( FireworksAITextCompletionConfig as FireworksAITextCompletionConfig, ) from .llms.fireworks_ai.audio_transcription.transformation import ( FireworksAIAudioTranscriptionConfig as FireworksAIAudioTranscriptionConfig, ) from .llms.fireworks_ai.embed.fireworks_ai_transformation import ( FireworksAIEmbeddingConfig as FireworksAIEmbeddingConfig, ) from .llms.friendliai.chat.transformation import ( FriendliaiChatConfig as FriendliaiChatConfig, ) from .llms.jina_ai.embedding.transformation import ( JinaAIEmbeddingConfig as JinaAIEmbeddingConfig, ) from .llms.xai.chat.transformation import XAIChatConfig as XAIChatConfig from .llms.zai.chat.transformation import ZAIChatConfig as ZAIChatConfig from .llms.aiml.chat.transformation import AIMLChatConfig as AIMLChatConfig from .llms.volcengine.chat.transformation import ( VolcEngineChatConfig as VolcEngineChatConfig, VolcEngineChatConfig as VolcEngineConfig, ) from .llms.codestral.completion.transformation import ( CodestralTextCompletionConfig as CodestralTextCompletionConfig, ) from .llms.azure.azure import ( AzureOpenAIAssistantsAPIConfig as AzureOpenAIAssistantsAPIConfig, ) from .llms.heroku.chat.transformation import HerokuChatConfig as HerokuChatConfig from .llms.cometapi.chat.transformation import CometAPIConfig as CometAPIConfig from .llms.azure.chat.gpt_transformation import ( AzureOpenAIConfig as AzureOpenAIConfig, ) from .llms.azure.chat.gpt_5_transformation import ( AzureOpenAIGPT5Config as AzureOpenAIGPT5Config, ) from .llms.azure.completion.transformation import ( AzureOpenAITextConfig as AzureOpenAITextConfig, ) from .llms.hosted_vllm.chat.transformation import ( HostedVLLMChatConfig as HostedVLLMChatConfig, ) from .llms.hosted_vllm.embedding.transformation import ( HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig, ) from .llms.hosted_vllm.responses.transformation import ( HostedVLLMResponsesAPIConfig as HostedVLLMResponsesAPIConfig, ) from .llms.github_copilot.chat.transformation import ( GithubCopilotConfig as GithubCopilotConfig, ) from .llms.github_copilot.responses.transformation import ( GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig, ) from .llms.github_copilot.embedding.transformation import ( GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig, ) from .llms.chatgpt.chat.transformation import ChatGPTConfig as ChatGPTConfig from .llms.chatgpt.responses.transformation import ( ChatGPTResponsesAPIConfig as ChatGPTResponsesAPIConfig, ) from .llms.gigachat.chat.transformation import GigaChatConfig as GigaChatConfig from .llms.gigachat.embedding.transformation import ( GigaChatEmbeddingConfig as GigaChatEmbeddingConfig, ) from .llms.nebius.chat.transformation import NebiusConfig as NebiusConfig from .llms.wandb.chat.transformation import WandbConfig as WandbConfig from .llms.dashscope.chat.transformation import ( DashScopeChatConfig as DashScopeChatConfig, ) from .llms.moonshot.chat.transformation import ( MoonshotChatConfig as MoonshotChatConfig, ) from .llms.docker_model_runner.chat.transformation import ( DockerModelRunnerChatConfig as DockerModelRunnerChatConfig, ) from .llms.v0.chat.transformation import V0ChatConfig as V0ChatConfig from .llms.oci.chat.transformation import OCIChatConfig as OCIChatConfig 
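
# Note on the import style used throughout this section: importing a name "as"
# itself (e.g. `OCIChatConfig as OCIChatConfig`) marks it as an explicit
# re-export, so strict type checkers (mypy with --no-implicit-reexport, which
# --strict enables) treat it as part of litellm's public surface. A minimal
# illustration of the difference (a sketch, not executed here):
#
#   from .llms.oci.chat.transformation import OCIChatConfig
#       # implicit re-export - may be flagged under strict settings
#   from .llms.oci.chat.transformation import OCIChatConfig as OCIChatConfig
#       # explicit re-export - always considered public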
from .llms.morph.chat.transformation import MorphChatConfig as MorphChatConfig
from .llms.ragflow.chat.transformation import RAGFlowConfig as RAGFlowConfig
from .llms.lambda_ai.chat.transformation import (
    LambdaAIChatConfig as LambdaAIChatConfig,
)
from .llms.hyperbolic.chat.transformation import (
    HyperbolicChatConfig as HyperbolicChatConfig,
)
from .llms.vercel_ai_gateway.chat.transformation import (
    VercelAIGatewayConfig as VercelAIGatewayConfig,
)
from .llms.ovhcloud.chat.transformation import (
    OVHCloudChatConfig as OVHCloudChatConfig,
)
from .llms.ovhcloud.embedding.transformation import (
    OVHCloudEmbeddingConfig as OVHCloudEmbeddingConfig,
)
from .llms.cometapi.embed.transformation import (
    CometAPIEmbeddingConfig as CometAPIEmbeddingConfig,
)
from .llms.lemonade.chat.transformation import (
    LemonadeChatConfig as LemonadeChatConfig,
)
from .llms.snowflake.embedding.transformation import (
    SnowflakeEmbeddingConfig as SnowflakeEmbeddingConfig,
)
from .llms.amazon_nova.chat.transformation import (
    AmazonNovaChatConfig as AmazonNovaChatConfig,
)
from litellm.caching.llm_caching_handler import LLMClientCache
from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES
from litellm.types.utils import (
    BudgetConfig,
    CredentialItem,
    PriorityReservationDict,
    StandardKeyGenerationConfig,
)
from litellm.types.guardrails import GuardrailItem
from litellm.types.proxy.management_endpoints.ui_sso import (
    DefaultTeamSSOParams,
    LiteLLM_UpperboundKeyGenerateParams,
)

# Cost calculator functions
cost_per_token: Callable[..., Tuple[float, float]]
completion_cost: Callable[..., float]
response_cost_calculator: Any
modify_integration: Any

# Utils functions - type stubs for truly lazy-loaded functions only
# (functions NOT imported via "from .main import *")
get_response_string: Callable[..., str]
supports_function_calling: Callable[..., bool]
supports_web_search: Callable[..., bool]
supports_url_context: Callable[..., bool]
supports_response_schema: Callable[..., bool]
supports_parallel_function_calling: Callable[..., bool]
supports_vision: Callable[..., bool]
supports_audio_input: Callable[..., bool]
supports_audio_output: Callable[..., bool]
supports_system_messages: Callable[..., bool]
supports_reasoning: Callable[..., bool]
acreate: Callable[..., Any]
get_max_tokens: Callable[..., int]
get_model_info: Callable[..., _ModelInfoType]  # type: ignore[no-redef]
register_prompt_template: Callable[..., None]
validate_environment: Callable[..., dict]
check_valid_key: Callable[..., bool]
register_model: Callable[..., None]
encode: Callable[..., list]
decode: Callable[..., str]
_calculate_retry_after: Callable[..., float]
_should_retry: Callable[..., bool]
get_supported_openai_params: Callable[..., Optional[list]]
get_api_base: Callable[..., Optional[str]]
get_first_chars_messages: Callable[..., str]
get_provider_fields: Callable[..., List]
get_valid_models: Callable[..., list]
remove_index_from_tool_calls: Callable[..., None]

# Response types - truly lazy-loaded only (not in main.py or elsewhere)
ModelResponseListIterator: Type[Any]

# HTTP handler singletons (created lazily via __getattr__ at runtime)
module_level_aclient: AsyncHTTPHandler
module_level_client: HTTPHandler

# Bedrock tool name mappings instance (lazy-loaded)
from litellm.caching.caching import InMemoryCache

bedrock_tool_name_mappings: InMemoryCache

# Azure exception class (lazy-loaded)
from litellm.llms.azure.common_utils import AzureOpenAIError

# Secret manager types
from litellm.types.secret_managers.main import (
    KeyManagementSystem,
    KeyManagementSettings,  # Not lazy-loaded - needed for _key_management_settings initialization
)

# Custom logger class (lazy-loaded)
from litellm.integrations.custom_logger import CustomLogger

# Datadog LLM observability params (lazy-loaded)
from litellm.types.integrations.datadog_llm_obs import DatadogLLMObsInitParams

# Logging callback manager class and instance (lazy-loaded)
from litellm.litellm_core_utils.logging_callback_manager import (
    LoggingCallbackManager,
)

logging_callback_manager: LoggingCallbackManager

# provider_list is lazy-loaded
from litellm.types.utils import LlmProviders

provider_list: List[Union[LlmProviders, str]]

# Note: AmazonConverseConfig and OpenAILikeChatConfig are imported above in TYPE_CHECKING block

# Track if async client cleanup has been registered (for lazy loading)
_async_client_cleanup_registered = False

# Eager loading for backwards compatibility with VCR and other HTTP recording tools.
# When LITELLM_DISABLE_LAZY_LOADING is set, lazy-loaded attributes are loaded at
# import time. For now, this only affects encoding (tiktoken), as it was the only
# reported issue - see https://github.com/BerriAI/litellm/issues/18659.
# This ensures encoding is initialized before VCR starts recording HTTP requests.
if os.getenv("LITELLM_DISABLE_LAZY_LOADING", "").lower() in ("1", "true", "yes", "on"):
    # Load encoding at import time (pre-#18070 behavior)
    from .main import encoding


def __getattr__(name: str) -> Any:
    """Lazy import handler with cached registry for improved performance."""
    global _async_client_cleanup_registered

    # Register async client cleanup on first access (only once)
    if not _async_client_cleanup_registered:
        from litellm.llms.custom_httpx.async_client_cleanup import (
            register_async_client_cleanup,
        )

        register_async_client_cleanup()
        _async_client_cleanup_registered = True

    # Use cached registry from _lazy_imports instead of importing tuples every time
    from ._lazy_imports import _get_lazy_import_registry

    registry = _get_lazy_import_registry()

    # Check if name is in registry and call the cached handler function
    if name in registry:
        handler_func = registry[name]
        return handler_func(name)

    # Lazy load encoding from main.py to avoid heavy tiktoken import
    if name == "encoding":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "encoding" not in _globals:
            from .main import encoding as _encoding

            _globals["encoding"] = _encoding
        return _globals["encoding"]

    # Lazy load bedrock_tool_name_mappings instance
    if name == "bedrock_tool_name_mappings":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "bedrock_tool_name_mappings" not in _globals:
            from .llms.bedrock.chat.invoke_handler import (
                bedrock_tool_name_mappings as _bedrock_tool_name_mappings,
            )

            _globals["bedrock_tool_name_mappings"] = _bedrock_tool_name_mappings
        return _globals["bedrock_tool_name_mappings"]

    # Lazy load AzureOpenAIError exception class
    if name == "AzureOpenAIError":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "AzureOpenAIError" not in _globals:
            from .llms.azure.common_utils import AzureOpenAIError as _AzureOpenAIError

            _globals["AzureOpenAIError"] = _AzureOpenAIError
        return _globals["AzureOpenAIError"]

    # Lazy load openaiOSeriesConfig instance
    if name == "openaiOSeriesConfig":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        if "openaiOSeriesConfig" not in _globals:
            # Import the config class and instantiate it
            config_class = __getattr__("OpenAIOSeriesConfig")
            _globals["openaiOSeriesConfig"] = config_class()
        return _globals["openaiOSeriesConfig"]

    # Lazy load other config instances
    _config_instances = {
        "openAIGPTConfig": "OpenAIGPTConfig",
        "openAIGPTAudioConfig": "OpenAIGPTAudioConfig",
        "openAIGPT5Config": "OpenAIGPT5Config",
        "nvidiaNimConfig": "NvidiaNimConfig",
        "nvidiaNimEmbeddingConfig": "NvidiaNimEmbeddingConfig",
    }
    if name in _config_instances:
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        if name not in _globals:
            # Import the config class and instantiate it
            config_class = __getattr__(_config_instances[name])
            _globals[name] = config_class()
        return _globals[name]

    # Handle OpenAIO1Config alias
    if name == "OpenAIO1Config":
        return __getattr__("OpenAIOSeriesConfig")

    # Lazy load provider_list
    if name == "provider_list":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "provider_list" not in _globals:
            # LlmProviders is eagerly imported above, so we can import it directly
            from litellm.types.utils import LlmProviders

            _globals["provider_list"] = list(LlmProviders)
        return _globals["provider_list"]

    # Lazy load priority_reservation_settings instance
    if name == "priority_reservation_settings":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "priority_reservation_settings" not in _globals:
            # Import the class and instantiate it
            PriorityReservationSettings = __getattr__("PriorityReservationSettings")
            _globals["priority_reservation_settings"] = PriorityReservationSettings()
        return _globals["priority_reservation_settings"]

    # Lazy load logging_callback_manager instance
    if name == "logging_callback_manager":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "logging_callback_manager" not in _globals:
            # Import the class and instantiate it
            LoggingCallbackManager = __getattr__("LoggingCallbackManager")
            _globals["logging_callback_manager"] = LoggingCallbackManager()
        return _globals["logging_callback_manager"]

    # Lazy load _service_logger module
    if name == "_service_logger":
        from ._lazy_imports import _get_litellm_globals

        _globals = _get_litellm_globals()
        # Check if already cached
        if "_service_logger" not in _globals:
            # Import the module lazily
            import litellm._service_logger

            _globals["_service_logger"] = litellm._service_logger
        return _globals["_service_logger"]

    # Lazy load evals module functions
    if name in [
        "acreate_eval", "alist_evals", "aget_eval", "aupdate_eval", "adelete_eval",
        "acancel_eval", "create_eval", "list_evals", "get_eval", "update_eval",
        "delete_eval", "cancel_eval", "acreate_run", "alist_runs", "aget_run",
        "acancel_run", "adelete_run", "create_run", "list_runs", "get_run",
        "cancel_run", "delete_run",
    ]:
        from litellm.evals.main import (
            acreate_eval, alist_evals, aget_eval, aupdate_eval, adelete_eval,
            acancel_eval, create_eval, list_evals, get_eval, update_eval,
            delete_eval, cancel_eval, acreate_run, alist_runs, aget_run,
            acancel_run, adelete_run, create_run, list_runs, get_run,
            cancel_run, delete_run,
        )

        # the names imported above are now function locals, so look up the
        # requested one by name
        return locals()[name]

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


# ALL_LITELLM_RESPONSE_TYPES is lazy-loaded via __getattr__ to avoid loading utils at import time
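
# Illustrative usage of the lazy-loading machinery above (a sketch - the names
# shown are attributes of this module; nothing here is executed). Under
# PEP 562, module-level __getattr__ runs only when an attribute is not already
# a module global, and each handler above caches its result, so the import or
# instantiation cost is paid at most once per process:
#
#   import litellm
#   litellm.encoding             # first access runs `from .main import encoding`
#   litellm.encoding             # later accesses reuse the cached value
#   litellm.openaiOSeriesConfig  # OpenAIOSeriesConfig() built on first access
#
# The annotation-only stubs earlier in this file (e.g.
# `provider_list: List[Union[LlmProviders, str]]`) exist so mypy sees a typed
# attribute even though the value itself is only created inside __getattr__
# at runtime.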