"""Abstraction function for OpenAI's realtime API"""
import os
from typing import Any, Dict, Optional, cast
import litellm
from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES, request_timeout
from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.llms.base_llm.realtime.transformation import BaseRealtimeConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.secret_managers.main import get_secret_str
from litellm.types.realtime import (
RealtimeClientSecretRequest,
RealtimeExpiresAfter,
RealtimeQueryParams,
RealtimeSessionConfig,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager
from ..litellm_core_utils.get_litellm_params import get_litellm_params
from ..litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from ..llms.azure.realtime.handler import AzureOpenAIRealtime
from ..llms.bedrock.realtime.handler import BedrockRealtime
from ..llms.custom_httpx.http_handler import get_shared_realtime_ssl_context
from ..llms.openai.realtime.handler import OpenAIRealtime
from ..llms.vertex_ai.realtime.transformation import VertexAIRealtimeConfig
from ..llms.vertex_ai.vertex_llm_base import VertexBase
from ..llms.xai.realtime.handler import XAIRealtime
from ..utils import client as wrapper_client

azure_realtime = AzureOpenAIRealtime()
openai_realtime = OpenAIRealtime()
bedrock_realtime = BedrockRealtime()
xai_realtime = XAIRealtime()
vertex_llm_base = VertexBase()
base_llm_http_handler = BaseLLMHTTPHandler()
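
# Module-level handler singletons: instantiated once at import time and
# reused for every realtime request.
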
def _build_litellm_metadata(kwargs: dict) -> dict:
"""Build the litellm_metadata dict for guardrail checking (internal only, not forwarded to provider)."""
metadata: dict = {**(kwargs.get("litellm_metadata") or {})}
guardrails = (
(kwargs.get("metadata") or {}).get("guardrails")
or kwargs.get("guardrails")
or []
)
if guardrails:
metadata["guardrails"] = guardrails
return metadata
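
# Illustrative example (hypothetical values): given
#   kwargs = {"litellm_metadata": {"user_id": "u-1"},
#             "metadata": {"guardrails": ["pii_masking"]}}
# this returns {"user_id": "u-1", "guardrails": ["pii_masking"]}. A top-level
# kwargs["guardrails"] list is used only when metadata carries no guardrails.
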
def _get_realtime_http_provider_config(
custom_llm_provider: str,
dynamic_api_base: Optional[str],
dynamic_api_key: Optional[str],
litellm_params: GenericLiteLLMParams,
) -> tuple[Any, str, str]:
"""
Return (provider_config, resolved_api_base, resolved_api_key) for the
realtime HTTP endpoints (client_secrets / realtime_calls).
Uses ProviderConfigManager so each provider keeps its credential-resolution
and URL-construction logic in its own transformation class.
"""
from litellm.llms.base_llm.realtime.http_transformation import (
BaseRealtimeHTTPConfig,
)
provider_config: Optional[BaseRealtimeHTTPConfig] = None
if custom_llm_provider in LlmProviders._member_map_.values():
provider_config = ProviderConfigManager.get_provider_realtime_http_config(
model="",
provider=LlmProviders(custom_llm_provider),
)
raw_api_base = dynamic_api_base or litellm_params.api_base
raw_api_key = dynamic_api_key or litellm_params.api_key
if provider_config is not None:
resolved_api_base = provider_config.get_api_base(api_base=raw_api_base)
resolved_api_key = provider_config.get_api_key(api_key=raw_api_key)
else:
# Fallback for providers without a dedicated HTTP config (treated as OpenAI-compatible).
resolved_api_base = raw_api_base or litellm.api_base or "https://api.openai.com"
resolved_api_key = (
raw_api_key
or litellm.api_key
or litellm.openai_key
or get_secret_str("OPENAI_API_KEY")
or ""
)
return provider_config, resolved_api_base.rstrip("/"), resolved_api_key
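
# Resolution order, as implemented above: per-request dynamic values win,
# then router-level litellm_params, then the provider's own transformation
# config; the OpenAI-compatible fallbacks (litellm.api_key / OPENAI_API_KEY /
# https://api.openai.com) apply only when no provider config is registered.
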
@wrapper_client
async def acreate_realtime_client_secret(
model: Optional[str] = None,
session: Optional[Dict[str, Any]] = None,
expires_after: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
**kwargs,
):
req = RealtimeClientSecretRequest(
model=model,
session=RealtimeSessionConfig(**session) if session else None,
expires_after=RealtimeExpiresAfter(**expires_after) if expires_after else None,
)
model_name = (
(req.session.model if req.session is not None else None)
or req.model
or "gpt-4o-realtime-preview"
)
litellm_logging_obj: LiteLLMLogging = kwargs.get("litellm_logging_obj") # type: ignore
litellm_params = GenericLiteLLMParams(**kwargs)
(
model_name,
custom_llm_provider,
dynamic_api_key,
dynamic_api_base,
) = get_llm_provider(
model=model_name,
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
)
(
provider_config,
resolved_api_base,
resolved_api_key,
) = _get_realtime_http_provider_config(
custom_llm_provider=custom_llm_provider,
dynamic_api_base=dynamic_api_base,
dynamic_api_key=dynamic_api_key,
litellm_params=litellm_params,
)
litellm_logging_obj.update_environment_variables(
model=model_name,
optional_params={"expires_after": expires_after, "session": session},
litellm_params={"api_base": resolved_api_base},
custom_llm_provider=custom_llm_provider,
)
request_data = req.model_dump(exclude_none=True, exclude={"model"})
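    # `model` is excluded from the payload above; it is passed to the handler
    # separately via the `model=` argument below.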
return await base_llm_http_handler.async_realtime_client_secret_handler(
api_base=resolved_api_base,
api_key=resolved_api_key,
request_data=request_data,
logging_obj=litellm_logging_obj,
timeout=timeout or request_timeout,
provider_config=provider_config,
model=model_name,
extra_headers=kwargs.get("extra_headers"),
client=kwargs.get("client"),
api_version=litellm_params.api_version,
)
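
# Illustrative usage (a sketch; the expires_after values are assumptions):
#   client_secret = await acreate_realtime_client_secret(
#       model="gpt-4o-realtime-preview",
#       expires_after={"anchor": "created_at", "seconds": 600},
#   )
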
@wrapper_client
async def arealtime_calls(
openai_ephemeral_key: str,
sdp_body: bytes,
model: Optional[str] = None,
session: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
**kwargs,
):
model_name = model or "gpt-4o-realtime-preview"
litellm_logging_obj: LiteLLMLogging = kwargs.get("litellm_logging_obj") # type: ignore
litellm_params = GenericLiteLLMParams(**kwargs)
(
model_name,
custom_llm_provider,
dynamic_api_key,
dynamic_api_base,
) = get_llm_provider(
model=model_name,
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
)
provider_config, resolved_api_base, _ = _get_realtime_http_provider_config(
custom_llm_provider=custom_llm_provider,
dynamic_api_base=dynamic_api_base,
dynamic_api_key=dynamic_api_key,
litellm_params=litellm_params,
)
litellm_logging_obj.update_environment_variables(
model=model_name,
optional_params={"realtime_calls": True, "session": session},
litellm_params={"api_base": resolved_api_base},
custom_llm_provider=custom_llm_provider,
)
return await base_llm_http_handler.async_realtime_calls_handler(
api_base=resolved_api_base,
openai_ephemeral_key=openai_ephemeral_key,
sdp_body=sdp_body,
logging_obj=litellm_logging_obj,
timeout=timeout or request_timeout,
provider_config=provider_config,
model=model_name,
session_config=session,
extra_headers=kwargs.get("extra_headers"),
client=kwargs.get("client"),
api_version=litellm_params.api_version,
)
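
# Illustrative usage (a sketch; the SDP offer and ephemeral key come from the
# caller's WebRTC stack and are assumptions here):
#   sdp_answer = await arealtime_calls(
#       openai_ephemeral_key=ephemeral_key,
#       sdp_body=sdp_offer.encode("utf-8"),
#       model="gpt-4o-realtime-preview",
#   )
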
@wrapper_client
async def _arealtime( # noqa: PLR0915
model: str,
websocket: Any, # fastapi websocket
api_base: Optional[str] = None,
api_key: Optional[str] = None,
api_version: Optional[str] = None,
azure_ad_token: Optional[str] = None,
client: Optional[Any] = None,
timeout: Optional[float] = None,
query_params: Optional[RealtimeQueryParams] = None,
**kwargs,
):
"""
Private function to handle the realtime API call.
For PROXY use only.
"""
headers = cast(Optional[dict], kwargs.get("headers"))
extra_headers = cast(Optional[dict], kwargs.get("extra_headers"))
if headers is None:
headers = {}
if extra_headers is not None:
headers.update(extra_headers)
litellm_logging_obj: LiteLLMLogging = kwargs.get("litellm_logging_obj") # type: ignore
user = kwargs.get("user", None)
litellm_params = GenericLiteLLMParams(**kwargs)
litellm_params_dict = get_litellm_params(**kwargs)
model, _custom_llm_provider, dynamic_api_key, dynamic_api_base = get_llm_provider(
model=model,
api_base=api_base,
api_key=api_key,
)
# Ensure query params use the normalized provider model (no proxy aliases).
if query_params is not None:
query_params = {**query_params, "model": model}
litellm_logging_obj.update_environment_variables(
model=model,
user=user,
optional_params={},
litellm_params=litellm_params_dict,
custom_llm_provider=_custom_llm_provider,
)
provider_config: Optional[BaseRealtimeConfig] = None
if _custom_llm_provider in LlmProviders._member_map_.values():
provider_config = ProviderConfigManager.get_provider_realtime_config(
model=model,
provider=LlmProviders(_custom_llm_provider),
)
if provider_config is not None:
await base_llm_http_handler.async_realtime(
model=model,
websocket=websocket,
logging_obj=litellm_logging_obj,
provider_config=provider_config,
api_base=api_base,
api_key=api_key,
client=client,
timeout=timeout,
headers=headers,
user_api_key_dict=kwargs.get("user_api_key_dict"),
litellm_metadata=_build_litellm_metadata(kwargs),
)
elif _custom_llm_provider == "azure":
api_base = (
dynamic_api_base
or litellm_params.api_base
or litellm.api_base
or get_secret_str("AZURE_API_BASE")
)
# set API KEY
api_key = (
dynamic_api_key
or litellm.api_key
or litellm.openai_key
or get_secret_str("AZURE_API_KEY")
)
api_version = api_version or litellm_params.api_version or "2024-10-01-preview"
        realtime_protocol = (
            kwargs.get("realtime_protocol")
            # GenericLiteLLMParams is a pydantic model, not a dict, so use
            # getattr rather than .get here.
            or getattr(litellm_params, "realtime_protocol", None)
            or os.environ.get("LITELLM_AZURE_REALTIME_PROTOCOL")
            or "beta"
        )
await azure_realtime.async_realtime(
model=model,
websocket=websocket,
api_base=api_base,
api_key=api_key,
api_version=api_version,
azure_ad_token=None,
client=None,
timeout=timeout,
logging_obj=litellm_logging_obj,
realtime_protocol=realtime_protocol,
user_api_key_dict=kwargs.get("user_api_key_dict"),
litellm_metadata=_build_litellm_metadata(kwargs),
)
elif _custom_llm_provider == "openai":
api_base = (
dynamic_api_base
or litellm_params.api_base
or litellm.api_base
or "https://api.openai.com/"
)
# set API KEY
api_key = (
dynamic_api_key
or litellm.api_key
or litellm.openai_key
or get_secret_str("OPENAI_API_KEY")
)
await openai_realtime.async_realtime(
model=model,
websocket=websocket,
logging_obj=litellm_logging_obj,
api_base=api_base,
api_key=api_key,
client=None,
timeout=timeout,
query_params=query_params,
user_api_key_dict=kwargs.get("user_api_key_dict"),
litellm_metadata=_build_litellm_metadata(kwargs),
)
elif _custom_llm_provider == "bedrock":
# Extract AWS parameters from kwargs
aws_region_name = kwargs.get("aws_region_name")
aws_access_key_id = kwargs.get("aws_access_key_id")
aws_secret_access_key = kwargs.get("aws_secret_access_key")
aws_session_token = kwargs.get("aws_session_token")
aws_role_name = kwargs.get("aws_role_name")
aws_session_name = kwargs.get("aws_session_name")
aws_profile_name = kwargs.get("aws_profile_name")
aws_web_identity_token = kwargs.get("aws_web_identity_token")
aws_sts_endpoint = kwargs.get("aws_sts_endpoint")
aws_bedrock_runtime_endpoint = kwargs.get("aws_bedrock_runtime_endpoint")
aws_external_id = kwargs.get("aws_external_id")
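        # Parameters left as None fall back to the Bedrock handler's standard
        # AWS credential resolution (environment, profile, or instance role),
        # mirroring the boto3 default credential chain.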
await bedrock_realtime.async_realtime(
model=model,
websocket=websocket,
logging_obj=litellm_logging_obj,
api_base=dynamic_api_base or api_base,
api_key=dynamic_api_key or api_key,
timeout=timeout,
aws_region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_session_token=aws_session_token,
aws_role_name=aws_role_name,
aws_session_name=aws_session_name,
aws_profile_name=aws_profile_name,
aws_web_identity_token=aws_web_identity_token,
aws_sts_endpoint=aws_sts_endpoint,
aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
aws_external_id=aws_external_id,
)
elif _custom_llm_provider == "xai":
api_base = (
dynamic_api_base
or litellm_params.api_base
or get_secret_str("XAI_API_BASE")
or "https://api.x.ai/v1"
)
# set API KEY
api_key = dynamic_api_key or litellm.api_key or get_secret_str("XAI_API_KEY")
await xai_realtime.async_realtime(
model=model,
websocket=websocket,
logging_obj=litellm_logging_obj,
api_base=api_base,
api_key=api_key,
client=None,
timeout=timeout,
query_params=query_params,
user_api_key_dict=kwargs.get("user_api_key_dict"),
litellm_metadata=_build_litellm_metadata(kwargs),
)
elif _custom_llm_provider == "vertex_ai":
vertex_credentials = (
kwargs.get("vertex_credentials")
or kwargs.get("vertex_ai_credentials")
or get_secret_str("VERTEXAI_CREDENTIALS")
)
vertex_project = (
kwargs.get("vertex_project")
or kwargs.get("vertex_ai_project")
or litellm.vertex_project
or get_secret_str("VERTEXAI_PROJECT")
)
vertex_location = (
kwargs.get("vertex_location")
or kwargs.get("vertex_ai_location")
or litellm.vertex_location
or get_secret_str("VERTEXAI_LOCATION")
)
resolved_location = vertex_llm_base.get_vertex_region(
vertex_region=vertex_location, model=model
)
(
access_token,
resolved_project,
) = await vertex_llm_base._ensure_access_token_async(
credentials=vertex_credentials,
project_id=vertex_project,
custom_llm_provider="vertex_ai",
)
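        # Resolves service-account / ADC credentials into a short-lived OAuth
        # access token, and returns the effective GCP project id when one was
        # not supplied explicitly.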
vertex_realtime_config = VertexAIRealtimeConfig(
access_token=access_token,
project=resolved_project,
location=resolved_location,
)
await base_llm_http_handler.async_realtime(
model=model,
websocket=websocket,
logging_obj=litellm_logging_obj,
provider_config=vertex_realtime_config,
api_base=dynamic_api_base or litellm_params.api_base,
api_key=None,
client=client,
timeout=timeout,
headers=headers,
user_api_key_dict=kwargs.get("user_api_key_dict"),
litellm_metadata=_build_litellm_metadata(kwargs),
)
else:
raise ValueError(f"Unsupported model: {model}")
async def _realtime_health_check(
model: str,
custom_llm_provider: str,
api_key: Optional[str],
api_base: Optional[str] = None,
api_version: Optional[str] = None,
realtime_protocol: Optional[str] = None,
):
"""
Health check for realtime API - tries connection to the realtime API websocket
Args:
model: str - model name
api_base: str - api base
api_version: Optional[str] - api version
api_key: str - api key
custom_llm_provider: str - custom llm provider
realtime_protocol: Optional[str] - protocol version ("GA"/"v1" for GA path, "beta"/None for beta path)
Returns:
bool - True if connection is successful, False otherwise
Raises:
Exception - if the connection is not successful
"""
import websockets
url: Optional[str] = None
if custom_llm_provider == "azure":
url = azure_realtime._construct_url(
api_base=api_base or "",
model=model,
api_version=api_version or "2024-10-01-preview",
realtime_protocol=realtime_protocol,
)
elif custom_llm_provider == "openai":
url = openai_realtime._construct_url(
api_base=api_base or "https://api.openai.com/",
query_params={"model": model},
)
elif custom_llm_provider == "xai":
url = xai_realtime._construct_url(
api_base=api_base or "https://api.x.ai/v1", query_params={"model": model}
)
elif custom_llm_provider == "vertex_ai":
vertex_location = litellm.vertex_location or get_secret_str("VERTEXAI_LOCATION")
resolved_location = vertex_llm_base.get_vertex_region(
vertex_region=vertex_location, model=model
)
(
access_token,
resolved_project,
) = await vertex_llm_base._ensure_access_token_async(
credentials=None,
project_id=litellm.vertex_project or get_secret_str("VERTEXAI_PROJECT"),
custom_llm_provider="vertex_ai",
)
vertex_realtime_config = VertexAIRealtimeConfig(
access_token=access_token,
project=resolved_project,
location=resolved_location,
)
url = vertex_realtime_config.get_complete_url(api_base=api_base, model=model)
ssl_context = get_shared_realtime_ssl_context()
headers = vertex_realtime_config.validate_environment(
headers={}, model=model, api_key=None
)
async with websockets.connect( # type: ignore
url,
additional_headers=headers,
max_size=REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES,
ssl=ssl_context,
):
return True
else:
raise ValueError(f"Unsupported model: {model}")
    ssl_context = get_shared_realtime_ssl_context()
    # Azure authenticates its realtime websocket with an `api-key` header;
    # the OpenAI-compatible providers (openai, xai) expect a bearer token.
    auth_headers = (
        {"api-key": api_key or ""}
        if custom_llm_provider == "azure"
        else {"Authorization": f"Bearer {api_key}"}
    )
    async with websockets.connect(  # type: ignore
        url,
        additional_headers=auth_headers,
        max_size=REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES,
        ssl=ssl_context,
    ):
        return True