1304 lines
47 KiB
Python
1304 lines
47 KiB
Python
|
|
import asyncio
|
|||
|
|
import os
|
|||
|
|
import ssl
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
from typing import (
|
|||
|
|
TYPE_CHECKING,
|
|||
|
|
Any,
|
|||
|
|
Callable,
|
|||
|
|
Dict,
|
|||
|
|
List,
|
|||
|
|
Mapping,
|
|||
|
|
Optional,
|
|||
|
|
Tuple,
|
|||
|
|
Union,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
import certifi
|
|||
|
|
import httpx
|
|||
|
|
from aiohttp import ClientSession, TCPConnector
|
|||
|
|
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
|
|||
|
|
from httpx._types import RequestFiles
|
|||
|
|
|
|||
|
|
import litellm
|
|||
|
|
from litellm._logging import verbose_logger
|
|||
|
|
from litellm.constants import (
|
|||
|
|
_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
|
|||
|
|
AIOHTTP_CONNECTOR_LIMIT,
|
|||
|
|
AIOHTTP_CONNECTOR_LIMIT_PER_HOST,
|
|||
|
|
AIOHTTP_KEEPALIVE_TIMEOUT,
|
|||
|
|
AIOHTTP_NEEDS_CLEANUP_CLOSED,
|
|||
|
|
AIOHTTP_TTL_DNS_CACHE,
|
|||
|
|
DEFAULT_SSL_CIPHERS,
|
|||
|
|
)
|
|||
|
|
from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
|
|||
|
|
from litellm.types.llms.custom_http import *
|
|||
|
|
|
|||
|
|
if TYPE_CHECKING:
    # These imports are only evaluated by static type checkers; keeping them
    # out of the runtime path avoids importing heavy / potentially circular
    # modules at startup.
    from litellm import LlmProviders
    from litellm.litellm_core_utils.litellm_logging import (
        Logging as LiteLLMLoggingObject,
    )
    from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
else:
    # At runtime the names are aliased to Any so annotations still resolve.
    LlmProviders = Any
    LiteLLMLoggingObject = Any
    LiteLLMAiohttpTransport = Any
|
|||
|
|
|
|||
|
|
# Resolve the package version used in the default User-Agent header; fall
# back to a sentinel when version metadata is unavailable (e.g. running
# from source).
try:
    from litellm._version import version
except Exception:
    version = "0.0.0"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_default_headers() -> dict:
    """
    Build the default header dict for outbound HTTP requests.

    Returns ``{"User-Agent": "litellm/{version}"}`` unless the
    ``LITELLM_USER_AGENT`` environment variable is set, in which case that
    value fully overrides the User-Agent header.
    """
    override = os.environ.get("LITELLM_USER_AGENT")
    value = override if override is not None else f"litellm/{version}"
    return {"User-Agent": value}
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Initialize headers (User-Agent)
# Computed once at import time; later changes to LITELLM_USER_AGENT do not
# affect this module-level value (create_client recomputes per client).
headers = get_default_headers()

# https://www.python-httpx.org/advanced/timeouts
# Conservative default: 5s total and 5s connect timeout, used when a client
# is created without an explicit timeout.
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _prepare_request_data_and_content(
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None,
|
|||
|
|
content: Any = None,
|
|||
|
|
) -> Tuple[Optional[Union[dict, Mapping]], Any]:
|
|||
|
|
"""
|
|||
|
|
Helper function to route data/content parameters correctly for httpx requests
|
|||
|
|
|
|||
|
|
This prevents httpx DeprecationWarnings that cause memory leaks.
|
|||
|
|
|
|||
|
|
Background:
|
|||
|
|
- httpx shows a DeprecationWarning when you pass bytes/str to `data=`
|
|||
|
|
- It wants you to use `content=` instead for bytes/str
|
|||
|
|
- The warning itself leaks memory when triggered repeatedly
|
|||
|
|
|
|||
|
|
Solution:
|
|||
|
|
- Move bytes/str from `data=` to `content=` before calling build_request
|
|||
|
|
- Keep dicts in `data=` (that's still the correct parameter for dicts)
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
data: Request data (can be dict, str, or bytes)
|
|||
|
|
content: Request content (raw bytes/str)
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
Tuple of (request_data, request_content) properly routed for httpx
|
|||
|
|
"""
|
|||
|
|
request_data = None
|
|||
|
|
request_content = content
|
|||
|
|
|
|||
|
|
if data is not None:
|
|||
|
|
if isinstance(data, (bytes, str)):
|
|||
|
|
# Bytes/strings belong in content= (only if not already provided)
|
|||
|
|
if content is None:
|
|||
|
|
request_content = data
|
|||
|
|
else:
|
|||
|
|
# dict/Mapping stays in data= parameter
|
|||
|
|
request_data = data
|
|||
|
|
|
|||
|
|
return request_data, request_content
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Cache for SSL contexts to avoid creating duplicate contexts with the same configuration
# Key: tuple of (cafile, ssl_security_level, ssl_ecdh_curve)
# Value: ssl.SSLContext
# NOTE: unbounded by design — the key space is a handful of distinct SSL
# configurations per process, so the cache stays tiny.
_ssl_context_cache: Dict[
    Tuple[Optional[str], Optional[str], Optional[str]], ssl.SSLContext
] = {}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _create_ssl_context(
    cafile: Optional[str],
    ssl_security_level: Optional[str],
    ssl_ecdh_curve: Optional[str],
) -> ssl.SSLContext:
    """
    Create an SSL context with the given configuration.
    This is separated from get_ssl_configuration to enable caching.

    Args:
        cafile: Path to the CA bundle used for certificate verification.
        ssl_security_level: Custom OpenSSL cipher string (e.g. from the
            SSL_SECURITY_LEVEL env var); falls back to DEFAULT_SSL_CIPHERS.
        ssl_ecdh_curve: Optional ECDH curve name for key exchange; invalid
            or unsupported values are logged and ignored.

    Returns:
        ssl.SSLContext: Configured context with minimum TLS 1.2.
    """
    custom_ssl_context = ssl.create_default_context(cafile=cafile)

    # Optimize SSL handshake performance
    # Set minimum TLS version to 1.2 for better performance
    custom_ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2

    # Configure cipher suites for optimal performance
    if ssl_security_level and isinstance(ssl_security_level, str):
        # User provided custom cipher configuration (e.g., via SSL_SECURITY_LEVEL env var)
        custom_ssl_context.set_ciphers(ssl_security_level)
    else:
        # Use optimized cipher list that strongly prefers fast ciphers
        # but falls back to widely compatible ones
        custom_ssl_context.set_ciphers(DEFAULT_SSL_CIPHERS)

    # Configure ECDH curve for key exchange (e.g., to disable PQC and improve performance)
    # Set SSL_ECDH_CURVE env var or litellm.ssl_ecdh_curve to 'X25519' to disable PQC
    # Common valid curves: X25519, prime256v1, secp384r1, secp521r1
    if ssl_ecdh_curve and isinstance(ssl_ecdh_curve, str):
        try:
            custom_ssl_context.set_ecdh_curve(ssl_ecdh_curve)
            verbose_logger.debug(f"SSL ECDH curve set to: {ssl_ecdh_curve}")
        except AttributeError:
            # set_ecdh_curve is not available on all Python/OpenSSL builds.
            verbose_logger.warning(
                f"SSL ECDH curve configuration not supported. "
                f"Python version: {sys.version.split()[0]}, OpenSSL version: {ssl.OPENSSL_VERSION}. "
                f"Requested curve: {ssl_ecdh_curve}. Continuing with default curves."
            )
        except ValueError as e:
            # Invalid curve name
            verbose_logger.warning(
                f"Invalid SSL ECDH curve name: '{ssl_ecdh_curve}'. {e}. "
                f"Common valid curves: X25519, prime256v1, secp384r1, secp521r1. "
                f"Continuing with default curves (including PQC)."
            )

    return custom_ssl_context
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_ssl_verify(
    ssl_verify: Optional[Union[bool, str]] = None,
) -> Union[bool, str]:
    """
    Resolve the effective SSL verification setting.

    Resolution order:
        1. the ``ssl_verify`` argument, when given
        2. the ``SSL_VERIFY`` environment variable
        3. ``litellm.ssl_verify``
        4. the ``SSL_CERT_FILE`` environment variable (only when the
           resolved value is ``True`` and the file exists)

    Returns:
        Union[bool, str]: ``True``/``False``, or a filesystem path to a CA
        bundle.
    """
    from litellm.secret_managers.main import str_to_bool

    resolved = ssl_verify
    if resolved is None:
        resolved = os.getenv("SSL_VERIFY", litellm.ssl_verify)

    if isinstance(resolved, str):
        # A string naming an existing file is treated as a CA bundle path.
        if os.path.exists(resolved):
            return resolved

        # Otherwise interpret boolean-ish strings ("True"/"False").
        as_bool = str_to_bool(resolved)
        if as_bool is not None:
            resolved = as_bool

    # With verification enabled, an existing SSL_CERT_FILE overrides True.
    if resolved is True:
        cert_file = os.getenv("SSL_CERT_FILE")
        if cert_file and os.path.exists(cert_file):
            return cert_file

    return True if resolved is None else resolved
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_ssl_configuration(
    ssl_verify: Optional[VerifyTypes] = None,
) -> Union[bool, str, ssl.SSLContext]:
    """
    Unified SSL configuration function that handles ssl_context and ssl_verify logic.

    SSL Configuration Priority:
    1. If ssl_verify is provided -> is a SSL context use the custom SSL context
    2. If ssl_verify is False -> disable SSL verification (ssl=False)
    3. If ssl_verify is a string -> use it as a path to CA bundle file
    4. If SSL_CERT_FILE environment variable is set and exists -> use it as CA bundle file
    5. Else will use default SSL context with certifi CA bundle

    If ssl_security_level is set, it will apply the security level to the SSL context.

    SSL contexts are cached to avoid creating duplicate contexts with the same configuration,
    which reduces memory allocation and improves performance.

    Args:
        ssl_verify: SSL verification setting. Can be:
            - None: Use default from environment/litellm settings
            - False: Disable SSL verification
            - True: Enable SSL verification
            - str: Path to CA bundle file

    Returns:
        Union[bool, str, ssl.SSLContext]: Appropriate SSL configuration.
        NOTE: any value other than False is normalized into a (cached)
        ssl.SSLContext; only ``False`` is returned as-is.
    """
    if isinstance(ssl_verify, ssl.SSLContext):
        # If ssl_verify is already an SSLContext, return it directly
        return ssl_verify

    # Get resolved ssl_verify (bool or CA-bundle path)
    ssl_verify = get_ssl_verify(ssl_verify=ssl_verify)

    # Optional cipher string / ECDH curve overrides (env vars win over
    # litellm module settings).
    ssl_security_level = os.getenv("SSL_SECURITY_LEVEL", litellm.ssl_security_level)
    ssl_ecdh_curve = os.getenv("SSL_ECDH_CURVE", litellm.ssl_ecdh_curve)

    # Pick the CA bundle: explicit path > SSL_CERT_FILE > certifi default.
    cafile = None
    if isinstance(ssl_verify, str) and os.path.exists(ssl_verify):
        cafile = ssl_verify
    if not cafile:
        ssl_cert_file = os.getenv("SSL_CERT_FILE")
        if ssl_cert_file and os.path.exists(ssl_cert_file):
            cafile = ssl_cert_file
        else:
            cafile = certifi.where()

    if ssl_verify is not False:
        # Create cache key from configuration parameters
        cache_key = (cafile, ssl_security_level, ssl_ecdh_curve)

        # Check if we have a cached SSL context for this configuration
        if cache_key not in _ssl_context_cache:
            _ssl_context_cache[cache_key] = _create_ssl_context(
                cafile=cafile,
                ssl_security_level=ssl_security_level,
                ssl_ecdh_curve=ssl_ecdh_curve,
            )

        # Return the cached SSL context
        return _ssl_context_cache[cache_key]

    # Verification explicitly disabled.
    return ssl_verify
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Lazily-initialized SSL configuration shared by realtime websocket clients;
# populated on first call to get_shared_realtime_ssl_context().
_shared_realtime_ssl_context: Optional[Union[bool, str, ssl.SSLContext]] = None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_shared_realtime_ssl_context() -> Union[bool, str, ssl.SSLContext]:
    """
    Return the SSL configuration shared by realtime websocket clients.

    Built lazily on first use (avoiding import-order cycles during startup)
    and memoized in a module-level global, so all realtime clients share a
    single configuration.
    """
    global _shared_realtime_ssl_context
    if _shared_realtime_ssl_context is not None:
        return _shared_realtime_ssl_context
    _shared_realtime_ssl_context = get_ssl_configuration()
    return _shared_realtime_ssl_context
|
|||
|
|
|
|||
|
|
|
|||
|
|
def mask_sensitive_info(error_message):
    """
    Redact the value of the first ``key=`` query parameter in a message.

    Everything between ``key=`` and the next ``&`` (or the end of the
    string) is replaced with ``[REDACTED_API_KEY]``. Non-string inputs and
    strings without a ``key=`` marker are returned unchanged.
    """
    if not isinstance(error_message, str):
        return error_message

    marker = error_message.find("key=")
    if marker == -1:
        return error_message

    # Keep everything up to and including "key=", drop the value, keep any
    # remaining parameters starting at the next "&".
    value_start = marker + len("key=")
    tail_start = error_message.find("&", marker)
    tail = "" if tail_start == -1 else error_message[tail_start:]
    return error_message[:value_start] + "[REDACTED_API_KEY]" + tail
|
|||
|
|
|
|||
|
|
|
|||
|
|
class MaskedHTTPStatusError(httpx.HTTPStatusError):
    """
    httpx.HTTPStatusError variant whose request URL has any ``key=`` query
    value redacted (via mask_sensitive_info), so API keys do not leak into
    logs or tracebacks. The request and response are rebuilt so the masked
    URL is what downstream consumers observe.
    """

    def __init__(
        self, original_error, message: Optional[str] = None, text: Optional[str] = None
    ):
        # Create a new error with the masked URL
        masked_url = mask_sensitive_info(str(original_error.request.url))
        # Create a new error that looks like the original, but with a masked URL

        super().__init__(
            message=original_error.message,
            request=httpx.Request(
                method=original_error.request.method,
                url=masked_url,
                headers=original_error.request.headers,
                content=original_error.request.content,
            ),
            response=httpx.Response(
                status_code=original_error.response.status_code,
                content=original_error.response.content,
                headers=original_error.response.headers,
            ),
        )
        # Optional caller-supplied overrides (e.g. a pre-masked body).
        self.message = message
        self.text = text
|
|||
|
|
|
|||
|
|
|
|||
|
|
class AsyncHTTPHandler:
|
|||
|
|
    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
        concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
        client_alias: Optional[str] = None,  # name for client in logs
        ssl_verify: Optional[VerifyTypes] = None,
        shared_session: Optional["ClientSession"] = None,
    ):
        """
        Async HTTP handler wrapping a pooled httpx.AsyncClient.

        Args:
            timeout: Default per-request timeout; when None the module-level
                default applies at client creation.
            event_hooks: httpx event hooks passed through to the client.
            concurrent_limit: Ignored; kept for backward compatibility.
            client_alias: Optional name for this client in logs.
            ssl_verify: SSL verification setting (bool, CA path, or context).
            shared_session: Optional aiohttp ClientSession reused by the
                aiohttp transport.
        """
        # timeout/event_hooks are retained so retry paths can build a fresh
        # client with the same configuration.
        self.timeout = timeout
        self.event_hooks = event_hooks
        self.client = self.create_client(
            timeout=timeout,
            event_hooks=event_hooks,
            ssl_verify=ssl_verify,
            shared_session=shared_session,
        )
        self.client_alias = client_alias
|
|||
|
|
|
|||
|
|
    def create_client(
        self,
        timeout: Optional[Union[float, httpx.Timeout]],
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
        ssl_verify: Optional[VerifyTypes] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> httpx.AsyncClient:
        """
        Build a configured httpx.AsyncClient.

        Resolves SSL settings via get_ssl_configuration, chooses a transport
        (aiohttp by default, plain httpx otherwise), applies default headers
        and an optional client certificate, and enables redirect following.
        """
        # Get unified SSL configuration
        ssl_config = get_ssl_configuration(ssl_verify)

        # An SSL certificate used by the requested host to authenticate the client.
        # /path/to/client.pem
        cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

        if timeout is None:
            timeout = _DEFAULT_TIMEOUT
        # Create a client with a connection pool

        transport = AsyncHTTPHandler._create_async_transport(
            ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None,
            ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
            shared_session=shared_session,
        )

        # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT)
        default_headers = get_default_headers()

        # NOTE(review): when a custom transport is returned above, httpx is
        # believed to ignore `verify=` for that transport (the transport
        # carries its own SSL config) — confirm against httpx docs.
        return httpx.AsyncClient(
            transport=transport,
            event_hooks=event_hooks,
            timeout=timeout,
            verify=ssl_config,
            cert=cert,
            headers=default_headers,
            follow_redirects=True,
        )
|
|||
|
|
|
|||
|
|
    async def close(self):
        """Close the underlying httpx.AsyncClient and release its connections."""
        # Close the client when you're done with it
        await self.client.aclose()
|
|||
|
|
|
|||
|
|
    async def __aenter__(self):
        """
        Enter the async context manager.

        NOTE: this yields the underlying httpx.AsyncClient, not the handler
        itself — ``async with handler as client:`` binds the raw client.
        """
        return self.client
|
|||
|
|
|
|||
|
|
async def __aexit__(self):
|
|||
|
|
# close the client when exiting
|
|||
|
|
await self.client.aclose()
|
|||
|
|
|
|||
|
|
    async def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        follow_redirects: Optional[bool] = None,
    ):
        """
        Issue an async GET request.

        Query parameters already embedded in ``url`` are extracted with
        ``HTTPHandler.extract_query_params`` and merged into ``params``
        (URL-embedded values overwrite ``params`` entries on key collision).
        ``follow_redirects=None`` defers to the client's default.
        """
        # Set follow_redirects to UseClientDefault if None
        _follow_redirects = (
            follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
        )

        params = params or {}
        params.update(HTTPHandler.extract_query_params(url))

        response = await self.client.get(
            url, params=params, headers=headers, follow_redirects=_follow_redirects  # type: ignore
        )
        return response
|
|||
|
|
|
|||
|
|
@track_llm_api_timing()
|
|||
|
|
async def post(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
logging_obj: Optional[LiteLLMLoggingObject] = None,
|
|||
|
|
files: Optional[RequestFiles] = None,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
start_time = time.time()
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"POST",
|
|||
|
|
url,
|
|||
|
|
data=request_data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
timeout=timeout,
|
|||
|
|
files=files,
|
|||
|
|
content=request_content,
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req, stream=stream)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.TimeoutException as e:
|
|||
|
|
end_time = time.time()
|
|||
|
|
time_delta = round(end_time - start_time, 3)
|
|||
|
|
headers = {}
|
|||
|
|
error_response = getattr(e, "response", None)
|
|||
|
|
if error_response is not None:
|
|||
|
|
for key, value in error_response.headers.items():
|
|||
|
|
headers["response_headers-{}".format(key)] = value
|
|||
|
|
|
|||
|
|
raise litellm.Timeout(
|
|||
|
|
message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
|
|||
|
|
model="default-model-name",
|
|||
|
|
llm_provider="litellm-httpx-handler",
|
|||
|
|
headers=headers,
|
|||
|
|
)
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
setattr(e, "text", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", mask_sensitive_info(e.response.text))
|
|||
|
|
setattr(e, "text", mask_sensitive_info(e.response.text))
|
|||
|
|
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
async def put(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.TimeoutException as e:
|
|||
|
|
headers = {}
|
|||
|
|
error_response = getattr(e, "response", None)
|
|||
|
|
if error_response is not None:
|
|||
|
|
for key, value in error_response.headers.items():
|
|||
|
|
headers["response_headers-{}".format(key)] = value
|
|||
|
|
|
|||
|
|
raise litellm.Timeout(
|
|||
|
|
message=f"Connection timed out after {timeout} seconds.",
|
|||
|
|
model="default-model-name",
|
|||
|
|
llm_provider="litellm-httpx-handler",
|
|||
|
|
headers=headers,
|
|||
|
|
)
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", e.response.text)
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
async def patch(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.TimeoutException as e:
|
|||
|
|
headers = {}
|
|||
|
|
error_response = getattr(e, "response", None)
|
|||
|
|
if error_response is not None:
|
|||
|
|
for key, value in error_response.headers.items():
|
|||
|
|
headers["response_headers-{}".format(key)] = value
|
|||
|
|
|
|||
|
|
raise litellm.Timeout(
|
|||
|
|
message=f"Connection timed out after {timeout} seconds.",
|
|||
|
|
model="default-model-name",
|
|||
|
|
llm_provider="litellm-httpx-handler",
|
|||
|
|
headers=headers,
|
|||
|
|
)
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", e.response.text)
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
async def delete(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req, stream=stream)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", e.response.text)
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
    async def single_connection_post_request(
        self,
        url: str,
        client: httpx.AsyncClient,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Making POST request for a single connection client.

        Used for retrying connection client errors.

        NOTE: this always issues a POST regardless of the verb the caller was
        originally retrying, and applies no per-request timeout (the provided
        client's defaults are used). Callers retrying other verbs should
        build the retry request themselves.
        """
        # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
        request_data, request_content = _prepare_request_data_and_content(data, content)

        req = client.build_request(
            "POST", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
        )
        response = await client.send(req, stream=stream)
        response.raise_for_status()
        return response
|
|||
|
|
|
|||
|
|
    def __del__(self) -> None:
        """Best-effort cleanup when the handler is garbage-collected."""
        try:
            # Schedule the async close on the running loop, if one exists.
            asyncio.get_running_loop().create_task(self.close())
        except Exception:
            # Deliberately swallowed: there may be no running loop (e.g. at
            # interpreter shutdown), and __del__ must never raise.
            pass
|
|||
|
|
|
|||
|
|
    @staticmethod
    def _create_async_transport(
        ssl_context: Optional[ssl.SSLContext] = None,
        ssl_verify: Optional[bool] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
        """
        - Creates a transport for httpx.AsyncClient
        - if litellm.force_ipv4 is True, it will return AsyncHTTPTransport with local_address="0.0.0.0"
        - [Default] It will return AiohttpTransport
        - Users can opt out of using AiohttpTransport by setting litellm.disable_aiohttp_transport to True


        Notes on this handler:
        - Why AiohttpTransport?
            - By default, we use AiohttpTransport since it offers much higher throughput and lower latency than httpx.

        - Why force ipv4?
            - Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
        """
        #########################################################
        # AIOHTTP TRANSPORT is the default (used unless disabled)
        #########################################################
        if AsyncHTTPHandler._should_use_aiohttp_transport():
            return AsyncHTTPHandler._create_aiohttp_transport(
                ssl_context=ssl_context,
                ssl_verify=ssl_verify,
                shared_session=shared_session,
            )

        #########################################################
        # HTTPX TRANSPORT is used when the aiohttp transport is disabled
        #########################################################
        return AsyncHTTPHandler._create_httpx_transport()
|
|||
|
|
|
|||
|
|
    @staticmethod
    def _should_use_aiohttp_transport() -> bool:
        """
        AiohttpTransport is the default transport for litellm.

        Httpx can be used by the following
        - litellm.disable_aiohttp_transport = True
        - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True"

        Returns:
            bool: True unless the aiohttp transport is explicitly disabled.
        """
        # Local import is redundant with the module-level one; kept for
        # byte-compatibility.
        import os

        from litellm.secret_managers.main import str_to_bool

        #########################################################
        # Check if user disabled aiohttp transport
        ########################################################
        if (
            litellm.disable_aiohttp_transport is True
            or str_to_bool(os.getenv("DISABLE_AIOHTTP_TRANSPORT", "False")) is True
        ):
            return False

        #########################################################
        # Default: Use AiohttpTransport
        ########################################################
        verbose_logger.debug("Using AiohttpTransport...")
        return True
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _get_ssl_connector_kwargs(
|
|||
|
|
ssl_verify: Optional[bool] = None,
|
|||
|
|
ssl_context: Optional[ssl.SSLContext] = None,
|
|||
|
|
) -> Dict[str, Any]:
|
|||
|
|
"""
|
|||
|
|
Helper method to get SSL connector initialization arguments for aiohttp TCPConnector.
|
|||
|
|
|
|||
|
|
SSL Configuration Priority:
|
|||
|
|
1. If ssl_context is provided -> use the custom SSL context
|
|||
|
|
2. If ssl_verify is False -> disable SSL verification (ssl=False)
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
Dict with appropriate SSL configuration for TCPConnector
|
|||
|
|
"""
|
|||
|
|
connector_kwargs: Dict[str, Any] = {
|
|||
|
|
"local_addr": ("0.0.0.0", 0) if litellm.force_ipv4 else None,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if ssl_context is not None:
|
|||
|
|
# Priority 1: Use the provided custom SSL context
|
|||
|
|
connector_kwargs["ssl"] = ssl_context
|
|||
|
|
elif ssl_verify is False:
|
|||
|
|
# Priority 2: Explicitly disable SSL verification
|
|||
|
|
connector_kwargs["ssl"] = False
|
|||
|
|
|
|||
|
|
return connector_kwargs
|
|||
|
|
|
|||
|
|
    @staticmethod
    def _create_aiohttp_transport(
        ssl_verify: Optional[bool] = None,
        ssl_context: Optional[ssl.SSLContext] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> LiteLLMAiohttpTransport:
        """
        Creates an AiohttpTransport with RequestNotRead error handling

        Args:
            ssl_verify: When False, disables SSL certificate verification.
            ssl_context: Custom SSL context; takes priority over ssl_verify.
            shared_session: Optional pre-existing aiohttp ClientSession to
                reuse; a new session factory is created if None or closed.

        Returns:
            A LiteLLMAiohttpTransport wrapping either the shared session
            (without taking ownership) or a lazily-created new session.

        Note: aiohttp TCPConnector ssl parameter accepts:
        - SSLContext: custom SSL context
        - False: disable SSL verification
        """
        # Deferred imports: avoid a circular import at module load time.
        from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
        from litellm.secret_managers.main import str_to_bool

        connector_kwargs = AsyncHTTPHandler._get_ssl_connector_kwargs(
            ssl_verify=ssl_verify, ssl_context=ssl_context
        )
        #########################################################
        # Check if user enabled aiohttp trust env
        # use for HTTP_PROXY, HTTPS_PROXY, etc.
        ########################################################
        # Either the litellm global flag or the env var can enable trust_env.
        trust_env: bool = litellm.aiohttp_trust_env
        if str_to_bool(os.getenv("AIOHTTP_TRUST_ENV", "False")) is True:
            trust_env = True

        #########################################################
        # Determine SSL config to pass to transport for per-request override
        # This ensures ssl_verify works even with shared sessions
        #########################################################
        ssl_for_transport: Optional[Union[bool, ssl.SSLContext]] = None
        if ssl_context is not None:
            ssl_for_transport = ssl_context
        elif ssl_verify is False:
            ssl_for_transport = False

        verbose_logger.debug("Creating AiohttpTransport...")

        # Use shared session if provided and valid
        if shared_session is not None and not shared_session.closed:
            verbose_logger.debug(
                f"SHARED SESSION: Reusing existing ClientSession (ID: {id(shared_session)})"
            )
            # owns_session=False: the transport must not close a session it
            # did not create.
            return LiteLLMAiohttpTransport(
                client=shared_session,
                ssl_verify=ssl_for_transport,
                owns_session=False,
            )

        # Create new session only if none provided or existing one is invalid
        verbose_logger.debug(
            "NEW SESSION: Creating new ClientSession (no shared session provided)"
        )
        transport_connector_kwargs = {
            "keepalive_timeout": AIOHTTP_KEEPALIVE_TIMEOUT,
            "ttl_dns_cache": AIOHTTP_TTL_DNS_CACHE,
            **connector_kwargs,
        }
        if AIOHTTP_NEEDS_CLEANUP_CLOSED:
            transport_connector_kwargs["enable_cleanup_closed"] = True
        # Connector limits are only applied when the constants are positive;
        # 0 (or negative) leaves aiohttp's defaults in place.
        if AIOHTTP_CONNECTOR_LIMIT > 0:
            transport_connector_kwargs["limit"] = AIOHTTP_CONNECTOR_LIMIT
        if AIOHTTP_CONNECTOR_LIMIT_PER_HOST > 0:
            transport_connector_kwargs[
                "limit_per_host"
            ] = AIOHTTP_CONNECTOR_LIMIT_PER_HOST

        # Pass a factory (lambda) rather than a session so the ClientSession
        # is created lazily, inside a running event loop.
        return LiteLLMAiohttpTransport(
            client=lambda: ClientSession(
                connector=TCPConnector(**transport_connector_kwargs),
                trust_env=trust_env,
            ),
            ssl_verify=ssl_for_transport,
        )
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _create_httpx_transport() -> Optional[AsyncHTTPTransport]:
|
|||
|
|
"""
|
|||
|
|
Creates an AsyncHTTPTransport
|
|||
|
|
|
|||
|
|
- If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0"
|
|||
|
|
- [Default] If force_ipv4 is False, it will return None
|
|||
|
|
"""
|
|||
|
|
if litellm.force_ipv4:
|
|||
|
|
return AsyncHTTPTransport(local_address="0.0.0.0")
|
|||
|
|
else:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HTTPHandler:
    """
    Synchronous HTTP client wrapper around ``httpx.Client``.

    Handles unified SSL configuration, optional default headers (User-Agent),
    an optional IPv4-only transport, and masks sensitive information in HTTP
    error responses before re-raising.
    """

    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
        client: Optional[httpx.Client] = None,
        ssl_verify: Optional[Union[bool, str]] = None,
        disable_default_headers: Optional[
            bool
        ] = False,  # arize phoenix returns different API responses when user agent header in request
    ):
        """
        Args:
            timeout: Request timeout; falls back to ``_DEFAULT_TIMEOUT`` when None.
            concurrent_limit: Ignored; retained for backward compatibility.
            client: Pre-built ``httpx.Client`` to use instead of creating one.
            ssl_verify: SSL verification flag or CA bundle path.
            disable_default_headers: When True, skip the default User-Agent header.
        """
        if timeout is None:
            timeout = _DEFAULT_TIMEOUT

        # Get unified SSL configuration
        ssl_config = get_ssl_configuration(ssl_verify)

        # An SSL certificate used by the requested host to authenticate the client.
        # /path/to/client.pem
        cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

        # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT)
        default_headers = get_default_headers() if not disable_default_headers else None

        if client is None:
            transport = self._create_sync_transport()

            # Create a client with a connection pool
            self.client = httpx.Client(
                transport=transport,
                timeout=timeout,
                verify=ssl_config,
                cert=cert,
                headers=default_headers,
                follow_redirects=True,
            )
        else:
            self.client = client

    def close(self):
        """Close the underlying httpx client and release its connections."""
        self.client.close()

    def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        follow_redirects: Optional[bool] = None,
    ):
        """
        Send a GET request.

        Query parameters embedded in ``url`` are merged into ``params``.
        ``follow_redirects=None`` defers to the client's default.
        """
        # Set follow_redirects to UseClientDefault if None
        _follow_redirects = (
            follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
        )
        params = params or {}
        params.update(self.extract_query_params(url))

        # Bug fix: `_follow_redirects` was previously computed but never
        # passed to the client, silently ignoring the caller's argument.
        response = self.client.get(
            url,
            params=params,
            headers=headers,
            follow_redirects=_follow_redirects,  # type: ignore
        )

        return response

    @staticmethod
    def extract_query_params(url: str) -> Dict[str, str]:
        """
        Parse a URL's query-string into a dict.

        :param url: full URL, e.g. "https://.../path?foo=1&bar=2"
        :return: {"foo": "1", "bar": "2"}
        """
        from urllib.parse import parse_qsl, urlsplit

        parts = urlsplit(url)
        return dict(parse_qsl(parts.query))

    def post(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[Union[dict, str, List]] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        files: Optional[Union[dict, RequestFiles]] = None,
        content: Any = None,
        logging_obj: Optional[LiteLLMLoggingObject] = None,
    ):
        """
        Send a POST request and raise on HTTP error statuses.

        Raises:
            litellm.Timeout: on an httpx timeout.
            httpx.HTTPStatusError: on 4xx/5xx; ``message``/``text`` (masked)
                and ``status_code`` attributes are attached before re-raise.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "POST",
                    url,
                    data=request_data,  # type: ignore
                    json=json,
                    params=params,
                    headers=headers,
                    timeout=timeout,
                    files=files,
                    content=request_content,  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "POST", url, data=request_data, json=json, params=params, headers=headers, files=files, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
            # Mask secrets in the error body before exposing it on the exception.
            if stream is True:
                setattr(e, "message", mask_sensitive_info(e.response.read()))
                setattr(e, "text", mask_sensitive_info(e.response.read()))
            else:
                error_text = mask_sensitive_info(e.response.text)
                setattr(e, "message", error_text)
                setattr(e, "text", error_text)

            setattr(e, "status_code", e.response.status_code)
            raise e
        except Exception as e:
            raise e

    def patch(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[Union[dict, str]] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        content: Any = None,
    ):
        """
        Send a PATCH request and raise on HTTP error statuses.

        Raises:
            litellm.Timeout: on an httpx timeout.
            httpx.HTTPStatusError: on 4xx/5xx; ``message``/``text`` (masked)
                and ``status_code`` attributes are attached before re-raise.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "PATCH", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
            # Mask secrets in the error body before exposing it on the exception.
            if stream is True:
                setattr(e, "message", mask_sensitive_info(e.response.read()))
                setattr(e, "text", mask_sensitive_info(e.response.read()))
            else:
                error_text = mask_sensitive_info(e.response.text)
                setattr(e, "message", error_text)
                setattr(e, "text", error_text)

            setattr(e, "status_code", e.response.status_code)

            raise e
        except Exception as e:
            raise e

    def put(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[Union[dict, str]] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        content: Any = None,
    ):
        """
        Send a PUT request.

        NOTE(review): unlike post/patch/delete, this method does not call
        ``raise_for_status()`` — error responses are returned to the caller
        as-is. Presumably intentional; confirm before changing.

        Raises:
            litellm.Timeout: on an httpx timeout.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "PUT", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except Exception as e:
            raise e

    def delete(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Send a DELETE request and raise on HTTP error statuses.

        Raises:
            litellm.Timeout: on an httpx timeout.
            httpx.HTTPStatusError: on 4xx/5xx; ``message``/``text`` (masked)
                and ``status_code`` attributes are attached before re-raise.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "DELETE", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
            # Mask secrets in the error body before exposing it on the exception.
            if stream is True:
                setattr(e, "message", mask_sensitive_info(e.response.read()))
                setattr(e, "text", mask_sensitive_info(e.response.read()))
            else:
                error_text = mask_sensitive_info(e.response.text)
                setattr(e, "message", error_text)
                setattr(e, "text", error_text)

            setattr(e, "status_code", e.response.status_code)

            raise e
        except Exception as e:
            raise e

    def __del__(self) -> None:
        # Best-effort cleanup; never raise during interpreter shutdown.
        try:
            self.close()
        except Exception:
            pass

    def _create_sync_transport(self) -> Optional[HTTPTransport]:
        """
        Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True.
        Otherwise, return None.

        Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
        """
        if litellm.force_ipv4:
            return HTTPTransport(local_address="0.0.0.0")
        else:
            # Fall back to a globally configured transport, if any.
            return getattr(litellm, "sync_transport", None)
|||
|
|
|
|||
|
|
def get_async_httpx_client(
    llm_provider: Union[LlmProviders, httpxSpecialProvider],
    params: Optional[dict] = None,
    shared_session: Optional["ClientSession"] = None,
) -> AsyncHTTPHandler:
    """
    Return a cached AsyncHTTPHandler for (provider, params).

    On a cache miss, builds a new handler, caches it with a TTL, and
    returns it.
    """
    key_suffix = ""
    if params is not None:
        for param_name, param_value in params.items():
            try:
                key_suffix += f"{param_name}_{param_value}"
            except Exception:
                # Best-effort key building: unstringifiable values are skipped.
                pass

    cache_key = "async_httpx_client" + key_suffix + llm_provider

    # Lazily create the global client cache so this works even when litellm
    # globals are not fully populated at import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        setattr(litellm, "in_memory_llm_clients_cache", client_cache)

    cached = client_cache.get_cache(cache_key)
    if cached:
        return cached

    if params is None:
        new_client = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
            shared_session=shared_session,
        )
    else:
        # "disable_aiohttp_transport" only differentiates the cache key; it is
        # not an AsyncHTTPHandler.__init__ argument.
        init_kwargs = {
            k: v for k, v in params.items() if k != "disable_aiohttp_transport"
        }
        init_kwargs["shared_session"] = shared_session
        new_client = AsyncHTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return new_client
|||
|
|
|
|||
|
|
|
|||
|
|
def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
    """
    Return a cached synchronous HTTPHandler for ``params``.

    On a cache miss, builds a new handler, caches it with a TTL, and
    returns it.
    """
    key_suffix = ""
    if params is not None:
        for param_name, param_value in params.items():
            try:
                key_suffix += f"{param_name}_{param_value}"
            except Exception:
                # Best-effort key building: unstringifiable values are skipped.
                pass

    cache_key = "httpx_client" + key_suffix

    # Lazily create the global client cache so this works even when litellm
    # globals are not fully populated at import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        setattr(litellm, "in_memory_llm_clients_cache", client_cache)

    cached = client_cache.get_cache(cache_key)
    if cached:
        return cached

    if params is None:
        new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    else:
        # "disable_aiohttp_transport" only differentiates the cache key; it is
        # not an HTTPHandler.__init__ argument.
        init_kwargs = {
            k: v for k, v in params.items() if k != "disable_aiohttp_transport"
        }
        new_client = HTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return new_client