1304 lines
47 KiB
Python
1304 lines
47 KiB
Python
|
|
import asyncio
|
|||
|
|
import os
|
|||
|
|
import ssl
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
from typing import (
|
|||
|
|
TYPE_CHECKING,
|
|||
|
|
Any,
|
|||
|
|
Callable,
|
|||
|
|
Dict,
|
|||
|
|
List,
|
|||
|
|
Mapping,
|
|||
|
|
Optional,
|
|||
|
|
Tuple,
|
|||
|
|
Union,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
import certifi
|
|||
|
|
import httpx
|
|||
|
|
from aiohttp import ClientSession, TCPConnector
|
|||
|
|
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
|
|||
|
|
from httpx._types import RequestFiles
|
|||
|
|
|
|||
|
|
import litellm
|
|||
|
|
from litellm._logging import verbose_logger
|
|||
|
|
from litellm.constants import (
|
|||
|
|
_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
|
|||
|
|
AIOHTTP_CONNECTOR_LIMIT,
|
|||
|
|
AIOHTTP_CONNECTOR_LIMIT_PER_HOST,
|
|||
|
|
AIOHTTP_KEEPALIVE_TIMEOUT,
|
|||
|
|
AIOHTTP_NEEDS_CLEANUP_CLOSED,
|
|||
|
|
AIOHTTP_TTL_DNS_CACHE,
|
|||
|
|
DEFAULT_SSL_CIPHERS,
|
|||
|
|
)
|
|||
|
|
from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
|
|||
|
|
from litellm.types.llms.custom_http import *
|
|||
|
|
|
|||
|
|
if TYPE_CHECKING:
    # These imports are only evaluated by static type checkers; keeping them
    # out of the runtime path avoids importing heavy / potentially circular
    # modules at startup.
    from litellm import LlmProviders
    from litellm.litellm_core_utils.litellm_logging import (
        Logging as LiteLLMLoggingObject,
    )
    from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
else:
    # At runtime the names are aliased to Any so annotations still resolve.
    LlmProviders = Any
    LiteLLMLoggingObject = Any
    LiteLLMAiohttpTransport = Any
|
|||
|
|
|
|||
|
|
# Resolve the package version used in the default User-Agent header; fall
# back to a sentinel when version metadata is unavailable (e.g. running
# from source).
try:
    from litellm._version import version
except Exception:
    version = "0.0.0"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_default_headers() -> dict:
    """
    Build the default header dict for outbound HTTP requests.

    Returns ``{"User-Agent": "litellm/{version}"}`` unless the
    ``LITELLM_USER_AGENT`` environment variable is set, in which case that
    value fully overrides the User-Agent header.
    """
    override = os.environ.get("LITELLM_USER_AGENT")
    value = override if override is not None else f"litellm/{version}"
    return {"User-Agent": value}
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Initialize headers (User-Agent)
# Computed once at import time; later changes to LITELLM_USER_AGENT do not
# affect this module-level value (create_client recomputes per client).
headers = get_default_headers()

# https://www.python-httpx.org/advanced/timeouts
# Conservative default: 5s total and 5s connect timeout, used when a client
# is created without an explicit timeout.
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _prepare_request_data_and_content(
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None,
|
|||
|
|
content: Any = None,
|
|||
|
|
) -> Tuple[Optional[Union[dict, Mapping]], Any]:
|
|||
|
|
"""
|
|||
|
|
Helper function to route data/content parameters correctly for httpx requests
|
|||
|
|
|
|||
|
|
This prevents httpx DeprecationWarnings that cause memory leaks.
|
|||
|
|
|
|||
|
|
Background:
|
|||
|
|
- httpx shows a DeprecationWarning when you pass bytes/str to `data=`
|
|||
|
|
- It wants you to use `content=` instead for bytes/str
|
|||
|
|
- The warning itself leaks memory when triggered repeatedly
|
|||
|
|
|
|||
|
|
Solution:
|
|||
|
|
- Move bytes/str from `data=` to `content=` before calling build_request
|
|||
|
|
- Keep dicts in `data=` (that's still the correct parameter for dicts)
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
data: Request data (can be dict, str, or bytes)
|
|||
|
|
content: Request content (raw bytes/str)
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
Tuple of (request_data, request_content) properly routed for httpx
|
|||
|
|
"""
|
|||
|
|
request_data = None
|
|||
|
|
request_content = content
|
|||
|
|
|
|||
|
|
if data is not None:
|
|||
|
|
if isinstance(data, (bytes, str)):
|
|||
|
|
# Bytes/strings belong in content= (only if not already provided)
|
|||
|
|
if content is None:
|
|||
|
|
request_content = data
|
|||
|
|
else:
|
|||
|
|
# dict/Mapping stays in data= parameter
|
|||
|
|
request_data = data
|
|||
|
|
|
|||
|
|
return request_data, request_content
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Cache for SSL contexts to avoid creating duplicate contexts with the same configuration
# Key: tuple of (cafile, ssl_security_level, ssl_ecdh_curve)
# Value: ssl.SSLContext
# NOTE: unbounded by design — the key space is a handful of distinct SSL
# configurations per process, so the cache stays tiny.
_ssl_context_cache: Dict[
    Tuple[Optional[str], Optional[str], Optional[str]], ssl.SSLContext
] = {}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _create_ssl_context(
    cafile: Optional[str],
    ssl_security_level: Optional[str],
    ssl_ecdh_curve: Optional[str],
) -> ssl.SSLContext:
    """
    Create an SSL context with the given configuration.
    This is separated from get_ssl_configuration to enable caching.

    Args:
        cafile: Path to the CA bundle used for certificate verification.
        ssl_security_level: Custom OpenSSL cipher string (e.g. from the
            SSL_SECURITY_LEVEL env var); falls back to DEFAULT_SSL_CIPHERS.
        ssl_ecdh_curve: Optional ECDH curve name for key exchange; invalid
            or unsupported values are logged and ignored.

    Returns:
        ssl.SSLContext: Configured context with minimum TLS 1.2.
    """
    custom_ssl_context = ssl.create_default_context(cafile=cafile)

    # Optimize SSL handshake performance
    # Set minimum TLS version to 1.2 for better performance
    custom_ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2

    # Configure cipher suites for optimal performance
    if ssl_security_level and isinstance(ssl_security_level, str):
        # User provided custom cipher configuration (e.g., via SSL_SECURITY_LEVEL env var)
        custom_ssl_context.set_ciphers(ssl_security_level)
    else:
        # Use optimized cipher list that strongly prefers fast ciphers
        # but falls back to widely compatible ones
        custom_ssl_context.set_ciphers(DEFAULT_SSL_CIPHERS)

    # Configure ECDH curve for key exchange (e.g., to disable PQC and improve performance)
    # Set SSL_ECDH_CURVE env var or litellm.ssl_ecdh_curve to 'X25519' to disable PQC
    # Common valid curves: X25519, prime256v1, secp384r1, secp521r1
    if ssl_ecdh_curve and isinstance(ssl_ecdh_curve, str):
        try:
            custom_ssl_context.set_ecdh_curve(ssl_ecdh_curve)
            verbose_logger.debug(f"SSL ECDH curve set to: {ssl_ecdh_curve}")
        except AttributeError:
            # set_ecdh_curve is not available on all Python/OpenSSL builds.
            verbose_logger.warning(
                f"SSL ECDH curve configuration not supported. "
                f"Python version: {sys.version.split()[0]}, OpenSSL version: {ssl.OPENSSL_VERSION}. "
                f"Requested curve: {ssl_ecdh_curve}. Continuing with default curves."
            )
        except ValueError as e:
            # Invalid curve name
            verbose_logger.warning(
                f"Invalid SSL ECDH curve name: '{ssl_ecdh_curve}'. {e}. "
                f"Common valid curves: X25519, prime256v1, secp384r1, secp521r1. "
                f"Continuing with default curves (including PQC)."
            )

    return custom_ssl_context
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_ssl_verify(
    ssl_verify: Optional[Union[bool, str]] = None,
) -> Union[bool, str]:
    """
    Resolve the effective SSL verification setting.

    Resolution order:
        1. the ``ssl_verify`` argument, when given
        2. the ``SSL_VERIFY`` environment variable
        3. ``litellm.ssl_verify``
        4. the ``SSL_CERT_FILE`` environment variable (only when the
           resolved value is ``True`` and the file exists)

    Returns:
        Union[bool, str]: ``True``/``False``, or a filesystem path to a CA
        bundle.
    """
    from litellm.secret_managers.main import str_to_bool

    resolved = ssl_verify
    if resolved is None:
        resolved = os.getenv("SSL_VERIFY", litellm.ssl_verify)

    if isinstance(resolved, str):
        # A string naming an existing file is treated as a CA bundle path.
        if os.path.exists(resolved):
            return resolved

        # Otherwise interpret boolean-ish strings ("True"/"False").
        as_bool = str_to_bool(resolved)
        if as_bool is not None:
            resolved = as_bool

    # With verification enabled, an existing SSL_CERT_FILE overrides True.
    if resolved is True:
        cert_file = os.getenv("SSL_CERT_FILE")
        if cert_file and os.path.exists(cert_file):
            return cert_file

    return True if resolved is None else resolved
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_ssl_configuration(
    ssl_verify: Optional[VerifyTypes] = None,
) -> Union[bool, str, ssl.SSLContext]:
    """
    Unified SSL configuration function that handles ssl_context and ssl_verify logic.

    SSL Configuration Priority:
    1. If ssl_verify is provided -> is a SSL context use the custom SSL context
    2. If ssl_verify is False -> disable SSL verification (ssl=False)
    3. If ssl_verify is a string -> use it as a path to CA bundle file
    4. If SSL_CERT_FILE environment variable is set and exists -> use it as CA bundle file
    5. Else will use default SSL context with certifi CA bundle

    If ssl_security_level is set, it will apply the security level to the SSL context.

    SSL contexts are cached to avoid creating duplicate contexts with the same configuration,
    which reduces memory allocation and improves performance.

    Args:
        ssl_verify: SSL verification setting. Can be:
            - None: Use default from environment/litellm settings
            - False: Disable SSL verification
            - True: Enable SSL verification
            - str: Path to CA bundle file

    Returns:
        Union[bool, str, ssl.SSLContext]: Appropriate SSL configuration.
        NOTE: any value other than False is normalized into a (cached)
        ssl.SSLContext; only ``False`` is returned as-is.
    """
    if isinstance(ssl_verify, ssl.SSLContext):
        # If ssl_verify is already an SSLContext, return it directly
        return ssl_verify

    # Get resolved ssl_verify (bool or CA-bundle path)
    ssl_verify = get_ssl_verify(ssl_verify=ssl_verify)

    # Optional cipher string / ECDH curve overrides (env vars win over
    # litellm module settings).
    ssl_security_level = os.getenv("SSL_SECURITY_LEVEL", litellm.ssl_security_level)
    ssl_ecdh_curve = os.getenv("SSL_ECDH_CURVE", litellm.ssl_ecdh_curve)

    # Pick the CA bundle: explicit path > SSL_CERT_FILE > certifi default.
    cafile = None
    if isinstance(ssl_verify, str) and os.path.exists(ssl_verify):
        cafile = ssl_verify
    if not cafile:
        ssl_cert_file = os.getenv("SSL_CERT_FILE")
        if ssl_cert_file and os.path.exists(ssl_cert_file):
            cafile = ssl_cert_file
        else:
            cafile = certifi.where()

    if ssl_verify is not False:
        # Create cache key from configuration parameters
        cache_key = (cafile, ssl_security_level, ssl_ecdh_curve)

        # Check if we have a cached SSL context for this configuration
        if cache_key not in _ssl_context_cache:
            _ssl_context_cache[cache_key] = _create_ssl_context(
                cafile=cafile,
                ssl_security_level=ssl_security_level,
                ssl_ecdh_curve=ssl_ecdh_curve,
            )

        # Return the cached SSL context
        return _ssl_context_cache[cache_key]

    # Verification explicitly disabled.
    return ssl_verify
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Lazily-initialized SSL configuration shared by realtime websocket clients;
# populated on first call to get_shared_realtime_ssl_context().
_shared_realtime_ssl_context: Optional[Union[bool, str, ssl.SSLContext]] = None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_shared_realtime_ssl_context() -> Union[bool, str, ssl.SSLContext]:
    """
    Return the SSL configuration shared by realtime websocket clients.

    Built lazily on first use (avoiding import-order cycles during startup)
    and memoized in a module-level global, so all realtime clients share a
    single configuration.
    """
    global _shared_realtime_ssl_context
    if _shared_realtime_ssl_context is not None:
        return _shared_realtime_ssl_context
    _shared_realtime_ssl_context = get_ssl_configuration()
    return _shared_realtime_ssl_context
|
|||
|
|
|
|||
|
|
|
|||
|
|
def mask_sensitive_info(error_message):
    """
    Redact the value of the first ``key=`` query parameter in a message.

    Everything between ``key=`` and the next ``&`` (or the end of the
    string) is replaced with ``[REDACTED_API_KEY]``. Non-string inputs and
    strings without a ``key=`` marker are returned unchanged.
    """
    if not isinstance(error_message, str):
        return error_message

    marker = error_message.find("key=")
    if marker == -1:
        return error_message

    # Keep everything up to and including "key=", drop the value, keep any
    # remaining parameters starting at the next "&".
    value_start = marker + len("key=")
    tail_start = error_message.find("&", marker)
    tail = "" if tail_start == -1 else error_message[tail_start:]
    return error_message[:value_start] + "[REDACTED_API_KEY]" + tail
|
|||
|
|
|
|||
|
|
|
|||
|
|
class MaskedHTTPStatusError(httpx.HTTPStatusError):
    """
    httpx.HTTPStatusError variant whose request URL has any ``key=`` query
    value redacted (via mask_sensitive_info), so API keys do not leak into
    logs or tracebacks. The request and response are rebuilt so the masked
    URL is what downstream consumers observe.
    """

    def __init__(
        self, original_error, message: Optional[str] = None, text: Optional[str] = None
    ):
        # Create a new error with the masked URL
        masked_url = mask_sensitive_info(str(original_error.request.url))
        # Create a new error that looks like the original, but with a masked URL

        super().__init__(
            message=original_error.message,
            request=httpx.Request(
                method=original_error.request.method,
                url=masked_url,
                headers=original_error.request.headers,
                content=original_error.request.content,
            ),
            response=httpx.Response(
                status_code=original_error.response.status_code,
                content=original_error.response.content,
                headers=original_error.response.headers,
            ),
        )
        # Optional caller-supplied overrides (e.g. a pre-masked body).
        self.message = message
        self.text = text
|
|||
|
|
|
|||
|
|
|
|||
|
|
class AsyncHTTPHandler:
|
|||
|
|
    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
        concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
        client_alias: Optional[str] = None,  # name for client in logs
        ssl_verify: Optional[VerifyTypes] = None,
        shared_session: Optional["ClientSession"] = None,
    ):
        """
        Async HTTP handler wrapping a pooled httpx.AsyncClient.

        Args:
            timeout: Default per-request timeout; when None the module-level
                default applies at client creation.
            event_hooks: httpx event hooks passed through to the client.
            concurrent_limit: Ignored; kept for backward compatibility.
            client_alias: Optional name for this client in logs.
            ssl_verify: SSL verification setting (bool, CA path, or context).
            shared_session: Optional aiohttp ClientSession reused by the
                aiohttp transport.
        """
        # timeout/event_hooks are retained so retry paths can build a fresh
        # client with the same configuration.
        self.timeout = timeout
        self.event_hooks = event_hooks
        self.client = self.create_client(
            timeout=timeout,
            event_hooks=event_hooks,
            ssl_verify=ssl_verify,
            shared_session=shared_session,
        )
        self.client_alias = client_alias
|
|||
|
|
|
|||
|
|
    def create_client(
        self,
        timeout: Optional[Union[float, httpx.Timeout]],
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
        ssl_verify: Optional[VerifyTypes] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> httpx.AsyncClient:
        """
        Build a configured httpx.AsyncClient.

        Resolves SSL settings via get_ssl_configuration, chooses a transport
        (aiohttp by default, plain httpx otherwise), applies default headers
        and an optional client certificate, and enables redirect following.
        """
        # Get unified SSL configuration
        ssl_config = get_ssl_configuration(ssl_verify)

        # An SSL certificate used by the requested host to authenticate the client.
        # /path/to/client.pem
        cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

        if timeout is None:
            timeout = _DEFAULT_TIMEOUT
        # Create a client with a connection pool

        transport = AsyncHTTPHandler._create_async_transport(
            ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None,
            ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
            shared_session=shared_session,
        )

        # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT)
        default_headers = get_default_headers()

        # NOTE(review): when a custom transport is returned above, httpx is
        # believed to ignore `verify=` for that transport (the transport
        # carries its own SSL config) — confirm against httpx docs.
        return httpx.AsyncClient(
            transport=transport,
            event_hooks=event_hooks,
            timeout=timeout,
            verify=ssl_config,
            cert=cert,
            headers=default_headers,
            follow_redirects=True,
        )
|
|||
|
|
|
|||
|
|
    async def close(self):
        """Close the underlying httpx.AsyncClient and release its connections."""
        # Close the client when you're done with it
        await self.client.aclose()
|
|||
|
|
|
|||
|
|
    async def __aenter__(self):
        """
        Enter the async context manager.

        NOTE: this yields the underlying httpx.AsyncClient, not the handler
        itself — ``async with handler as client:`` binds the raw client.
        """
        return self.client
|
|||
|
|
|
|||
|
|
async def __aexit__(self):
|
|||
|
|
# close the client when exiting
|
|||
|
|
await self.client.aclose()
|
|||
|
|
|
|||
|
|
    async def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        follow_redirects: Optional[bool] = None,
    ):
        """
        Issue an async GET request.

        Query parameters already embedded in ``url`` are extracted with
        ``HTTPHandler.extract_query_params`` and merged into ``params``
        (URL-embedded values overwrite ``params`` entries on key collision).
        ``follow_redirects=None`` defers to the client's default.
        """
        # Set follow_redirects to UseClientDefault if None
        _follow_redirects = (
            follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
        )

        params = params or {}
        params.update(HTTPHandler.extract_query_params(url))

        response = await self.client.get(
            url, params=params, headers=headers, follow_redirects=_follow_redirects  # type: ignore
        )
        return response
|
|||
|
|
|
|||
|
|
@track_llm_api_timing()
|
|||
|
|
async def post(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
logging_obj: Optional[LiteLLMLoggingObject] = None,
|
|||
|
|
files: Optional[RequestFiles] = None,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
start_time = time.time()
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"POST",
|
|||
|
|
url,
|
|||
|
|
data=request_data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
timeout=timeout,
|
|||
|
|
files=files,
|
|||
|
|
content=request_content,
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req, stream=stream)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.TimeoutException as e:
|
|||
|
|
end_time = time.time()
|
|||
|
|
time_delta = round(end_time - start_time, 3)
|
|||
|
|
headers = {}
|
|||
|
|
error_response = getattr(e, "response", None)
|
|||
|
|
if error_response is not None:
|
|||
|
|
for key, value in error_response.headers.items():
|
|||
|
|
headers["response_headers-{}".format(key)] = value
|
|||
|
|
|
|||
|
|
raise litellm.Timeout(
|
|||
|
|
message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
|
|||
|
|
model="default-model-name",
|
|||
|
|
llm_provider="litellm-httpx-handler",
|
|||
|
|
headers=headers,
|
|||
|
|
)
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
setattr(e, "text", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", mask_sensitive_info(e.response.text))
|
|||
|
|
setattr(e, "text", mask_sensitive_info(e.response.text))
|
|||
|
|
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
async def put(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.TimeoutException as e:
|
|||
|
|
headers = {}
|
|||
|
|
error_response = getattr(e, "response", None)
|
|||
|
|
if error_response is not None:
|
|||
|
|
for key, value in error_response.headers.items():
|
|||
|
|
headers["response_headers-{}".format(key)] = value
|
|||
|
|
|
|||
|
|
raise litellm.Timeout(
|
|||
|
|
message=f"Connection timed out after {timeout} seconds.",
|
|||
|
|
model="default-model-name",
|
|||
|
|
llm_provider="litellm-httpx-handler",
|
|||
|
|
headers=headers,
|
|||
|
|
)
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", e.response.text)
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
async def patch(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.TimeoutException as e:
|
|||
|
|
headers = {}
|
|||
|
|
error_response = getattr(e, "response", None)
|
|||
|
|
if error_response is not None:
|
|||
|
|
for key, value in error_response.headers.items():
|
|||
|
|
headers["response_headers-{}".format(key)] = value
|
|||
|
|
|
|||
|
|
raise litellm.Timeout(
|
|||
|
|
message=f"Connection timed out after {timeout} seconds.",
|
|||
|
|
model="default-model-name",
|
|||
|
|
llm_provider="litellm-httpx-handler",
|
|||
|
|
headers=headers,
|
|||
|
|
)
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", e.response.text)
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
async def delete(
|
|||
|
|
self,
|
|||
|
|
url: str,
|
|||
|
|
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
|||
|
|
json: Optional[dict] = None,
|
|||
|
|
params: Optional[dict] = None,
|
|||
|
|
headers: Optional[dict] = None,
|
|||
|
|
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
|||
|
|
stream: bool = False,
|
|||
|
|
content: Any = None,
|
|||
|
|
):
|
|||
|
|
try:
|
|||
|
|
if timeout is None:
|
|||
|
|
timeout = self.timeout
|
|||
|
|
|
|||
|
|
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
|||
|
|
request_data, request_content = _prepare_request_data_and_content(
|
|||
|
|
data, content
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
req = self.client.build_request(
|
|||
|
|
"DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
|||
|
|
)
|
|||
|
|
response = await self.client.send(req, stream=stream)
|
|||
|
|
response.raise_for_status()
|
|||
|
|
return response
|
|||
|
|
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
|||
|
|
# Retry the request with a new session if there is a connection error
|
|||
|
|
new_client = self.create_client(
|
|||
|
|
timeout=timeout, event_hooks=self.event_hooks
|
|||
|
|
)
|
|||
|
|
try:
|
|||
|
|
return await self.single_connection_post_request(
|
|||
|
|
url=url,
|
|||
|
|
client=new_client,
|
|||
|
|
data=data,
|
|||
|
|
json=json,
|
|||
|
|
params=params,
|
|||
|
|
headers=headers,
|
|||
|
|
stream=stream,
|
|||
|
|
)
|
|||
|
|
finally:
|
|||
|
|
await new_client.aclose()
|
|||
|
|
except httpx.HTTPStatusError as e:
|
|||
|
|
setattr(e, "status_code", e.response.status_code)
|
|||
|
|
if stream is True:
|
|||
|
|
setattr(e, "message", await e.response.aread())
|
|||
|
|
else:
|
|||
|
|
setattr(e, "message", e.response.text)
|
|||
|
|
raise e
|
|||
|
|
except Exception as e:
|
|||
|
|
raise e
|
|||
|
|
|
|||
|
|
    async def single_connection_post_request(
        self,
        url: str,
        client: httpx.AsyncClient,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Making POST request for a single connection client.

        Used for retrying connection client errors.

        NOTE: this always issues a POST regardless of the verb the caller was
        originally retrying, and applies no per-request timeout (the provided
        client's defaults are used). Callers retrying other verbs should
        build the retry request themselves.
        """
        # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
        request_data, request_content = _prepare_request_data_and_content(data, content)

        req = client.build_request(
            "POST", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
        )
        response = await client.send(req, stream=stream)
        response.raise_for_status()
        return response
|
|||
|
|
|
|||
|
|
    def __del__(self) -> None:
        """Best-effort cleanup when the handler is garbage-collected."""
        try:
            # Schedule the async close on the running loop, if one exists.
            asyncio.get_running_loop().create_task(self.close())
        except Exception:
            # Deliberately swallowed: there may be no running loop (e.g. at
            # interpreter shutdown), and __del__ must never raise.
            pass
|
|||
|
|
|
|||
|
|
    @staticmethod
    def _create_async_transport(
        ssl_context: Optional[ssl.SSLContext] = None,
        ssl_verify: Optional[bool] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
        """
        - Creates a transport for httpx.AsyncClient
        - if litellm.force_ipv4 is True, it will return AsyncHTTPTransport with local_address="0.0.0.0"
        - [Default] It will return AiohttpTransport
        - Users can opt out of using AiohttpTransport by setting litellm.disable_aiohttp_transport to True


        Notes on this handler:
        - Why AiohttpTransport?
            - By default, we use AiohttpTransport since it offers much higher throughput and lower latency than httpx.

        - Why force ipv4?
            - Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
        """
        #########################################################
        # AIOHTTP TRANSPORT is the default (used unless disabled)
        #########################################################
        if AsyncHTTPHandler._should_use_aiohttp_transport():
            return AsyncHTTPHandler._create_aiohttp_transport(
                ssl_context=ssl_context,
                ssl_verify=ssl_verify,
                shared_session=shared_session,
            )

        #########################################################
        # HTTPX TRANSPORT is used when the aiohttp transport is disabled
        #########################################################
        return AsyncHTTPHandler._create_httpx_transport()
|
|||
|
|
|
|||
|
|
    @staticmethod
    def _should_use_aiohttp_transport() -> bool:
        """
        AiohttpTransport is the default transport for litellm.

        Httpx can be used by the following
        - litellm.disable_aiohttp_transport = True
        - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True"

        Returns:
            bool: True unless the aiohttp transport is explicitly disabled.
        """
        # Local import is redundant with the module-level one; kept for
        # byte-compatibility.
        import os

        from litellm.secret_managers.main import str_to_bool

        #########################################################
        # Check if user disabled aiohttp transport
        ########################################################
        if (
            litellm.disable_aiohttp_transport is True
            or str_to_bool(os.getenv("DISABLE_AIOHTTP_TRANSPORT", "False")) is True
        ):
            return False

        #########################################################
        # Default: Use AiohttpTransport
        ########################################################
        verbose_logger.debug("Using AiohttpTransport...")
        return True
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _get_ssl_connector_kwargs(
|
|||
|
|
ssl_verify: Optional[bool] = None,
|
|||
|
|
ssl_context: Optional[ssl.SSLContext] = None,
|
|||
|
|
) -> Dict[str, Any]:
|
|||
|
|
"""
|
|||
|
|
Helper method to get SSL connector initialization arguments for aiohttp TCPConnector.
|
|||
|
|
|
|||
|
|
SSL Configuration Priority:
|
|||
|
|
1. If ssl_context is provided -> use the custom SSL context
|
|||
|
|
2. If ssl_verify is False -> disable SSL verification (ssl=False)
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
Dict with appropriate SSL configuration for TCPConnector
|
|||
|
|
"""
|
|||
|
|
connector_kwargs: Dict[str, Any] = {
|
|||
|
|
"local_addr": ("0.0.0.0", 0) if litellm.force_ipv4 else None,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if ssl_context is not None:
|
|||
|
|
# Priority 1: Use the provided custom SSL context
|
|||
|
|
connector_kwargs["ssl"] = ssl_context
|
|||
|
|
elif ssl_verify is False:
|
|||
|
|
# Priority 2: Explicitly disable SSL verification
|
|||
|
|
connector_kwargs["ssl"] = False
|
|||
|
|
|
|||
|
|
return connector_kwargs
|
|||
|
|
|
|||
|
|
    @staticmethod
    def _create_aiohttp_transport(
        ssl_verify: Optional[bool] = None,
        ssl_context: Optional[ssl.SSLContext] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> LiteLLMAiohttpTransport:
        """
        Creates an AiohttpTransport with RequestNotRead error handling

        Args:
            ssl_verify: When False, disables SSL certificate verification.
            ssl_context: Custom SSL context; takes priority over ssl_verify.
            shared_session: Optional pre-existing aiohttp ClientSession to
                reuse; a new session factory is created if None or closed.

        Returns:
            A LiteLLMAiohttpTransport wrapping either the shared session
            (without taking ownership) or a lazily-created new session.

        Note: aiohttp TCPConnector ssl parameter accepts:
        - SSLContext: custom SSL context
        - False: disable SSL verification
        """
        # Deferred imports: avoid a circular import at module load time.
        from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
        from litellm.secret_managers.main import str_to_bool

        connector_kwargs = AsyncHTTPHandler._get_ssl_connector_kwargs(
            ssl_verify=ssl_verify, ssl_context=ssl_context
        )
        #########################################################
        # Check if user enabled aiohttp trust env
        # use for HTTP_PROXY, HTTPS_PROXY, etc.
        ########################################################
        # Either the litellm global flag or the env var can enable trust_env.
        trust_env: bool = litellm.aiohttp_trust_env
        if str_to_bool(os.getenv("AIOHTTP_TRUST_ENV", "False")) is True:
            trust_env = True

        #########################################################
        # Determine SSL config to pass to transport for per-request override
        # This ensures ssl_verify works even with shared sessions
        #########################################################
        ssl_for_transport: Optional[Union[bool, ssl.SSLContext]] = None
        if ssl_context is not None:
            ssl_for_transport = ssl_context
        elif ssl_verify is False:
            ssl_for_transport = False

        verbose_logger.debug("Creating AiohttpTransport...")

        # Use shared session if provided and valid
        if shared_session is not None and not shared_session.closed:
            verbose_logger.debug(
                f"SHARED SESSION: Reusing existing ClientSession (ID: {id(shared_session)})"
            )
            # owns_session=False: the transport must not close a session it
            # did not create.
            return LiteLLMAiohttpTransport(
                client=shared_session,
                ssl_verify=ssl_for_transport,
                owns_session=False,
            )

        # Create new session only if none provided or existing one is invalid
        verbose_logger.debug(
            "NEW SESSION: Creating new ClientSession (no shared session provided)"
        )
        transport_connector_kwargs = {
            "keepalive_timeout": AIOHTTP_KEEPALIVE_TIMEOUT,
            "ttl_dns_cache": AIOHTTP_TTL_DNS_CACHE,
            **connector_kwargs,
        }
        if AIOHTTP_NEEDS_CLEANUP_CLOSED:
            transport_connector_kwargs["enable_cleanup_closed"] = True
        # Connector limits are only applied when the constants are positive;
        # 0 (or negative) leaves aiohttp's defaults in place.
        if AIOHTTP_CONNECTOR_LIMIT > 0:
            transport_connector_kwargs["limit"] = AIOHTTP_CONNECTOR_LIMIT
        if AIOHTTP_CONNECTOR_LIMIT_PER_HOST > 0:
            transport_connector_kwargs[
                "limit_per_host"
            ] = AIOHTTP_CONNECTOR_LIMIT_PER_HOST

        # Pass a factory (lambda) rather than a session so the ClientSession
        # is created lazily, inside a running event loop.
        return LiteLLMAiohttpTransport(
            client=lambda: ClientSession(
                connector=TCPConnector(**transport_connector_kwargs),
                trust_env=trust_env,
            ),
            ssl_verify=ssl_for_transport,
        )
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _create_httpx_transport() -> Optional[AsyncHTTPTransport]:
|
|||
|
|
"""
|
|||
|
|
Creates an AsyncHTTPTransport
|
|||
|
|
|
|||
|
|
- If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0"
|
|||
|
|
- [Default] If force_ipv4 is False, it will return None
|
|||
|
|
"""
|
|||
|
|
if litellm.force_ipv4:
|
|||
|
|
return AsyncHTTPTransport(local_address="0.0.0.0")
|
|||
|
|
else:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HTTPHandler:
    """
    Synchronous HTTP client wrapper around ``httpx.Client``.

    Handles unified SSL configuration, optional default headers (User-Agent),
    an optional IPv4-only transport, and masks sensitive information in HTTP
    error responses before re-raising.
    """

    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
        client: Optional[httpx.Client] = None,
        ssl_verify: Optional[Union[bool, str]] = None,
        disable_default_headers: Optional[
            bool
        ] = False,  # arize phoenix returns different API responses when user agent header in request
    ):
        """
        Args:
            timeout: Request timeout; falls back to ``_DEFAULT_TIMEOUT`` when None.
            concurrent_limit: Ignored; retained for backward compatibility.
            client: Pre-built ``httpx.Client`` to use instead of creating one.
            ssl_verify: SSL verification flag or CA bundle path.
            disable_default_headers: When True, skip the default User-Agent header.
        """
        if timeout is None:
            timeout = _DEFAULT_TIMEOUT

        # Get unified SSL configuration
        ssl_config = get_ssl_configuration(ssl_verify)

        # An SSL certificate used by the requested host to authenticate the client.
        # /path/to/client.pem
        cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

        # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT)
        default_headers = get_default_headers() if not disable_default_headers else None

        if client is None:
            transport = self._create_sync_transport()

            # Create a client with a connection pool
            self.client = httpx.Client(
                transport=transport,
                timeout=timeout,
                verify=ssl_config,
                cert=cert,
                headers=default_headers,
                follow_redirects=True,
            )
        else:
            self.client = client

    def close(self):
        """Close the underlying httpx client and release its connections."""
        self.client.close()

    def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        follow_redirects: Optional[bool] = None,
    ):
        """
        Send a GET request.

        Query parameters embedded in ``url`` are merged into ``params``.
        ``follow_redirects=None`` defers to the client's default.
        """
        # Set follow_redirects to UseClientDefault if None
        _follow_redirects = (
            follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
        )
        params = params or {}
        params.update(self.extract_query_params(url))

        # Bug fix: `_follow_redirects` was previously computed but never
        # passed to the client, silently ignoring the caller's argument.
        response = self.client.get(
            url,
            params=params,
            headers=headers,
            follow_redirects=_follow_redirects,  # type: ignore
        )

        return response

    @staticmethod
    def extract_query_params(url: str) -> Dict[str, str]:
        """
        Parse a URL's query-string into a dict.

        :param url: full URL, e.g. "https://.../path?foo=1&bar=2"
        :return: {"foo": "1", "bar": "2"}
        """
        from urllib.parse import parse_qsl, urlsplit

        parts = urlsplit(url)
        return dict(parse_qsl(parts.query))

    def post(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[Union[dict, str, List]] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        files: Optional[Union[dict, RequestFiles]] = None,
        content: Any = None,
        logging_obj: Optional[LiteLLMLoggingObject] = None,
    ):
        """
        Send a POST request and raise on HTTP error statuses.

        Raises:
            litellm.Timeout: on an httpx timeout.
            httpx.HTTPStatusError: on 4xx/5xx; ``message``/``text`` (masked)
                and ``status_code`` attributes are attached before re-raise.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "POST",
                    url,
                    data=request_data,  # type: ignore
                    json=json,
                    params=params,
                    headers=headers,
                    timeout=timeout,
                    files=files,
                    content=request_content,  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "POST", url, data=request_data, json=json, params=params, headers=headers, files=files, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
            # Mask secrets in the error body before exposing it on the exception.
            if stream is True:
                setattr(e, "message", mask_sensitive_info(e.response.read()))
                setattr(e, "text", mask_sensitive_info(e.response.read()))
            else:
                error_text = mask_sensitive_info(e.response.text)
                setattr(e, "message", error_text)
                setattr(e, "text", error_text)

            setattr(e, "status_code", e.response.status_code)
            raise e
        except Exception as e:
            raise e

    def patch(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[Union[dict, str]] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        content: Any = None,
    ):
        """
        Send a PATCH request and raise on HTTP error statuses.

        Raises:
            litellm.Timeout: on an httpx timeout.
            httpx.HTTPStatusError: on 4xx/5xx; ``message``/``text`` (masked)
                and ``status_code`` attributes are attached before re-raise.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "PATCH", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
            # Mask secrets in the error body before exposing it on the exception.
            if stream is True:
                setattr(e, "message", mask_sensitive_info(e.response.read()))
                setattr(e, "text", mask_sensitive_info(e.response.read()))
            else:
                error_text = mask_sensitive_info(e.response.text)
                setattr(e, "message", error_text)
                setattr(e, "text", error_text)

            setattr(e, "status_code", e.response.status_code)

            raise e
        except Exception as e:
            raise e

    def put(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[Union[dict, str]] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        content: Any = None,
    ):
        """
        Send a PUT request.

        NOTE(review): unlike post/patch/delete, this method does not call
        ``raise_for_status()`` — error responses are returned to the caller
        as-is. Presumably intentional; confirm before changing.

        Raises:
            litellm.Timeout: on an httpx timeout.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "PUT", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except Exception as e:
            raise e

    def delete(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Send a DELETE request and raise on HTTP error statuses.

        Raises:
            litellm.Timeout: on an httpx timeout.
            httpx.HTTPStatusError: on 4xx/5xx; ``message``/``text`` (masked)
                and ``status_code`` attributes are attached before re-raise.
        """
        try:
            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            if timeout is not None:
                req = self.client.build_request(
                    "DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
                )
            else:
                req = self.client.build_request(
                    "DELETE", url, data=request_data, json=json, params=params, headers=headers, content=request_content  # type: ignore
                )
            response = self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
            )
        except httpx.HTTPStatusError as e:
            # Mask secrets in the error body before exposing it on the exception.
            if stream is True:
                setattr(e, "message", mask_sensitive_info(e.response.read()))
                setattr(e, "text", mask_sensitive_info(e.response.read()))
            else:
                error_text = mask_sensitive_info(e.response.text)
                setattr(e, "message", error_text)
                setattr(e, "text", error_text)

            setattr(e, "status_code", e.response.status_code)

            raise e
        except Exception as e:
            raise e

    def __del__(self) -> None:
        # Best-effort cleanup; never raise during interpreter shutdown.
        try:
            self.close()
        except Exception:
            pass

    def _create_sync_transport(self) -> Optional[HTTPTransport]:
        """
        Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True.
        Otherwise, return None.

        Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
        """
        if litellm.force_ipv4:
            return HTTPTransport(local_address="0.0.0.0")
        else:
            # Fall back to a globally configured transport, if any.
            return getattr(litellm, "sync_transport", None)
|||
|
|
|
|||
|
|
def get_async_httpx_client(
    llm_provider: Union[LlmProviders, httpxSpecialProvider],
    params: Optional[dict] = None,
    shared_session: Optional["ClientSession"] = None,
) -> AsyncHTTPHandler:
    """
    Return a cached AsyncHTTPHandler for (provider, params).

    On a cache miss, builds a new handler, caches it with a TTL, and
    returns it.
    """
    key_suffix = ""
    if params is not None:
        for param_name, param_value in params.items():
            try:
                key_suffix += f"{param_name}_{param_value}"
            except Exception:
                # Best-effort key building: unstringifiable values are skipped.
                pass

    cache_key = "async_httpx_client" + key_suffix + llm_provider

    # Lazily create the global client cache so this works even when litellm
    # globals are not fully populated at import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        setattr(litellm, "in_memory_llm_clients_cache", client_cache)

    cached = client_cache.get_cache(cache_key)
    if cached:
        return cached

    if params is None:
        new_client = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
            shared_session=shared_session,
        )
    else:
        # "disable_aiohttp_transport" only differentiates the cache key; it is
        # not an AsyncHTTPHandler.__init__ argument.
        init_kwargs = {
            k: v for k, v in params.items() if k != "disable_aiohttp_transport"
        }
        init_kwargs["shared_session"] = shared_session
        new_client = AsyncHTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return new_client
|||
|
|
|
|||
|
|
|
|||
|
|
def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
    """
    Return a cached synchronous HTTPHandler for ``params``.

    On a cache miss, builds a new handler, caches it with a TTL, and
    returns it.
    """
    key_suffix = ""
    if params is not None:
        for param_name, param_value in params.items():
            try:
                key_suffix += f"{param_name}_{param_value}"
            except Exception:
                # Best-effort key building: unstringifiable values are skipped.
                pass

    cache_key = "httpx_client" + key_suffix

    # Lazily create the global client cache so this works even when litellm
    # globals are not fully populated at import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        setattr(litellm, "in_memory_llm_clients_cache", client_cache)

    cached = client_cache.get_cache(cache_key)
    if cached:
        return cached

    if params is None:
        new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    else:
        # "disable_aiohttp_transport" only differentiates the cache key; it is
        # not an HTTPHandler.__init__ argument.
        init_kwargs = {
            k: v for k, v in params.items() if k != "disable_aiohttp_transport"
        }
        new_client = HTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return new_client