chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/passthrough/main.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/passthrough/main.py
@@ -0,0 +1,433 @@
+"""
+This module is used to pass through requests to the LLM APIs.
+"""
+
+import asyncio
+import contextvars
+from functools import partial
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncGenerator,
+    Coroutine,
+    Generator,
+    List,
+    Optional,
+    Union,
+    cast,
+)
+
+import httpx
+from httpx._types import CookieTypes, QueryParamTypes, RequestFiles
+
+import litellm
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.passthrough.utils import CommonUtils
+from litellm.utils import client
+
+# Shared handler instance; the routes below call its `_handle_error` to turn
+# failures into provider-specific exceptions.
+base_llm_http_handler = BaseLLMHTTPHandler()
+from .utils import BasePassthroughUtils
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+    from litellm.llms.base_llm.passthrough.transformation import BasePassthroughConfig
+
+
+@client
+async def allm_passthrough_route(
+    *,
+    method: str,
+    endpoint: str,
+    model: str,
+    custom_llm_provider: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    request_query_params: Optional[dict] = None,
+    request_headers: Optional[dict] = None,
+    content: Optional[Any] = None,
+    data: Optional[dict] = None,
+    files: Optional[RequestFiles] = None,
+    json: Optional[Any] = None,
+    params: Optional[QueryParamTypes] = None,
+    cookies: Optional[CookieTypes] = None,
+    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    **kwargs,
+) -> Union[httpx.Response, AsyncGenerator[Any, Any]]:
+    """
+    Async: Pass through a request to the LLM APIs.
+
+    Runs `llm_passthrough_route` (with `allm_passthrough_route=True`) in the
+    event loop's default executor, preserving the current `contextvars`
+    context, then awaits the coroutine it returns.
+
+    Returns:
+        The awaited result of that coroutine: an `httpx.Response` (checked
+        with `raise_for_status()`), or an async generator for streaming.
+
+    Raises:
+        httpx.HTTPStatusError: re-raised unchanged.
+        Exception: any other failure is converted with
+            `base_llm_http_handler._handle_error` when a provider passthrough
+            config can be resolved; otherwise the original exception is
+            re-raised as-is.
+    """
+    from litellm.types.utils import LlmProviders
+    from litellm.utils import ProviderConfigManager
+
+    try:
+        # We are inside a coroutine, so a running loop always exists.
+        loop = asyncio.get_running_loop()
+        kwargs["allm_passthrough_route"] = True
+
+        model, custom_llm_provider, api_key, api_base = get_llm_provider(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            api_base=api_base,
+            api_key=api_key,
+        )
+
+        # Resolved here only to fail fast when the provider has no
+        # passthrough config; the sync route resolves it again itself.
+        provider_config = cast(
+            Optional["BasePassthroughConfig"], kwargs.get("provider_config")
+        ) or ProviderConfigManager.get_provider_passthrough_config(
+            provider=LlmProviders(custom_llm_provider),
+            model=model,
+        )
+
+        if provider_config is None:
+            raise Exception(f"Provider {custom_llm_provider} not found")
+
+        func = partial(
+            llm_passthrough_route,
+            method=method,
+            endpoint=endpoint,
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            api_base=api_base,
+            api_key=api_key,
+            request_query_params=request_query_params,
+            request_headers=request_headers,
+            content=content,
+            data=data,
+            files=files,
+            json=json,
+            params=params,
+            cookies=cookies,
+            client=client,
+            **kwargs,
+        )
+
+        # Run the sync route in the executor under a copy of the current
+        # context so context variables remain visible to it.
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+
+        # Since allm_passthrough_route=True, we always expect a coroutine
+        # from _async_passthrough_request.
+        if not asyncio.iscoroutine(init_response):
+            raise Exception("Expected coroutine from async passthrough route")
+
+        response = await init_response
+
+        # Only call raise_for_status if it's a Response object (not a generator)
+        if isinstance(response, httpx.Response):
+            response.raise_for_status()
+
+        return response
+
+    except httpx.HTTPStatusError:
+        # Re-raise HTTP errors as-is to preserve the original error details.
+        # The caller (e.g., proxy layer) can convert them to a response format.
+        raise
+    except Exception as e:
+        # For other exceptions, use provider-specific error handling when a
+        # provider config can be resolved. Resolution is best-effort: if the
+        # lookup itself fails, the ORIGINAL exception must surface, not the
+        # lookup failure.
+        provider_config = None
+        try:
+            _, resolved_custom_llm_provider, _, _ = get_llm_provider(
+                model=model,
+                custom_llm_provider=custom_llm_provider,
+                api_base=api_base,
+                api_key=api_key,
+            )
+            if resolved_custom_llm_provider:
+                provider_config = cast(
+                    Optional["BasePassthroughConfig"], kwargs.get("provider_config")
+                ) or ProviderConfigManager.get_provider_passthrough_config(
+                    provider=LlmProviders(resolved_custom_llm_provider),
+                    model=model,
+                )
+        except Exception:
+            # If we can't get a provider config, fall back to the raw error.
+            provider_config = None
+
+        if provider_config is None:
+            # If no provider config available, raise the original exception
+            raise e
+
+        raise base_llm_http_handler._handle_error(
+            e=e,
+            provider_config=provider_config,
+        )
+
+
+@client
+def llm_passthrough_route(
+    *,
+    method: str,
+    endpoint: str,
+    model: str,
+    custom_llm_provider: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    request_query_params: Optional[dict] = None,
+    request_headers: Optional[dict] = None,
+    allm_passthrough_route: bool = False,
+    content: Optional[Any] = None,
+    data: Optional[dict] = None,
+    files: Optional[RequestFiles] = None,
+    json: Optional[Any] = None,
+    params: Optional[QueryParamTypes] = None,
+    cookies: Optional[CookieTypes] = None,
+    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    **kwargs,
+) -> Union[
+    httpx.Response,
+    Coroutine[Any, Any, httpx.Response],
+    Coroutine[Any, Any, Union[httpx.Response, AsyncGenerator[Any, Any]]],
+    Generator[Any, Any, Any],
+    AsyncGenerator[Any, Any],
+]:
+    """
+    Pass through requests to the LLM APIs.
+
+    Step 1. Build the request
+    Step 2. Send the request
+    Step 3. Return the response
+
+    Args of note:
+        allm_passthrough_route: when True, the module-level async client is
+            the default client and an un-awaited coroutine is returned.
+        content, data, json: request body candidates. A signed body returned
+            by `provider_config.sign_request` takes precedence over all of
+            them, and `content` takes precedence over `data` / `json`.
+        kwargs: read here for `litellm_logging_obj`, `provider_config` and
+            `model_id`; also forwarded to `get_litellm_params`.
+
+    Returns:
+        - async mode: the coroutine from `_async_passthrough_request`.
+        - sync streaming: the generator from `_sync_streaming`.
+        - sync non-streaming: the `httpx.Response`.
+
+    Raises:
+        Exception: if no passthrough config is found for the provider, or
+            the result of `base_llm_http_handler._handle_error` for failures
+            raised while dispatching the request.
+    """
+    from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
+    from litellm.types.utils import LlmProviders
+    from litellm.utils import ProviderConfigManager
+
+    # The async wrapper sets this flag; it selects the default client and
+    # the return path at the bottom of the function.
+    _is_async = allm_passthrough_route
+
+    if client is None:
+        if _is_async:
+            client = litellm.module_level_aclient
+        else:
+            client = litellm.module_level_client
+
+    # NOTE(review): `cast` does not validate; if the key is absent this is None
+    # and the first attribute access below fails. Presumably the `@client`
+    # decorator always injects it — confirm.
+    litellm_logging_obj = cast("LiteLLMLoggingObj", kwargs.get("litellm_logging_obj"))
+
+    model, custom_llm_provider, api_key, api_base = get_llm_provider(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        api_base=api_base,
+        api_key=api_key,
+    )
+
+    litellm_params_dict = get_litellm_params(**kwargs)
+
+    # Add model_id to litellm_params if present in kwargs (for Bedrock Application Inference Profiles)
+    if "model_id" in kwargs:
+        litellm_params_dict["model_id"] = kwargs["model_id"]
+
+    litellm_logging_obj.update_environment_variables(
+        model=model,
+        litellm_params=litellm_params_dict,
+        optional_params={},
+        endpoint=endpoint,
+        custom_llm_provider=custom_llm_provider,
+        request_data=data if data else json,
+    )
+
+    # An explicitly supplied `provider_config` kwarg wins over the registry lookup.
+    provider_config = cast(
+        Optional["BasePassthroughConfig"], kwargs.get("provider_config")
+    ) or ProviderConfigManager.get_provider_passthrough_config(
+        provider=LlmProviders(custom_llm_provider),
+        model=model,
+    )
+    if provider_config is None:
+        raise Exception(f"Provider {custom_llm_provider} not found")
+
+    updated_url, base_target_url = provider_config.get_complete_url(
+        api_base=api_base,
+        api_key=api_key,
+        model=model,
+        endpoint=endpoint,
+        request_query_params=request_query_params,
+        litellm_params=litellm_params_dict,
+    )
+
+    # [TODO: Refactor to bedrockpassthroughconfig] need to encode the id of application-inference-profile for bedrock
+    if custom_llm_provider == "bedrock" and "application-inference-profile" in endpoint:
+        encoded_url_str = CommonUtils.encode_bedrock_runtime_modelid_arn(
+            str(updated_url)
+        )
+        updated_url = httpx.URL(encoded_url_str)
+
+    # Resolve the provider API key and build the auth headers from it
+    provider_api_key = provider_config.get_api_key(api_key)
+
+    auth_headers = provider_config.validate_environment(
+        headers={},
+        model=model,
+        messages=[],
+        optional_params={},
+        litellm_params=litellm_params_dict,
+        api_key=provider_api_key,
+        api_base=base_target_url,
+    )
+
+    # Combine the caller's headers with the auth headers.
+    # NOTE(review): `forward_headers=False` presumably means the incoming
+    # request headers are not forwarded wholesale — verify in
+    # BasePassthroughUtils.forward_headers_from_request.
+    headers = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers or {},
+        headers=auth_headers,
+        forward_headers=False,
+    )
+
+    # Providers may sign the request; a non-None `signed_json_body` replaces
+    # every other body candidate when the request is built below.
+    headers, signed_json_body = provider_config.sign_request(
+        headers=headers,
+        litellm_params=litellm_params_dict,
+        request_data=data if data else json,
+        api_base=str(updated_url),
+        model=model,
+    )
+
+    ## SWAP MODEL IN JSON BODY [TODO: REFACTOR TO A provider_config.transform_request method]
+    # This mutates the caller's `json` dict in place.
+    # NOTE(review): the swap runs AFTER sign_request received the same dict;
+    # if the signer serialized it, the signed body carries the pre-swap
+    # model value — confirm this ordering is intended.
+    if json and isinstance(json, dict) and "model" in json:
+        json["model"] = model
+
+    # Body precedence: signed body > `content` > `data` / `json`.
+    request = client.client.build_request(
+        method=method,
+        url=updated_url,
+        content=signed_json_body if signed_json_body is not None else content,
+        data=data if (signed_json_body is None and content is None) else None,
+        files=files,
+        json=json if (signed_json_body is None and content is None) else None,
+        params=params,
+        headers=headers,
+        cookies=cookies,
+    )
+
+    ## IS STREAMING REQUEST
+    is_streaming_request = provider_config.is_streaming_request(
+        endpoint=endpoint,
+        request_data=data or json or {},
+    )
+
+    # Update logging object with streaming status
+    litellm_logging_obj.stream = is_streaming_request
+
+    ## LOGGING PRE-CALL
+    request_data = data if data else json
+    litellm_logging_obj.pre_call(
+        input=request_data,
+        api_key=provider_api_key,
+        additional_args={
+            "complete_input_dict": request_data,
+            "api_base": str(updated_url),
+            "headers": headers,
+        },
+    )
+
+    try:
+        if _is_async:
+            # Return the coroutine to be awaited by the caller
+            return _async_passthrough_request(
+                client=client,
+                request=request,
+                is_streaming_request=is_streaming_request,
+                litellm_logging_obj=litellm_logging_obj,
+                provider_config=provider_config,
+            )
+        else:
+            # Sync path - client.client.send returns Response directly
+            response: httpx.Response = client.client.send(request=request, stream=is_streaming_request)  # type: ignore
+            response.raise_for_status()
+
+            if (
+                hasattr(response, "iter_bytes") and is_streaming_request
+            ):  # yield the chunk, so we can store it in the logging object
+                return _sync_streaming(response, litellm_logging_obj, provider_config)
+            else:
+                # For non-streaming responses, return the entire response
+                return response
+    except Exception as e:
+        # NOTE: provider_config was already checked for None above, so this
+        # branch is purely defensive.
+        if provider_config is None:
+            raise e
+        raise base_llm_http_handler._handle_error(
+            e=e,
+            provider_config=provider_config,
+        )
+
+
+async def _async_passthrough_request(
+    client: Union[HTTPHandler, AsyncHTTPHandler],
+    request: httpx.Request,
+    is_streaming_request: bool,
+    litellm_logging_obj: "LiteLLMLoggingObj",
+    provider_config: "BasePassthroughConfig",
+) -> Union[httpx.Response, AsyncGenerator[Any, Any]]:
+    """
+    Send an already-built request on the async path.
+
+    Streaming requests return the `_async_streaming` generator, which awaits
+    the pending send itself. Non-streaming requests are awaited here, fully
+    read, status-checked and returned as an `httpx.Response`.
+
+    Raises:
+        Exception: if `client.client.send` did not return a coroutine.
+    """
+    # An async client hands back a coroutine here; nothing is awaited yet.
+    pending_send = client.client.send(request=request, stream=is_streaming_request)
+
+    # Guard: a non-coroutine result is not expected on the async path.
+    if not asyncio.iscoroutine(pending_send):
+        raise Exception("Expected coroutine from async client")
+
+    if is_streaming_request:
+        # Hand the un-awaited coroutine to the streaming generator.
+        return _async_streaming(
+            response=pending_send,
+            litellm_logging_obj=litellm_logging_obj,
+            provider_config=provider_config,
+        )
+
+    completed = await pending_send
+    await completed.aread()
+    completed.raise_for_status()
+    return completed
+
+
+def _sync_streaming(
+    response: httpx.Response,
+    litellm_logging_obj: "LiteLLMLoggingObj",
+    provider_config: "BasePassthroughConfig",
+):
+    """
+    Yield the raw byte chunks of a streamed sync response.
+
+    Every chunk is also collected; once the stream is exhausted the collected
+    chunks are submitted to the shared executor so
+    `litellm_logging_obj.flush_passthrough_collected_chunks` can log them
+    without blocking the consumer.
+
+    If anything fails, the response is closed (best effort) before the error
+    is re-raised, matching the cleanup done by `_async_streaming`.
+    """
+    from litellm.utils import executor
+
+    try:
+        raw_bytes: List[bytes] = []
+        for chunk in response.iter_bytes():  # type: ignore
+            raw_bytes.append(chunk)
+            yield chunk
+
+        executor.submit(
+            litellm_logging_obj.flush_passthrough_collected_chunks,
+            raw_bytes=raw_bytes,
+            provider_config=provider_config,
+        )
+    except Exception:
+        # Release the underlying connection; a failure to close must not
+        # mask the error that brought us here.
+        try:
+            response.close()
+        except Exception:
+            pass
+        raise
+
+
+# Strong references to in-flight logging tasks. The event loop only keeps weak
+# references to tasks, so a task nobody else references may be garbage
+# collected before it finishes.
+_background_flush_tasks: set = set()
+
+
+async def _async_streaming(
+    response: Coroutine[Any, Any, httpx.Response],
+    litellm_logging_obj: "LiteLLMLoggingObj",
+    provider_config: "BasePassthroughConfig",
+):
+    """
+    Await a pending streamed response and yield its raw byte chunks.
+
+    Every chunk is also collected; once the stream is exhausted a background
+    task is scheduled to run
+    `litellm_logging_obj.async_flush_passthrough_collected_chunks` on the
+    collected chunks.
+
+    If the status check or the iteration fails, the response is closed (best
+    effort) before the error is re-raised.
+    """
+    iter_response = await response
+    try:
+        iter_response.raise_for_status()
+        raw_bytes: List[bytes] = []
+
+        async for chunk in iter_response.aiter_bytes():  # type: ignore
+            raw_bytes.append(chunk)
+            yield chunk
+
+        flush_task = asyncio.create_task(
+            litellm_logging_obj.async_flush_passthrough_collected_chunks(
+                raw_bytes=raw_bytes,
+                provider_config=provider_config,
+            )
+        )
+        # Hold the task until it is done so it cannot be collected mid-flight.
+        _background_flush_tasks.add(flush_task)
+        flush_task.add_done_callback(_background_flush_tasks.discard)
+    except Exception:
+        # A failure to close must not mask the original error.
+        try:
+            await iter_response.aclose()
+        except Exception:
+            pass
+        raise