chore: initial snapshot for gitea/github upload

commit a699a1ac98 (Your Name, 2026-03-26 16:04:46 +08:00)
3497 changed files with 1586237 additions and 0 deletions


@@ -0,0 +1,118 @@
This makes it easier to pass through requests to the LLM APIs, e.g. routing to vLLM's `/classify` endpoint:

## SDK (Basic)
```python
import litellm

response = litellm.llm_passthrough_route(
    model="hosted_vllm/papluca/xlm-roberta-base-language-detection",
    method="POST",
    endpoint="classify",
    api_base="http://localhost:8090",
    api_key=None,
    json={
        "model": "swapped-for-litellm-model",
        "input": "Hello, world!",
    },
)
print(response)
```
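Note: the `model` value inside `json` is a placeholder. Before the request is forwarded, litellm swaps it for the resolved model name (here `papluca/xlm-roberta-base-language-detection`, i.e. the model with the `hosted_vllm/` prefix stripped), so the upstream API sees the real model id.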
## SDK (Router)
```python
import asyncio

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "roberta-base-language-detection",
            "litellm_params": {
                "model": "hosted_vllm/papluca/xlm-roberta-base-language-detection",
                "api_base": "http://localhost:8090",
            },
        }
    ]
)

request_data = {
    "model": "roberta-base-language-detection",
    "method": "POST",
    "endpoint": "classify",
    "api_base": "http://localhost:8090",
    "api_key": None,
    "json": {
        "model": "roberta-base-language-detection",
        "input": "Hello, world!",
    },
}

async def main():
    response = await router.allm_passthrough_route(**request_data)
    print(response)

if __name__ == "__main__":
    asyncio.run(main())
```
## PROXY
1. Set up the config.yaml
```yaml
model_list:
  - model_name: roberta-base-language-detection
    litellm_params:
      model: hosted_vllm/papluca/xlm-roberta-base-language-detection
      api_base: http://localhost:8090
```
2. Run the proxy
```bash
litellm --config config.yaml

# RUNNING on http://localhost:4000
```
3. Use the proxy
```bash
curl -X POST http://localhost:4000/vllm/classify \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <your-api-key>" \
  -d '{"model": "roberta-base-language-detection", "input": "Hello, world!"}'
```
# How to add a provider for passthrough
See [VLLMModelInfo](https://github.com/BerriAI/litellm/blob/main/litellm/llms/vllm/common_utils.py) for an example.
1. Inherit from `BaseLLMModelInfo`
```python
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo

class VLLMModelInfo(BaseLLMModelInfo):
    pass
```
2. Register the provider in `ProviderConfigManager.get_provider_model_info`, then verify that it resolves:
```python
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager

provider_config = ProviderConfigManager.get_provider_model_info(
    model="my-test-model", provider=LlmProviders.VLLM
)
print(provider_config)
```
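The steps above cover model info; the passthrough routing itself is driven by a provider passthrough config (`BasePassthroughConfig` from `litellm/llms/base_llm/passthrough/transformation.py`, resolved via `ProviderConfigManager.get_provider_passthrough_config`). Below is a minimal sketch of such a config. The method names and signatures are inferred from the call sites in `llm_passthrough_route`; treat it as an illustration of the shape, not the authoritative interface.
```python
from typing import Optional, Tuple

import httpx

from litellm.llms.base_llm.passthrough.transformation import BasePassthroughConfig


class MyProviderPassthroughConfig(BasePassthroughConfig):
    # Builds the full target URL for the upstream endpoint.
    def get_complete_url(
        self, api_base, api_key, model, endpoint, request_query_params, litellm_params
    ) -> Tuple[httpx.URL, str]:
        base = api_base or "http://localhost:8090"  # hypothetical default base
        return httpx.URL(f"{base}/{endpoint}"), base

    # Returns the API key to use (here: whatever the caller passed).
    def get_api_key(self, api_key: Optional[str]) -> Optional[str]:
        return api_key

    # Produces auth headers for the upstream request.
    def validate_environment(
        self, headers, model, messages, optional_params, litellm_params, api_key, api_base
    ) -> dict:
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    # Hook for providers that sign request bodies (e.g. SigV4); no-op here.
    def sign_request(self, headers, litellm_params, request_data, api_base, model):
        return headers, None

    # Tells the router whether to stream the upstream response.
    def is_streaming_request(self, endpoint: str, request_data: dict) -> bool:
        return bool(request_data.get("stream", False))
```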


@@ -0,0 +1,8 @@
from .main import allm_passthrough_route, llm_passthrough_route
from .utils import BasePassthroughUtils

__all__ = [
    "allm_passthrough_route",
    "llm_passthrough_route",
    "BasePassthroughUtils",
]


@@ -0,0 +1,433 @@
"""
This module is used to pass through requests to the LLM APIs.
"""
import asyncio
import contextvars
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
AsyncGenerator,
Coroutine,
Generator,
List,
Optional,
Union,
cast,
)
import httpx
from httpx._types import CookieTypes, QueryParamTypes, RequestFiles
import litellm
from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.passthrough.utils import CommonUtils
from litellm.utils import client
base_llm_http_handler = BaseLLMHTTPHandler()
from .utils import BasePassthroughUtils
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.passthrough.transformation import BasePassthroughConfig
@client
async def allm_passthrough_route(
    *,
    method: str,
    endpoint: str,
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None,
    api_key: Optional[str] = None,
    request_query_params: Optional[dict] = None,
    request_headers: Optional[dict] = None,
    content: Optional[Any] = None,
    data: Optional[dict] = None,
    files: Optional[RequestFiles] = None,
    json: Optional[Any] = None,
    params: Optional[QueryParamTypes] = None,
    cookies: Optional[CookieTypes] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    **kwargs,
) -> Union[httpx.Response, AsyncGenerator[Any, Any]]:
    """
    Async: Pass through a request to the LLM API.
    """
    try:
        loop = asyncio.get_event_loop()
        kwargs["allm_passthrough_route"] = True

        model, custom_llm_provider, api_key, api_base = get_llm_provider(
            model=model,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            api_key=api_key,
        )

        from litellm.types.utils import LlmProviders
        from litellm.utils import ProviderConfigManager

        provider_config = cast(
            Optional["BasePassthroughConfig"], kwargs.get("provider_config")
        ) or ProviderConfigManager.get_provider_passthrough_config(
            provider=LlmProviders(custom_llm_provider),
            model=model,
        )
        if provider_config is None:
            raise Exception(f"Provider {custom_llm_provider} not found")

        func = partial(
            llm_passthrough_route,
            method=method,
            endpoint=endpoint,
            model=model,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            api_key=api_key,
            request_query_params=request_query_params,
            request_headers=request_headers,
            content=content,
            data=data,
            files=files,
            json=json,
            params=params,
            cookies=cookies,
            client=client,
            **kwargs,
        )

        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)

        # Since allm_passthrough_route=True, we always get a coroutine from _async_passthrough_request
        if asyncio.iscoroutine(init_response):
            response = await init_response
            # Only call raise_for_status if it's a Response object (not a generator)
            if isinstance(response, httpx.Response):
                response.raise_for_status()
            return response
        else:
            # This shouldn't happen when allm_passthrough_route=True, but handle it for type safety
            raise Exception("Expected coroutine from async passthrough route")
    except httpx.HTTPStatusError as e:
        # For HTTP errors, re-raise as-is to preserve the original error details.
        # The caller (e.g., the proxy layer) can handle conversion to an appropriate response format.
        raise e
    except Exception as e:
        # For other exceptions, use provider-specific error handling
        from litellm.types.utils import LlmProviders
        from litellm.utils import ProviderConfigManager

        # Get the provider using the same logic as llm_passthrough_route
        _, resolved_custom_llm_provider, _, _ = get_llm_provider(
            model=model,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            api_key=api_key,
        )

        # Get the provider config if available
        provider_config = None
        if resolved_custom_llm_provider:
            try:
                provider_config = cast(
                    Optional["BasePassthroughConfig"], kwargs.get("provider_config")
                ) or ProviderConfigManager.get_provider_passthrough_config(
                    provider=LlmProviders(resolved_custom_llm_provider),
                    model=model,
                )
            except Exception:
                # If we can't get a provider config, pass None
                pass

        if provider_config is None:
            # If no provider config is available, raise the original exception
            raise e
        raise base_llm_http_handler._handle_error(
            e=e,
            provider_config=provider_config,
        )
@client
def llm_passthrough_route(
    *,
    method: str,
    endpoint: str,
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None,
    api_key: Optional[str] = None,
    request_query_params: Optional[dict] = None,
    request_headers: Optional[dict] = None,
    allm_passthrough_route: bool = False,
    content: Optional[Any] = None,
    data: Optional[dict] = None,
    files: Optional[RequestFiles] = None,
    json: Optional[Any] = None,
    params: Optional[QueryParamTypes] = None,
    cookies: Optional[CookieTypes] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    **kwargs,
) -> Union[
    httpx.Response,
    Coroutine[Any, Any, httpx.Response],
    Coroutine[Any, Any, Union[httpx.Response, AsyncGenerator[Any, Any]]],
    Generator[Any, Any, Any],
    AsyncGenerator[Any, Any],
]:
    """
    Pass through requests to the LLM APIs.

    Step 1. Build the request
    Step 2. Send the request
    Step 3. Return the response
    """
    from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
    from litellm.types.utils import LlmProviders
    from litellm.utils import ProviderConfigManager

    _is_async = allm_passthrough_route
    if client is None:
        if _is_async:
            client = litellm.module_level_aclient
        else:
            client = litellm.module_level_client

    litellm_logging_obj = cast("LiteLLMLoggingObj", kwargs.get("litellm_logging_obj"))
    model, custom_llm_provider, api_key, api_base = get_llm_provider(
        model=model,
        custom_llm_provider=custom_llm_provider,
        api_base=api_base,
        api_key=api_key,
    )

    litellm_params_dict = get_litellm_params(**kwargs)
    # Add model_id to litellm_params if present in kwargs (for Bedrock Application Inference Profiles)
    if "model_id" in kwargs:
        litellm_params_dict["model_id"] = kwargs["model_id"]

    litellm_logging_obj.update_environment_variables(
        model=model,
        litellm_params=litellm_params_dict,
        optional_params={},
        endpoint=endpoint,
        custom_llm_provider=custom_llm_provider,
        request_data=data if data else json,
    )

    provider_config = cast(
        Optional["BasePassthroughConfig"], kwargs.get("provider_config")
    ) or ProviderConfigManager.get_provider_passthrough_config(
        provider=LlmProviders(custom_llm_provider),
        model=model,
    )
    if provider_config is None:
        raise Exception(f"Provider {custom_llm_provider} not found")

    updated_url, base_target_url = provider_config.get_complete_url(
        api_base=api_base,
        api_key=api_key,
        model=model,
        endpoint=endpoint,
        request_query_params=request_query_params,
        litellm_params=litellm_params_dict,
    )

    # [TODO: Refactor into a BedrockPassthroughConfig] need to encode the id of application-inference-profile for bedrock
    if custom_llm_provider == "bedrock" and "application-inference-profile" in endpoint:
        encoded_url_str = CommonUtils.encode_bedrock_runtime_modelid_arn(
            str(updated_url)
        )
        updated_url = httpx.URL(encoded_url_str)

    # Add or update query parameters
    provider_api_key = provider_config.get_api_key(api_key)

    auth_headers = provider_config.validate_environment(
        headers={},
        model=model,
        messages=[],
        optional_params={},
        litellm_params=litellm_params_dict,
        api_key=provider_api_key,
        api_base=base_target_url,
    )

    headers = BasePassthroughUtils.forward_headers_from_request(
        request_headers=request_headers or {},
        headers=auth_headers,
        forward_headers=False,
    )

    headers, signed_json_body = provider_config.sign_request(
        headers=headers,
        litellm_params=litellm_params_dict,
        request_data=data if data else json,
        api_base=str(updated_url),
        model=model,
    )

    ## SWAP MODEL IN JSON BODY [TODO: REFACTOR TO A provider_config.transform_request method]
    if json and isinstance(json, dict) and "model" in json:
        json["model"] = model

    request = client.client.build_request(
        method=method,
        url=updated_url,
        content=signed_json_body if signed_json_body is not None else content,
        data=data if (signed_json_body is None and content is None) else None,
        files=files,
        json=json if (signed_json_body is None and content is None) else None,
        params=params,
        headers=headers,
        cookies=cookies,
    )

    ## IS STREAMING REQUEST
    is_streaming_request = provider_config.is_streaming_request(
        endpoint=endpoint,
        request_data=data or json or {},
    )

    # Update the logging object with the streaming status
    litellm_logging_obj.stream = is_streaming_request

    ## LOGGING PRE-CALL
    request_data = data if data else json
    litellm_logging_obj.pre_call(
        input=request_data,
        api_key=provider_api_key,
        additional_args={
            "complete_input_dict": request_data,
            "api_base": str(updated_url),
            "headers": headers,
        },
    )

    try:
        if _is_async:
            # Return the coroutine to be awaited by the caller
            return _async_passthrough_request(
                client=client,
                request=request,
                is_streaming_request=is_streaming_request,
                litellm_logging_obj=litellm_logging_obj,
                provider_config=provider_config,
            )
        else:
            # Sync path - client.client.send returns a Response directly
            response: httpx.Response = client.client.send(request=request, stream=is_streaming_request)  # type: ignore
            response.raise_for_status()

            if (
                hasattr(response, "iter_bytes") and is_streaming_request
            ):  # yield the chunks, so we can store them in the logging object
                return _sync_streaming(response, litellm_logging_obj, provider_config)
            else:
                # For non-streaming responses, return the entire response
                return response
    except Exception as e:
        if provider_config is None:
            raise e
        raise base_llm_http_handler._handle_error(
            e=e,
            provider_config=provider_config,
        )
async def _async_passthrough_request(
    client: Union[HTTPHandler, AsyncHTTPHandler],
    request: httpx.Request,
    is_streaming_request: bool,
    litellm_logging_obj: "LiteLLMLoggingObj",
    provider_config: "BasePassthroughConfig",
) -> Union[httpx.Response, AsyncGenerator[Any, Any]]:
    """
    Handle async passthrough requests.

    Uses the async client to send the request and properly handles streaming.
    """
    # client.client.send returns a coroutine for async clients
    response_result = client.client.send(request=request, stream=is_streaming_request)

    # Check if it's a coroutine and await it
    if asyncio.iscoroutine(response_result):
        if is_streaming_request:
            # Pass the coroutine to _async_streaming, which will await it
            return _async_streaming(
                response=response_result,
                litellm_logging_obj=litellm_logging_obj,
                provider_config=provider_config,
            )
        else:
            response = await response_result
            await response.aread()
            response.raise_for_status()
            return response
    else:
        # Fallback for sync-like behavior (shouldn't happen in the async path)
        raise Exception("Expected coroutine from async client")
def _sync_streaming(
    response: httpx.Response,
    litellm_logging_obj: "LiteLLMLoggingObj",
    provider_config: "BasePassthroughConfig",
):
    from litellm.utils import executor

    try:
        raw_bytes: List[bytes] = []
        for chunk in response.iter_bytes():  # type: ignore
            raw_bytes.append(chunk)
            yield chunk
        executor.submit(
            litellm_logging_obj.flush_passthrough_collected_chunks,
            raw_bytes=raw_bytes,
            provider_config=provider_config,
        )
    except Exception as e:
        raise e
async def _async_streaming(
    response: Coroutine[Any, Any, httpx.Response],
    litellm_logging_obj: "LiteLLMLoggingObj",
    provider_config: "BasePassthroughConfig",
):
    iter_response = await response
    try:
        iter_response.raise_for_status()
        raw_bytes: List[bytes] = []
        async for chunk in iter_response.aiter_bytes():  # type: ignore
            raw_bytes.append(chunk)
            yield chunk
        asyncio.create_task(
            litellm_logging_obj.async_flush_passthrough_collected_chunks(
                raw_bytes=raw_bytes,
                provider_config=provider_config,
            )
        )
    except Exception:
        try:
            await iter_response.aclose()
        except Exception:
            pass
        raise


@@ -0,0 +1,119 @@
from typing import Dict, List, Mapping, Optional, Union
from urllib.parse import parse_qs

import httpx

from litellm.constants import PASS_THROUGH_HEADER_PREFIX


class BasePassthroughUtils:
    @staticmethod
    def get_merged_query_parameters(
        existing_url: httpx.URL,
        request_query_params: Mapping[str, Union[str, list]],
        default_query_params: Optional[Dict[str, Union[str, list]]] = None,
    ) -> Dict[str, Union[str, List[str]]]:
        # Get the existing query params from the target URL
        existing_query_string = existing_url.query.decode("utf-8")
        existing_query_params = parse_qs(existing_query_string)

        # parse_qs returns a dict where each value is a list, so let's flatten it
        updated_existing_query_params = {
            k: v[0] if len(v) == 1 else v for k, v in existing_query_params.items()
        }

        # Start with default query params (lowest priority)
        merged_params: Dict[str, Union[str, List[str]]] = {}
        if default_query_params:
            merged_params.update(default_query_params)

        # Override with existing URL query params (medium priority)
        merged_params.update(updated_existing_query_params)

        # Override with request query params (highest priority - the client can override anything)
        merged_params.update(request_query_params)
        return merged_params
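    # Illustrative example (added for clarity, not in the original source):
    #   get_merged_query_parameters(
    #       existing_url=httpx.URL("https://host/path?v=2&k=a"),
    #       request_query_params={"k": "b"},
    #       default_query_params={"v": "1"},
    #   )
    #   -> {"v": "2", "k": "b"}  # request params beat URL params, which beat defaults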
    @staticmethod
    def forward_headers_from_request(
        request_headers: dict,
        headers: dict,
        forward_headers: Optional[bool] = False,
    ):
        """
        Helper to forward headers from the original request.

        Also handles 'x-pass-' prefixed headers, which are always forwarded
        with the prefix stripped, regardless of the forward_headers setting.
        e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
        """
        if forward_headers is True:
            # Headers we should NOT forward
            request_headers.pop("content-length", None)
            request_headers.pop("host", None)

            # Combine request headers with custom headers
            headers = {**request_headers, **headers}

        # Always process x-pass- prefixed headers (strip the prefix and forward)
        for header_name, header_value in request_headers.items():
            if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
                # Strip the 'x-pass-' prefix to get the actual header name
                actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :]
                headers[actual_header_name] = header_value
        return headers
class CommonUtils:
    @staticmethod
    def encode_bedrock_runtime_modelid_arn(endpoint: str) -> str:
        """
        Encodes any "/" found in the modelId of an AWS Bedrock Runtime endpoint when ARNs are passed in.

        - The modelId value can be an ARN which contains slashes that SHOULD NOT be treated as path separators.
          e.g. endpoint: /model/<modelId>/invoke
          A <modelId> containing an ARN with slashes needs to be encoded from
          arn:aws:bedrock:ap-southeast-1:123456789012:application-inference-profile/abdefg12334 =>
          arn:aws:bedrock:ap-southeast-1:123456789012:application-inference-profile%2Fabdefg12334
          so that it is treated as one part of the path.
          Otherwise, the request will return a 500 error when passed to the Bedrock endpoint.

        See the APIs in https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
        for more details on the regex patterns of modelId, which we use in the regex logic below.

        Args:
            endpoint (str): The original endpoint string, which may contain ARNs with slashes.

        Returns:
            str: The endpoint with properly encoded ARN slashes.
        """
        import re

        # Early exit: if no ARN is detected, return unchanged
        if "arn:aws:" not in endpoint:
            return endpoint

        # Handle all patterns in one go - more efficient and cleaner
        patterns = [
            # Custom model with 2 slashes (order matters - do this first)
            (r"(custom-model)/([a-z0-9.-]+)/([a-z0-9]+)", r"\1%2F\2%2F\3"),
            # All other resource types with 1 slash
            (r"(:application-inference-profile)/", r"\1%2F"),
            (r"(:inference-profile)/", r"\1%2F"),
            (r"(:foundation-model)/", r"\1%2F"),
            (r"(:imported-model)/", r"\1%2F"),
            (r"(:provisioned-model)/", r"\1%2F"),
            (r"(:prompt)/", r"\1%2F"),
            (r"(:endpoint)/", r"\1%2F"),
            (r"(:prompt-router)/", r"\1%2F"),
            (r"(:default-prompt-router)/", r"\1%2F"),
        ]
        for pattern, replacement in patterns:
            # Check if the pattern exists before applying the regex (early-exit optimization)
            if re.search(pattern, endpoint):
                endpoint = re.sub(pattern, replacement, endpoint)
                break  # Exit after the first match, since each ARN has only one resource type
        return endpoint