chore: initial snapshot for gitea/github upload

2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/deepinfra/chat/transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/deepinfra/chat/transformation.py
@@ -0,0 +1,212 @@
+import json
+from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, cast, overload
+
+import litellm
+from litellm.constants import MIN_NON_ZERO_TEMPERATURE
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import AllMessageValues
+
+
+class DeepInfraConfig(OpenAIGPTConfig):
+    """
+    Reference: https://deepinfra.com/docs/advanced/openai_api
+
+    The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
+    """
+
+    @property
+    def custom_llm_provider(self) -> Optional[str]:
+        return "deepinfra"
+
+    frequency_penalty: Optional[int] = None
+    function_call: Optional[Union[str, dict]] = None
+    functions: Optional[list] = None
+    logit_bias: Optional[dict] = None
+    max_tokens: Optional[int] = None
+    n: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    response_format: Optional[dict] = None
+    tools: Optional[list] = None
+    tool_choice: Optional[Union[str, dict]] = None
+
+    def __init__(
+        self,
+        frequency_penalty: Optional[int] = None,
+        function_call: Optional[Union[str, dict]] = None,
+        functions: Optional[list] = None,
+        logit_bias: Optional[dict] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        response_format: Optional[dict] = None,
+        tools: Optional[list] = None,
+        tool_choice: Optional[Union[str, dict]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return super().get_config()
+
+    def get_supported_openai_params(self, model: str):
+        supported_openai_params = [
+            "stream",
+            "frequency_penalty",
+            "function_call",
+            "functions",
+            "logit_bias",
+            "max_tokens",
+            "max_completion_tokens",
+            "n",
+            "presence_penalty",
+            "stop",
+            "temperature",
+            "top_p",
+            "response_format",
+            "tools",
+            "tool_choice",
+        ]
+
+        if litellm.supports_reasoning(
+            model=model,
+            custom_llm_provider=self.custom_llm_provider,
+        ):
+            supported_openai_params.append("reasoning_effort")
+        return supported_openai_params
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if (
+                param == "temperature"
+                and value == 0
+                and model == "mistralai/Mistral-7B-Instruct-v0.1"
+            ):  # this model does no support temperature == 0
+                value = MIN_NON_ZERO_TEMPERATURE  # close to 0
+            if param == "tool_choice":
+                if (
+                    value != "auto" and value != "none"
+                ):  # https://deepinfra.com/docs/advanced/function_calling
+                    ## UNSUPPORTED TOOL CHOICE VALUE
+                    if litellm.drop_params is True or drop_params is True:
+                        value = None
+                    else:
+                        raise litellm.utils.UnsupportedParamsError(
+                            message="Deepinfra doesn't support tool_choice={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
+                                value
+                            ),
+                            status_code=400,
+                        )
+            elif param == "max_completion_tokens":
+                optional_params["max_tokens"] = value
+            elif param in supported_openai_params:
+                if value is not None:
+                    optional_params[param] = value
+        return optional_params
+
+    def _transform_tool_message_content(
+        self, messages: List[AllMessageValues]
+    ) -> List[AllMessageValues]:
+        """
+        Transform tool message content from array to string format for DeepInfra compatibility.
+
+        DeepInfra requires tool message content to be a string, not an array.
+        This method converts tool message content from array format to string format.
+
+        Example transformation:
+        - Input:  {"role": "tool", "content": [{"type": "text", "text": "20"}]}
+        - Output: {"role": "tool", "content": "20"}
+
+        Or if content is complex:
+        - Input:  {"role": "tool", "content": [{"type": "text", "text": "result"}]}
+        - Output: {"role": "tool", "content": "[{\"type\": \"text\", \"text\": \"result\"}]"}
+        """
+        for message in messages:
+            if message.get("role") == "tool":
+                content = message.get("content")
+
+                # If content is a list/array, convert it to string
+                if isinstance(content, list):
+                    # Check if it's a simple single text item
+                    if (
+                        len(content) == 1
+                        and isinstance(content[0], dict)
+                        and content[0].get("type") == "text"
+                        and "text" in content[0]
+                    ):
+                        # Extract just the text value for simple cases
+                        message["content"] = content[0]["text"]
+                    else:
+                        # For complex content, serialize the entire array as JSON string
+                        message["content"] = json.dumps(content)
+
+        return messages
+
+    @overload
+    def _transform_messages(
+        self, messages: List[AllMessageValues], model: str, is_async: Literal[True]
+    ) -> Coroutine[Any, Any, List[AllMessageValues]]:
+        ...
+
+    @overload
+    def _transform_messages(
+        self,
+        messages: List[AllMessageValues],
+        model: str,
+        is_async: Literal[False] = False,
+    ) -> List[AllMessageValues]:
+        ...
+
+    def _transform_messages(
+        self, messages: List[AllMessageValues], model: str, is_async: bool = False
+    ) -> Union[List[AllMessageValues], Coroutine[Any, Any, List[AllMessageValues]]]:
+        """
+        Transform messages for DeepInfra compatibility.
+        Handles both sync and async transformations.
+        """
+        if is_async:
+            # For async case, create an async function that awaits parent and applies our transformation
+            async def _async_transform():
+                # Call parent with is_async=True (literal) for async case
+                parent_result = super(DeepInfraConfig, self)._transform_messages(
+                    messages=messages, model=model, is_async=cast(Literal[True], True)
+                )
+                transformed_messages = await parent_result
+                return self._transform_tool_message_content(transformed_messages)
+
+            return _async_transform()
+        else:
+            # Call parent with is_async=False (literal) for sync case
+            parent_result = super()._transform_messages(
+                messages=messages, model=model, is_async=cast(Literal[False], False)
+            )
+            # For sync case, parent_result is already the transformed messages
+            return self._transform_tool_message_content(parent_result)
+
+    def _get_openai_compatible_provider_info(
+        self, api_base: Optional[str], api_key: Optional[str]
+    ) -> Tuple[Optional[str], Optional[str]]:
+        # deepinfra is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+        api_base = (
+            api_base
+            or get_secret_str("DEEPINFRA_API_BASE")
+            or "https://api.deepinfra.com/v1/openai"
+        )
+        dynamic_api_key = api_key or get_secret_str("DEEPINFRA_API_KEY")
+        return api_base, dynamic_api_key
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/deepinfra/rerank/transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/deepinfra/rerank/transformation.py
@@ -0,0 +1,245 @@
+"""
+Translate between Cohere's `/rerank` format and Deepinfra's `/rerank` format. 
+"""
+
+from typing import Any, Dict, List, Optional, Union
+
+import httpx
+
+from litellm._uuid import uuid
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.rerank.transformation import (
+    BaseLLMException,
+    BaseRerankConfig,
+)
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.rerank import (
+    OptionalRerankParams,
+    RerankBilledUnits,
+    RerankResponse,
+    RerankResponseMeta,
+    RerankResponseResult,
+    RerankTokens,
+)
+
+
+class DeepinfraRerankConfig(BaseRerankConfig):
+    """
+    Deepinfra Rerank - Follows the same Spec as Cohere Rerank
+    """
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        model: str,
+        optional_params: Optional[dict] = None,
+    ) -> str:
+        """
+        Constructs the complete DeepInfra inference endpoint URL for rerank.
+
+        Args:
+            api_base (Optional[str]): The base URL for the DeepInfra API.
+            model (str): The model identifier.
+
+        Returns:
+            str: The complete URL for the DeepInfra rerank inference endpoint.
+
+        Raises:
+            ValueError: If api_base is None.
+        """
+        if not api_base:
+            raise ValueError(
+                "Deepinfra API Base is required. api_base=None. Set in call or via `DEEPINFRA_API_BASE` env var."
+            )
+
+        # Remove 'openai' from the base if present
+        api_base_clean = (
+            api_base.replace("openai", "") if "openai" in api_base else api_base
+        )
+
+        # Remove any trailing slashes for consistency, then add one
+        api_base_clean = api_base_clean.rstrip("/") + "/"
+
+        # Compose the full endpoint
+        return f"{api_base_clean}inference/{model}"
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        api_key: Optional[str] = None,
+        optional_params: Optional[dict] = None,
+    ) -> dict:
+        if api_key is None:
+            api_key = get_secret_str("DEEPINFRA_API_KEY")
+
+        if api_key is None:
+            raise ValueError(
+                "Deepinfra API key is required. Please set 'DEEPINFRA_API_KEY' environment variable"
+            )
+
+        default_headers = {
+            "Authorization": f"Bearer {api_key}",
+            "accept": "application/json",
+            "content-type": "application/json",
+        }
+
+        # If 'Authorization' is provided in headers, it overrides the default.
+        if "Authorization" in headers:
+            default_headers["Authorization"] = headers["Authorization"]
+
+        # Merge other headers, overriding any default ones except Authorization
+        return {**default_headers, **headers}
+
+    def map_cohere_rerank_params(
+        self,
+        non_default_params: dict,
+        model: str,
+        drop_params: bool,
+        query: str,
+        documents: List[Union[str, Dict[str, Any]]],
+        custom_llm_provider: Optional[str] = None,
+        top_n: Optional[int] = None,
+        rank_fields: Optional[List[str]] = None,
+        return_documents: Optional[bool] = True,
+        max_chunks_per_doc: Optional[int] = None,
+        max_tokens_per_doc: Optional[int] = None,
+    ) -> Dict:
+        # Start with the basic parameters
+        optional_rerank_params = {}
+        if query:
+            optional_rerank_params["queries"] = [query] * len(
+                documents
+            )  # Deepinfra rerank requires queries to be of same length as documents
+
+        if non_default_params is not None:
+            for k, v in non_default_params.items():
+                if k == "queries" and v is not None:
+                    # This should override the query parameter if it is provided
+                    optional_rerank_params["queries"] = v
+                elif k == "documents" and v is not None:
+                    optional_rerank_params["documents"] = v
+                elif k == "service_tier" and v is not None:
+                    optional_rerank_params["service_tier"] = v
+                elif k == "instruction" and v is not None:
+                    optional_rerank_params["instruction"] = v
+                elif k == "webhook" and v is not None:
+                    optional_rerank_params["webhook"] = v
+        return OptionalRerankParams(**optional_rerank_params)  # type: ignore
+
+    def transform_rerank_request(
+        self,
+        model: str,
+        optional_rerank_params: Dict,
+        headers: dict,
+    ) -> dict:
+        # Convert OptionalRerankParams to dict as expected by parent class
+        if optional_rerank_params is None:
+            return {}
+        return dict(optional_rerank_params)
+
+    def transform_rerank_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: RerankResponse,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str] = None,
+        request_data: dict = {},
+        optional_params: dict = {},
+        litellm_params: dict = {},
+    ) -> RerankResponse:
+        try:
+            response_json = raw_response.json()
+            logging_obj.post_call(original_response=raw_response.text)
+
+            # Extract the scores from the response
+            scores = response_json.get("scores", [])
+            input_tokens = response_json.get("input_tokens", 0)
+            request_id = response_json.get("request_id")
+
+            # Create inference status information
+            inference_status = response_json.get("inference_status", {})
+            status = inference_status.get("status", "unknown")
+            runtime_ms = inference_status.get("runtime_ms", 0)
+            cost = inference_status.get("cost", 0.0)
+            tokens_generated = inference_status.get("tokens_generated", 0)
+            tokens_input = inference_status.get("tokens_input", 0)
+
+            # Create RerankResponse
+            results = []
+            for i, score in enumerate(scores):
+                results.append(
+                    RerankResponseResult(index=i, relevance_score=float(score))
+                )
+
+            # Create metadata for the response
+            tokens = RerankTokens(
+                input_tokens=input_tokens,
+                output_tokens=0,  # DeepInfra doesn't provide output tokens for rerank
+            )
+            billed_units = RerankBilledUnits(total_tokens=input_tokens)
+            meta = RerankResponseMeta(tokens=tokens, billed_units=billed_units)
+
+            rerank_response = RerankResponse(
+                id=request_id or str(uuid.uuid4()), results=results, meta=meta
+            )
+
+            # Store additional information in hidden params
+            rerank_response._hidden_params = {
+                "status": status,
+                "runtime_ms": runtime_ms,
+                "cost": cost,
+                "tokens_generated": tokens_generated,
+                "tokens_input": tokens_input,
+                "model": model,
+            }
+
+            return rerank_response
+
+        except Exception:
+            # If there's an error parsing the response, fall back to the parent implementation
+            rerank_response = super().transform_rerank_response(
+                model=model,
+                raw_response=raw_response,
+                model_response=model_response,
+                logging_obj=logging_obj,
+                api_key=api_key,
+                request_data=request_data,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+            )
+
+            rerank_response._hidden_params["model"] = model
+            return rerank_response
+
+    def get_supported_cohere_rerank_params(self, model: str) -> list:
+        return ["query", "documents"]
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        # Deepinfra errors may come as JSON: {"detail": {"error": "..."}}
+        import json
+
+        # Try to extract a more specific error message if possible
+        try:
+            error_data = error_message
+            if isinstance(error_message, str):
+                error_data = json.loads(error_message)
+            if isinstance(error_data, dict):
+                # Check for {"detail": {"error": "..."}}
+                detail = error_data.get("detail")
+                if isinstance(detail, dict) and "error" in detail:
+                    error_message = detail["error"]
+                elif isinstance(detail, str):
+                    error_message = detail
+        except Exception:
+            # If parsing fails, just use the original error_message
+            pass
+
+        raise BaseLLMException(
+            status_code=status_code,
+            message=error_message,
+            headers=headers,
+        )