chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
import json
|
||||
from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, cast, overload
|
||||
|
||||
import litellm
|
||||
from litellm.constants import MIN_NON_ZERO_TEMPERATURE
|
||||
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
|
||||
|
||||
class DeepInfraConfig(OpenAIGPTConfig):
    """
    Configuration for DeepInfra's OpenAI-compatible Chat Completions API.

    Reference: https://deepinfra.com/docs/advanced/openai_api

    The class-level attributes below are the request parameters that can be
    preset for DeepInfra calls. Note that ``__init__`` stores every
    non-``None`` argument on the class itself (``setattr(self.__class__, ...)``),
    not on the instance, so presets are shared by all instances.
    """

    @property
    def custom_llm_provider(self) -> Optional[str]:
        """Provider identifier used when querying model capabilities."""
        return "deepinfra"

    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        """Record every explicitly provided (non-``None``) argument as a class attribute."""
        # locals() must be captured before any other local name is bound so
        # that it contains only ``self`` and the constructor arguments.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Deliberately set on the class, not the instance.
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the configuration as computed by the parent class."""
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> list:
        """
        List the OpenAI parameter names accepted for ``model``.

        ``reasoning_effort`` is appended only when ``litellm.supports_reasoning``
        reports that the model supports reasoning for this provider.
        """
        supported_openai_params = [
            "stream",
            "frequency_penalty",
            "function_call",
            "functions",
            "logit_bias",
            "max_tokens",
            "max_completion_tokens",
            "n",
            "presence_penalty",
            "stop",
            "temperature",
            "top_p",
            "response_format",
            "tools",
            "tool_choice",
        ]

        if litellm.supports_reasoning(
            model=model,
            custom_llm_provider=self.custom_llm_provider,
        ):
            supported_openai_params.append("reasoning_effort")
        return supported_openai_params

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy supported OpenAI parameters into ``optional_params``.

        Args:
            non_default_params: Parameters explicitly supplied by the caller.
            optional_params: Mapping that receives the translated parameters;
                it is updated in place and also returned.
            model: Model name; used for the capability lookup and the
                model-specific temperature workaround.
            drop_params: When true (or when ``litellm.drop_params`` is true),
                an unsupported ``tool_choice`` is dropped instead of raising.

        Returns:
            The updated ``optional_params`` mapping.

        Raises:
            litellm.utils.UnsupportedParamsError: If ``tool_choice`` is neither
                ``"auto"`` nor ``"none"`` and dropping is not enabled.
        """
        supported_openai_params = self.get_supported_openai_params(model=model)
        for param, value in non_default_params.items():
            if (
                param == "temperature"
                and value == 0
                and model == "mistralai/Mistral-7B-Instruct-v0.1"
            ):  # this model does not support temperature == 0
                value = MIN_NON_ZERO_TEMPERATURE  # close to 0

            if param == "tool_choice" and value != "auto" and value != "none":
                # https://deepinfra.com/docs/advanced/function_calling
                ## UNSUPPORTED TOOL CHOICE VALUE
                if litellm.drop_params is True or drop_params is True:
                    continue  # drop the unsupported value
                raise litellm.utils.UnsupportedParamsError(
                    message="Deepinfra doesn't support tool_choice={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
                        value
                    ),
                    status_code=400,
                )

            if param == "max_completion_tokens":
                optional_params["max_tokens"] = value
            elif param in supported_openai_params and value is not None:
                # Supported tool_choice values ("auto" / "none") are forwarded
                # here; previously they were swallowed by the tool_choice
                # branch and never reached the request.
                optional_params[param] = value
        return optional_params

    def _transform_tool_message_content(
        self, messages: List[AllMessageValues]
    ) -> List[AllMessageValues]:
        """
        Convert list-valued tool message content to a string, in place.

        DeepInfra requires tool message content to be a string, not an array.
        Only messages whose ``role`` is ``"tool"`` and whose ``content`` is a
        list are modified; the same ``messages`` list is returned.

        A single text item is unwrapped to its text:
        - Input:  {"role": "tool", "content": [{"type": "text", "text": "20"}]}
        - Output: {"role": "tool", "content": "20"}

        Any other list is serialized as a JSON string:
        - Input:  {"role": "tool", "content": [{"type": "text", "text": "a"}, {"type": "text", "text": "b"}]}
        - Output: {"role": "tool", "content": '[{"type": "text", "text": "a"}, {"type": "text", "text": "b"}]'}
        """
        for message in messages:
            if message.get("role") != "tool":
                continue

            content = message.get("content")
            if not isinstance(content, list):
                continue

            is_single_text_item = (
                len(content) == 1
                and isinstance(content[0], dict)
                and content[0].get("type") == "text"
                and "text" in content[0]
            )
            if is_single_text_item:
                # Extract just the text value for simple cases
                message["content"] = content[0]["text"]
            else:
                # For complex content, serialize the entire array as JSON string
                message["content"] = json.dumps(content)

        return messages

    @overload
    def _transform_messages(
        self, messages: List[AllMessageValues], model: str, is_async: Literal[True]
    ) -> Coroutine[Any, Any, List[AllMessageValues]]:
        ...

    @overload
    def _transform_messages(
        self,
        messages: List[AllMessageValues],
        model: str,
        is_async: Literal[False] = False,
    ) -> List[AllMessageValues]:
        ...

    def _transform_messages(
        self, messages: List[AllMessageValues], model: str, is_async: bool = False
    ) -> Union[List[AllMessageValues], Coroutine[Any, Any, List[AllMessageValues]]]:
        """
        Run the parent message transformation, then stringify tool content.

        Returns the transformed messages directly when ``is_async`` is false,
        or a coroutine resolving to them when ``is_async`` is true.
        """
        if is_async:

            async def _async_transform():
                # Explicit two-argument super(): the zero-argument form is not
                # usable inside this nested, argument-less function.
                parent_result = super(DeepInfraConfig, self)._transform_messages(
                    messages=messages, model=model, is_async=cast(Literal[True], True)
                )
                transformed_messages = await parent_result
                return self._transform_tool_message_content(transformed_messages)

            return _async_transform()
        else:
            # For the sync case the parent result is already the message list.
            parent_result = super()._transform_messages(
                messages=messages, model=model, is_async=cast(Literal[False], False)
            )
            return self._transform_tool_message_content(parent_result)

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Resolve the API base URL and API key for DeepInfra.

        The base URL is taken from the ``api_base`` argument, then the
        ``DEEPINFRA_API_BASE`` secret, then the default
        ``https://api.deepinfra.com/v1/openai``. The key is taken from the
        ``api_key`` argument, then the ``DEEPINFRA_API_KEY`` secret.

        Returns:
            ``(api_base, api_key)``; the key may be ``None`` if none is configured.
        """
        # deepinfra is openai compatible; we only need to point the api_base
        # at DeepInfra's OpenAI-compatible endpoint.
        api_base = (
            api_base
            or get_secret_str("DEEPINFRA_API_BASE")
            or "https://api.deepinfra.com/v1/openai"
        )
        dynamic_api_key = api_key or get_secret_str("DEEPINFRA_API_KEY")
        return api_base, dynamic_api_key
|
||||
@@ -0,0 +1,245 @@
|
||||
"""
|
||||
Translate between Cohere's `/rerank` format and Deepinfra's `/rerank` format.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm._uuid import uuid
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.base_llm.rerank.transformation import (
|
||||
BaseLLMException,
|
||||
BaseRerankConfig,
|
||||
)
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.rerank import (
|
||||
OptionalRerankParams,
|
||||
RerankBilledUnits,
|
||||
RerankResponse,
|
||||
RerankResponseMeta,
|
||||
RerankResponseResult,
|
||||
RerankTokens,
|
||||
)
|
||||
|
||||
|
||||
class DeepinfraRerankConfig(BaseRerankConfig):
    """
    Deepinfra Rerank - translates Cohere-style rerank calls to Deepinfra's
    inference endpoint and maps the response back to ``RerankResponse``.
    """

    def get_complete_url(
        self,
        api_base: Optional[str],
        model: str,
        optional_params: Optional[dict] = None,
    ) -> str:
        """
        Constructs the complete DeepInfra inference endpoint URL for rerank.

        Args:
            api_base (Optional[str]): The base URL for the DeepInfra API. A
                trailing ``/openai`` path segment (as used by the chat
                endpoint) is removed before the inference path is appended.
            model (str): The model identifier.
            optional_params (Optional[dict]): Unused here.

        Returns:
            str: ``<api_base>/inference/<model>``.

        Raises:
            ValueError: If api_base is None or empty.
        """
        if not api_base:
            raise ValueError(
                "Deepinfra API Base is required. api_base=None. Set in call or via `DEEPINFRA_API_BASE` env var."
            )

        # Strip only a trailing "/openai" segment. A blanket
        # str.replace("openai", "") would also corrupt hostnames or other
        # path segments that merely contain that substring.
        api_base_clean = api_base.rstrip("/")
        openai_suffix = "/openai"
        if api_base_clean.endswith(openai_suffix):
            api_base_clean = api_base_clean[: -len(openai_suffix)].rstrip("/")

        # Compose the full endpoint
        return f"{api_base_clean}/inference/{model}"

    def validate_environment(
        self,
        headers: dict,
        model: str,
        api_key: Optional[str] = None,
        optional_params: Optional[dict] = None,
    ) -> dict:
        """
        Build the request headers for a Deepinfra rerank call.

        Args:
            headers: Caller-supplied headers; they take precedence over the
                defaults on any key collision, including ``Authorization``.
            model: Unused here.
            api_key: API key; falls back to the ``DEEPINFRA_API_KEY`` secret.
            optional_params: Unused here.

        Returns:
            dict: Default headers merged with ``headers``.

        Raises:
            ValueError: If no API key is given and none can be resolved.
        """
        if api_key is None:
            api_key = get_secret_str("DEEPINFRA_API_KEY")

        if api_key is None:
            raise ValueError(
                "Deepinfra API key is required. Please set 'DEEPINFRA_API_KEY' environment variable"
            )

        default_headers = {
            "Authorization": f"Bearer {api_key}",
            "accept": "application/json",
            "content-type": "application/json",
        }

        # Caller-supplied headers override every default, Authorization included.
        return {**default_headers, **headers}

    def map_cohere_rerank_params(
        self,
        non_default_params: dict,
        model: str,
        drop_params: bool,
        query: str,
        documents: List[Union[str, Dict[str, Any]]],
        custom_llm_provider: Optional[str] = None,
        top_n: Optional[int] = None,
        rank_fields: Optional[List[str]] = None,
        return_documents: Optional[bool] = True,
        max_chunks_per_doc: Optional[int] = None,
        max_tokens_per_doc: Optional[int] = None,
    ) -> Dict:
        """
        Map Cohere-style rerank arguments to Deepinfra request parameters.

        A non-empty ``query`` is repeated once per document to form
        ``queries``. The keys ``queries``, ``documents``, ``service_tier``,
        ``instruction`` and ``webhook`` are then copied from
        ``non_default_params`` when their value is not ``None``; an explicit
        ``queries`` entry therefore overrides the one derived from ``query``.
        All other arguments are not used by this mapping.
        """
        optional_rerank_params: Dict[str, Any] = {}
        if query:
            # Deepinfra rerank requires queries to be of same length as documents
            optional_rerank_params["queries"] = [query] * len(documents)

        passthrough_keys = (
            "queries",
            "documents",
            "service_tier",
            "instruction",
            "webhook",
        )
        if non_default_params is not None:
            for k, v in non_default_params.items():
                if k in passthrough_keys and v is not None:
                    optional_rerank_params[k] = v
        return OptionalRerankParams(**optional_rerank_params)  # type: ignore

    def transform_rerank_request(
        self,
        model: str,
        optional_rerank_params: Dict,
        headers: dict,
    ) -> dict:
        """Return the rerank parameters as a plain dict request body (``{}`` for ``None``)."""
        if optional_rerank_params is None:
            return {}
        return dict(optional_rerank_params)

    def transform_rerank_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: RerankResponse,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        # NOTE: the mutable defaults below are kept for signature
        # compatibility; this method never mutates them.
        request_data: dict = {},
        optional_params: dict = {},
        litellm_params: dict = {},
    ) -> RerankResponse:
        """
        Convert a Deepinfra rerank HTTP response into a ``RerankResponse``.

        Each entry of the response's ``scores`` list becomes one result whose
        ``index`` is its position in that list. ``input_tokens`` populates the
        token and billed-unit metadata, and the ``inference_status`` fields
        are stored in ``_hidden_params`` together with ``model``.

        If anything in this parsing raises, the parent implementation is used
        instead and ``model`` is recorded in its ``_hidden_params``.
        """
        try:
            response_json = raw_response.json()
            logging_obj.post_call(original_response=raw_response.text)

            # ``or`` guards: a key present with a JSON null value must not
            # abort the parse.
            scores = response_json.get("scores") or []
            input_tokens = response_json.get("input_tokens", 0)
            request_id = response_json.get("request_id")

            inference_status = response_json.get("inference_status") or {}
            status = inference_status.get("status", "unknown")
            runtime_ms = inference_status.get("runtime_ms", 0)
            cost = inference_status.get("cost", 0.0)
            tokens_generated = inference_status.get("tokens_generated", 0)
            tokens_input = inference_status.get("tokens_input", 0)

            results = [
                RerankResponseResult(index=i, relevance_score=float(score))
                for i, score in enumerate(scores)
            ]

            tokens = RerankTokens(
                input_tokens=input_tokens,
                output_tokens=0,  # DeepInfra doesn't provide output tokens for rerank
            )
            billed_units = RerankBilledUnits(total_tokens=input_tokens)
            meta = RerankResponseMeta(tokens=tokens, billed_units=billed_units)

            rerank_response = RerankResponse(
                id=request_id or str(uuid.uuid4()), results=results, meta=meta
            )

            # Store additional information in hidden params
            rerank_response._hidden_params = {
                "status": status,
                "runtime_ms": runtime_ms,
                "cost": cost,
                "tokens_generated": tokens_generated,
                "tokens_input": tokens_input,
                "model": model,
            }

            return rerank_response

        except Exception:
            # Deliberate best-effort: if the Deepinfra-specific parse fails,
            # fall back to the parent implementation.
            rerank_response = super().transform_rerank_response(
                model=model,
                raw_response=raw_response,
                model_response=model_response,
                logging_obj=logging_obj,
                api_key=api_key,
                request_data=request_data,
                optional_params=optional_params,
                litellm_params=litellm_params,
            )

            rerank_response._hidden_params["model"] = model
            return rerank_response

    def get_supported_cohere_rerank_params(self, model: str) -> list:
        """Return the Cohere rerank parameter names supported for Deepinfra."""
        return ["query", "documents"]

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """
        Raise a ``BaseLLMException`` carrying the most specific message available.

        Deepinfra errors may come as JSON: ``{"detail": {"error": "..."}}`` or
        ``{"detail": "..."}``. When ``error_message`` parses to one of those
        shapes the inner text is used; otherwise it is used unchanged.

        NOTE(review): despite the annotated return type, this method raises
        the exception rather than returning it. That is kept as-is so callers
        relying on either ``raise cfg.get_error_class(...)`` or a bare call
        observe the same exception - confirm before changing to ``return``.

        Raises:
            BaseLLMException: Always.
        """
        import json

        try:
            error_data = error_message
            if isinstance(error_message, str):
                error_data = json.loads(error_message)
            if isinstance(error_data, dict):
                detail = error_data.get("detail")
                if isinstance(detail, dict) and "error" in detail:
                    error_message = detail["error"]
                elif isinstance(detail, str):
                    error_message = detail
        except Exception:
            # Best-effort only: an unparseable payload must not mask the
            # original error, so keep error_message unchanged.
            pass

        raise BaseLLMException(
            status_code=status_code,
            message=error_message,
            headers=headers,
        )
|
||||
Reference in New Issue
Block a user