from typing import List, Optional, Tuple

from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam

from ...openai.chat.gpt_transformation import OpenAIGPTConfig

ZAI_API_BASE = "https://api.z.ai/api/paas/v4"


class ZAIChatConfig(OpenAIGPTConfig):
    @property
    def custom_llm_provider(self) -> Optional[str]:
        return "zai"

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        api_base = api_base or get_secret_str("ZAI_API_BASE") or ZAI_API_BASE
        dynamic_api_key = api_key or get_secret_str("ZAI_API_KEY")
        return api_base, dynamic_api_key
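
    # Illustrative sketch (added example, not part of the original module):
    # resolution order is explicit argument > secret manager / env var >
    # hard-coded default. "sk-test" is a hypothetical key.
    #   ZAIChatConfig()._get_openai_compatible_provider_info(None, "sk-test")
    #   -> ("https://api.z.ai/api/paas/v4", "sk-test")  # if ZAI_API_BASE is unset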

    def remove_cache_control_flag_from_messages_and_tools(
        self,
        model: str,
        messages: List[AllMessageValues],
        tools: Optional[List[ChatCompletionToolParam]] = None,
    ) -> Tuple[List[AllMessageValues], Optional[List[ChatCompletionToolParam]]]:
        """
        Override to preserve cache_control for GLM/ZAI.

        GLM supports cache_control - don't strip it.
        """
        # GLM/ZAI supports cache_control, so return messages and tools unchanged
        return messages, tools
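
    # Illustrative sketch (added example; the message shape mirrors
    # Anthropic-style prompt caching and is an assumption, not taken from
    # this module): a content block carrying cache_control passes through
    # unchanged instead of being stripped.
    #   messages = [{"role": "user", "content": [{"type": "text", "text": "hi",
    #                "cache_control": {"type": "ephemeral"}}]}]
    #   ZAIChatConfig().remove_cache_control_flag_from_messages_and_tools(
    #       "glm-4", messages)
    #   -> (messages, None)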

    def get_supported_openai_params(self, model: str) -> list:
        base_params = [
            "max_tokens",
            "stream",
            "stream_options",
            "temperature",
            "top_p",
            "stop",
            "tools",
            "tool_choice",
        ]

        # Imported locally rather than at module top, avoiding an import
        # cycle at package load time.
        import litellm

        # Advertise "thinking" only for models that litellm reports as
        # reasoning-capable; swallow lookup errors for unknown models.
        try:
            if litellm.supports_reasoning(
                model=model, custom_llm_provider=self.custom_llm_provider
            ):
                base_params.append("thinking")
        except Exception:
            pass

        return base_params
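
# Minimal usage sketch (added example; this module uses relative imports, so
# it is meant to be imported from within the litellm package rather than run
# directly; "glm-4" is an illustrative model name):
#   config = ZAIChatConfig()
#   config.custom_llm_provider                   # -> "zai"
#   config.get_supported_openai_params("glm-4")
#   -> ["max_tokens", "stream", "stream_options", "temperature", "top_p",
#       "stop", "tools", "tool_choice"]  # plus "thinking" if litellm reports
#                                        # the model as reasoning-capable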