chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
OpenAI Responses API token counting implementation.
|
||||
"""
|
||||
|
||||
from litellm.llms.openai.responses.count_tokens.handler import (
|
||||
OpenAICountTokensHandler,
|
||||
)
|
||||
from litellm.llms.openai.responses.count_tokens.token_counter import (
|
||||
OpenAITokenCounter,
|
||||
)
|
||||
from litellm.llms.openai.responses.count_tokens.transformation import (
|
||||
OpenAICountTokensConfig,
|
||||
)
|
||||
|
||||
# Public re-exports of the count_tokens package.
__all__ = [
    "OpenAICountTokensHandler",
    "OpenAICountTokensConfig",
    "OpenAITokenCounter",
]
|
||||
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
OpenAI Responses API token counting handler.
|
||||
|
||||
Uses httpx for HTTP requests to OpenAI's /v1/responses/input_tokens endpoint.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
from litellm.llms.openai.common_utils import OpenAIError
|
||||
from litellm.llms.openai.responses.count_tokens.transformation import (
|
||||
OpenAICountTokensConfig,
|
||||
)
|
||||
|
||||
|
||||
class OpenAICountTokensHandler(OpenAICountTokensConfig):
    """
    Handler for OpenAI Responses API token counting requests.

    Validates and transforms the request (logic inherited from
    OpenAICountTokensConfig), POSTs it to the ``/responses/input_tokens``
    endpoint via litellm's shared async httpx client, and returns the
    parsed JSON payload.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        input: Union[str, List[Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Handle a token counting request to OpenAI's Responses API.

        Args:
            model: Target model name.
            input: Responses-API style input (string or list of items).
            api_key: API key used for the Bearer auth header.
            api_base: Optional override for the API base URL.
            timeout: Per-request timeout; defaults to ``litellm.request_timeout``.
            tools: Optional tool definitions to include in the count.
            instructions: Optional system instructions to include in the count.

        Returns:
            Dictionary containing {"input_tokens": <number>}

        Raises:
            OpenAIError: If the API request fails
        """
        try:
            self.validate_request(model, input)
            verbose_logger.debug(
                f"Processing OpenAI CountTokens request for model: {model}"
            )

            payload = self.transform_request_to_count_tokens(
                model=model,
                input=input,
                tools=tools,
                instructions=instructions,
            )
            endpoint_url = self.get_openai_count_tokens_endpoint(api_base)
            verbose_logger.debug(f"Making request to: {endpoint_url}")

            client = get_async_httpx_client(llm_provider=litellm.LlmProviders.OPENAI)
            effective_timeout = litellm.request_timeout if timeout is None else timeout

            response = await client.post(
                endpoint_url,
                headers=self.get_required_headers(api_key),
                json=payload,
                timeout=effective_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            # Non-200 bodies are surfaced verbatim as OpenAIError messages.
            if response.status_code != 200:
                body_text = response.text
                verbose_logger.error(f"OpenAI API error: {body_text}")
                raise OpenAIError(
                    status_code=response.status_code,
                    message=body_text,
                )

            parsed = response.json()
            verbose_logger.debug(f"OpenAI response: {parsed}")
            return parsed

        except OpenAIError:
            # Already in the right shape; re-raise untouched.
            raise
        except httpx.HTTPStatusError as e:
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=e.response.status_code,
                message=e.response.text,
            )
        except (httpx.RequestError, json.JSONDecodeError, ValueError) as e:
            # Transport, parse, and validation failures map to a 500-style error.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )
|
||||
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
OpenAI Token Counter implementation using the Responses API /input_tokens endpoint.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.base_llm.base_utils import BaseTokenCounter
|
||||
from litellm.llms.openai.common_utils import OpenAIError
|
||||
from litellm.llms.openai.responses.count_tokens.handler import (
|
||||
OpenAICountTokensHandler,
|
||||
)
|
||||
from litellm.llms.openai.responses.count_tokens.transformation import (
|
||||
OpenAICountTokensConfig,
|
||||
)
|
||||
from litellm.types.utils import LlmProviders, TokenCountResponse
|
||||
|
||||
# Global handler instance - reuse across all token counting requests
# (the handler holds no per-request state, so sharing one is safe).
openai_count_tokens_handler = OpenAICountTokensHandler()
|
||||
|
||||
|
||||
class OpenAITokenCounter(BaseTokenCounter):
    """Token counter implementation for OpenAI provider using the Responses API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only the native OpenAI provider can hit /responses/input_tokens.
        return custom_llm_provider == LlmProviders.OPENAI.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using OpenAI's Responses API /input_tokens endpoint.

        Returns None when the remote count cannot be attempted (no messages,
        no API key, or no convertible input items) so the caller can fall
        back to local counting.
        """
        if not messages:
            return None

        params = (deployment or {}).get("litellm_params", {})

        # API key: deployment config first, then the environment.
        api_key = params.get("api_key") or os.getenv("OPENAI_API_KEY")
        if not api_key:
            verbose_logger.warning("No OpenAI API key found for token counting")
            return None

        api_base = params.get("api_base")

        # Convert chat messages into Responses API input + instructions.
        input_items, instructions = OpenAICountTokensConfig.messages_to_responses_input(
            messages
        )
        if instructions is None and system is not None:
            # No system/developer message in the conversation; fall back to
            # the explicit `system` parameter.
            instructions = system if isinstance(system, str) else str(system)

        if not input_items:
            # Nothing to send (e.g. system-only messages) -> local counting.
            return None

        try:
            api_result = await openai_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                input=input_items,
                api_key=api_key,
                api_base=api_base,
                tools=tools,
                instructions=instructions,
            )
            if api_result is not None:
                return TokenCountResponse(
                    total_tokens=api_result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="openai_api",
                    original_response=api_result,
                )
        except OpenAIError as e:
            # API rejected the request: report the failure with its status code.
            verbose_logger.warning(
                f"OpenAI CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=e.message,
                status_code=e.status_code,
            )
        except Exception as e:
            # Unexpected failure (transport, parsing, ...): report as a 500.
            verbose_logger.warning(f"Error calling OpenAI CountTokens API: {e}")
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=str(e),
                status_code=500,
            )

        return None
|
||||
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
OpenAI Responses API token counting transformation logic.
|
||||
|
||||
This module handles the transformation of requests to OpenAI's /v1/responses/input_tokens endpoint.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
|
||||
class OpenAICountTokensConfig:
    """
    Configuration and transformation logic for OpenAI Responses API token counting.

    OpenAI Responses API Token Counting Specification:
    - Endpoint: POST https://api.openai.com/v1/responses/input_tokens
    - Response: {"input_tokens": <number>}
    """

    def get_openai_count_tokens_endpoint(self, api_base: Optional[str] = None) -> str:
        """Build the token-counting endpoint URL, honoring an ``api_base`` override."""
        base = api_base or "https://api.openai.com/v1"
        base = base.rstrip("/")  # tolerate trailing slashes in configured bases
        return f"{base}/responses/input_tokens"

    def transform_request_to_count_tokens(
        self,
        model: str,
        input: Union[str, List[Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform request to OpenAI Responses API token counting format.

        The Responses API uses `input` (not `messages`) and `instructions`
        (not `system`). Optional fields are omitted entirely when None.
        """
        request: Dict[str, Any] = {
            "model": model,
            "input": input,
        }

        if instructions is not None:
            request["instructions"] = instructions

        if tools is not None:
            request["tools"] = self._transform_tools_for_responses_api(tools)

        return request

    def get_required_headers(self, api_key: str) -> Dict[str, str]:
        """Return the JSON content-type and Bearer-auth headers for the request."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

    def validate_request(self, model: str, input: Union[str, List[Any]]) -> None:
        """Raise ValueError if either required field is missing/empty."""
        if not model:
            raise ValueError("model parameter is required")

        if not input:
            raise ValueError("input parameter is required")

    @staticmethod
    def _transform_tools_for_responses_api(
        tools: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Transform OpenAI chat tools format to Responses API tools format.

        Chat format: {"type": "function", "function": {"name": "...", "parameters": {...}}}
        Responses format: {"type": "function", "name": "...", "parameters": {...}}
        """
        transformed = []
        for tool in tools:
            if tool.get("type") == "function" and "function" in tool:
                func = tool["function"]
                item: Dict[str, Any] = {
                    "type": "function",
                    "name": func.get("name", ""),
                    "description": func.get("description", ""),
                    "parameters": func.get("parameters", {}),
                }
                # `strict` is optional; only forward it when explicitly set.
                if "strict" in func:
                    item["strict"] = func["strict"]
                transformed.append(item)
            else:
                # Pass through non-function tools (e.g., web_search, file_search)
                transformed.append(tool)
        return transformed

    @staticmethod
    def _extract_text(blocks: List[Any]) -> str:
        """
        Join the text of a list of content blocks with newlines.

        Accepts dict blocks of {"type": "text", "text": ...} and bare strings;
        any other block type is silently dropped.
        """
        text_parts: List[str] = []
        for block in blocks:
            if isinstance(block, dict) and block.get("type") == "text":
                text_parts.append(block.get("text", ""))
            elif isinstance(block, str):
                text_parts.append(block)
        return "\n".join(text_parts)

    @staticmethod
    def messages_to_responses_input(
        messages: List[Dict[str, Any]],
    ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
        """
        Convert standard chat messages format to OpenAI Responses API input format.

        Args:
            messages: Chat-completions style message dicts.

        Returns:
            (input_items, instructions) tuple where instructions is extracted
            from system/developer messages (newline-joined), or None when no
            such messages are present.
        """
        input_items: List[Dict[str, Any]] = []
        instructions_parts: List[str] = []

        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content") or ""  # normalize None -> ""

            if role in ("system", "developer"):
                # System/developer messages become `instructions`, not input items.
                if isinstance(content, str):
                    instructions_parts.append(content)
                elif isinstance(content, list):
                    instructions_parts.append(
                        OpenAICountTokensConfig._extract_text(content)
                    )
            elif role == "user":
                if isinstance(content, list):
                    # Responses API expects plain text here; flatten the blocks.
                    content = OpenAICountTokensConfig._extract_text(content)
                input_items.append({"role": "user", "content": content})
            elif role == "assistant":
                # Map tool_calls to Responses API function_call items
                tool_calls = msg.get("tool_calls")
                if content:
                    input_items.append({"role": "assistant", "content": content})
                if tool_calls:
                    for tc in tool_calls:
                        func = tc.get("function", {})
                        input_items.append(
                            {
                                "type": "function_call",
                                "call_id": tc.get("id", ""),
                                "name": func.get("name", ""),
                                "arguments": func.get("arguments", ""),
                            }
                        )
                elif not content:
                    # Keep an empty assistant turn when there is neither
                    # content nor tool calls, so the conversation stays aligned.
                    input_items.append({"role": "assistant", "content": content})
            elif role == "tool":
                input_items.append(
                    {
                        "type": "function_call_output",
                        "call_id": msg.get("tool_call_id", ""),
                        "output": content if isinstance(content, str) else str(content),
                    }
                )

        instructions = "\n".join(instructions_parts) if instructions_parts else None
        return input_items, instructions
|
||||
Reference in New Issue
Block a user