chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
"""
OpenAI Responses API token counting implementation.
"""
from litellm.llms.openai.responses.count_tokens.handler import (
OpenAICountTokensHandler,
)
from litellm.llms.openai.responses.count_tokens.token_counter import (
OpenAITokenCounter,
)
from litellm.llms.openai.responses.count_tokens.transformation import (
OpenAICountTokensConfig,
)
# Names re-exported as the public API of the count_tokens package.
__all__ = [
"OpenAICountTokensHandler",
"OpenAICountTokensConfig",
"OpenAITokenCounter",
]

View File

@@ -0,0 +1,107 @@
"""
OpenAI Responses API token counting handler.
Uses httpx for HTTP requests to OpenAI's /v1/responses/input_tokens endpoint.
"""
import json
from typing import Any, Dict, List, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
from litellm.llms.openai.common_utils import OpenAIError
from litellm.llms.openai.responses.count_tokens.transformation import (
OpenAICountTokensConfig,
)
class OpenAICountTokensHandler(OpenAICountTokensConfig):
    """
    Handler for OpenAI Responses API token counting requests.

    Request construction (validation, body, endpoint URL, headers) comes from
    the inherited ``OpenAICountTokensConfig`` helpers; this class adds the
    HTTP round trip and the translation of failures into ``OpenAIError``.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        input: Union[str, List[Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Handle a token counting request to OpenAI's Responses API.

        Args:
            model: Model name placed in the request body.
            input: Responses API input (a string or a list of input items).
            api_key: API key handed to ``get_required_headers``.
            api_base: Optional base URL handed to the endpoint builder.
            timeout: Request timeout; ``litellm.request_timeout`` is used
                when this is None.
            tools: Optional tools forwarded to the request transformation.
            instructions: Optional instructions text for the request body.

        Returns:
            Dictionary containing {"input_tokens": <number>}

        Raises:
            OpenAIError: If the API request fails. Non-200 responses and
                ``httpx.HTTPStatusError`` keep the upstream status code;
                transport, JSON-decoding and validation errors are reported
                with status 500.
        """
        try:
            self.validate_request(model, input)
            verbose_logger.debug(
                f"Processing OpenAI CountTokens request for model: {model}"
            )

            request_body = self.transform_request_to_count_tokens(
                model=model,
                input=input,
                tools=tools,
                instructions=instructions,
            )
            endpoint_url = self.get_openai_count_tokens_endpoint(api_base)
            verbose_logger.debug(f"Making request to: {endpoint_url}")

            headers = self.get_required_headers(api_key)
            async_client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.OPENAI
            )
            request_timeout = (
                timeout if timeout is not None else litellm.request_timeout
            )
            response = await async_client.post(
                endpoint_url,
                headers=headers,
                json=request_body,
                timeout=request_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                error_text = response.text
                verbose_logger.error(f"OpenAI API error: {error_text}")
                raise OpenAIError(
                    status_code=response.status_code,
                    message=error_text,
                )

            openai_response = response.json()
            verbose_logger.debug(f"OpenAI response: {openai_response}")
            return openai_response
        except OpenAIError:
            # Already in the shape callers expect; do not re-wrap.
            raise
        except httpx.HTTPStatusError as e:
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            # Chain explicitly so the original httpx error stays attached as
            # __cause__ for debugging.
            raise OpenAIError(
                status_code=e.response.status_code,
                message=e.response.text,
            ) from e
        except (httpx.RequestError, json.JSONDecodeError, ValueError) as e:
            # ValueError also covers validate_request() failures.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            ) from e

View File

@@ -0,0 +1,118 @@
"""
OpenAI Token Counter implementation using the Responses API /input_tokens endpoint.
"""
import os
from typing import Any, Dict, List, Optional
from litellm._logging import verbose_logger
from litellm.llms.base_llm.base_utils import BaseTokenCounter
from litellm.llms.openai.common_utils import OpenAIError
from litellm.llms.openai.responses.count_tokens.handler import (
OpenAICountTokensHandler,
)
from litellm.llms.openai.responses.count_tokens.transformation import (
OpenAICountTokensConfig,
)
from litellm.types.utils import LlmProviders, TokenCountResponse
# Global handler instance - created once at import time and reused across all
# token counting requests made through OpenAITokenCounter.count_tokens.
openai_count_tokens_handler = OpenAICountTokensHandler()
class OpenAITokenCounter(BaseTokenCounter):
    """Token counter for the OpenAI provider, backed by the Responses API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        """Return True only when the provider string is the OpenAI provider value."""
        openai_provider = LlmProviders.OPENAI.value
        return custom_llm_provider == openai_provider

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using OpenAI's Responses API /input_tokens endpoint.

        Returns None when there is nothing to send (no messages, no API key,
        or no input items after conversion). API failures are reported as a
        TokenCountResponse with ``error=True`` rather than raised.
        """
        if not messages:
            return None

        litellm_params = (deployment or {}).get("litellm_params", {})

        # Deployment config wins; the environment variable is the fallback.
        api_key = litellm_params.get("api_key") or os.getenv("OPENAI_API_KEY")
        if not api_key:
            verbose_logger.warning("No OpenAI API key found for token counting")
            return None

        api_base = litellm_params.get("api_base")

        # Chat messages -> Responses API input items plus extracted instructions.
        converted = OpenAICountTokensConfig.messages_to_responses_input(messages)
        input_items, instructions = converted

        # The explicit system param is only used when the messages carried none.
        if instructions is None and system is not None:
            instructions = system if isinstance(system, str) else str(system)

        # System-only conversations produce no input items: fall back to
        # local counting by returning None.
        if not input_items:
            return None

        def _failure(message: Any, status: Any) -> TokenCountResponse:
            # Shared shape of the error response for both except branches.
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=message,
                status_code=status,
            )

        try:
            result = await openai_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                input=input_items,
                api_key=api_key,
                api_base=api_base,
                tools=tools,
                instructions=instructions,
            )
            if result is not None:
                return TokenCountResponse(
                    total_tokens=result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="openai_api",
                    original_response=result,
                )
        except OpenAIError as e:
            verbose_logger.warning(
                f"OpenAI CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return _failure(e.message, e.status_code)
        except Exception as e:
            verbose_logger.warning(f"Error calling OpenAI CountTokens API: {e}")
            return _failure(str(e), 500)

        return None

View File

@@ -0,0 +1,160 @@
"""
OpenAI Responses API token counting transformation logic.
This module handles the transformation of requests to OpenAI's /v1/responses/input_tokens endpoint.
"""
from typing import Any, Dict, List, Optional, Union
class OpenAICountTokensConfig:
    """
    Configuration and transformation logic for OpenAI Responses API token counting.

    OpenAI Responses API Token Counting Specification:
    - Endpoint: POST https://api.openai.com/v1/responses/input_tokens
    - Response: {"input_tokens": <number>}
    """

    def get_openai_count_tokens_endpoint(self, api_base: Optional[str] = None) -> str:
        """Return the input_tokens URL under *api_base* (default: the public OpenAI v1 base)."""
        base = api_base or "https://api.openai.com/v1"
        base = base.rstrip("/")
        return f"{base}/responses/input_tokens"

    def transform_request_to_count_tokens(
        self,
        model: str,
        input: Union[str, List[Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform request to OpenAI Responses API token counting format.

        The Responses API uses `input` (not `messages`) and `instructions` (not `system`).
        ``instructions`` and ``tools`` are only included when they are not None;
        tools are converted from chat format to Responses format.
        """
        request: Dict[str, Any] = {
            "model": model,
            "input": input,
        }
        if instructions is not None:
            request["instructions"] = instructions
        if tools is not None:
            request["tools"] = self._transform_tools_for_responses_api(tools)
        return request

    def get_required_headers(self, api_key: str) -> Dict[str, str]:
        """Return the JSON content-type and Bearer authorization headers."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

    def validate_request(self, model: str, input: Union[str, List[Any]]) -> None:
        """Raise ValueError when *model* or *input* is empty/falsy."""
        if not model:
            raise ValueError("model parameter is required")
        if not input:
            raise ValueError("input parameter is required")

    @staticmethod
    def _transform_tools_for_responses_api(
        tools: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Transform OpenAI chat tools format to Responses API tools format.

        Chat format: {"type": "function", "function": {"name": "...", "parameters": {...}}}
        Responses format: {"type": "function", "name": "...", "parameters": {...}}
        """
        transformed = []
        for tool in tools:
            if tool.get("type") == "function" and "function" in tool:
                func = tool["function"]
                item: Dict[str, Any] = {
                    "type": "function",
                    "name": func.get("name", ""),
                    "description": func.get("description", ""),
                    "parameters": func.get("parameters", {}),
                }
                if "strict" in func:
                    item["strict"] = func["strict"]
                transformed.append(item)
            else:
                # Pass through non-function tools (e.g., web_search, file_search)
                transformed.append(tool)
        return transformed

    @staticmethod
    def _join_text_blocks(blocks: List[Any]) -> str:
        """Join the text of chat content blocks with newlines.

        Dict blocks of type "text" contribute their ``text`` value (a missing
        or None value counts as an empty string); bare strings are used as-is;
        every other block is skipped.
        """
        parts: List[str] = []
        for block in blocks:
            if isinstance(block, dict) and block.get("type") == "text":
                parts.append(block.get("text") or "")
            elif isinstance(block, str):
                parts.append(block)
        return "\n".join(parts)

    @staticmethod
    def messages_to_responses_input(
        messages: List[Dict[str, Any]],
    ) -> tuple:
        """
        Convert standard chat messages format to OpenAI Responses API input format.

        Role handling:
        - system / developer: text becomes part of ``instructions``.
        - user / assistant: list content is flattened to its text blocks.
        - assistant ``tool_calls``: each becomes a ``function_call`` item.
        - tool: becomes a ``function_call_output`` item.
        - any other role is ignored.

        Returns:
            (input_items, instructions) tuple where instructions is extracted
            from system/developer messages (newline-joined), or None when no
            such message was present.
        """
        join_text = OpenAICountTokensConfig._join_text_blocks
        input_items: List[Dict[str, Any]] = []
        instructions_parts: List[str] = []

        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content") or ""

            if role in ("system", "developer"):
                if isinstance(content, str):
                    instructions_parts.append(content)
                elif isinstance(content, list):
                    instructions_parts.append(join_text(content))
            elif role == "user":
                if isinstance(content, list):
                    content = join_text(content)
                input_items.append({"role": "user", "content": content})
            elif role == "assistant":
                # Flatten chat-format content blocks the same way as for user
                # messages instead of forwarding them unchanged.
                if isinstance(content, list):
                    content = join_text(content)
                tool_calls = msg.get("tool_calls")
                if content:
                    input_items.append({"role": "assistant", "content": content})
                if tool_calls:
                    for tc in tool_calls:
                        func = tc.get("function") or {}
                        input_items.append(
                            {
                                "type": "function_call",
                                "call_id": tc.get("id", ""),
                                "name": func.get("name", ""),
                                "arguments": func.get("arguments", ""),
                            }
                        )
                elif not content:
                    # Neither text nor tool calls: keep the empty turn.
                    input_items.append({"role": "assistant", "content": content})
            elif role == "tool":
                if isinstance(content, str):
                    output = content
                elif isinstance(content, list):
                    # Use the actual text of the parts; str(list) would send a
                    # Python repr. Fall back to str() only when no text is found.
                    output = join_text(content) or str(content)
                else:
                    output = str(content)
                input_items.append(
                    {
                        "type": "function_call_output",
                        "call_id": msg.get("tool_call_id", ""),
                        "output": output,
                    }
                )

        instructions = "\n".join(instructions_parts) if instructions_parts else None
        return input_items, instructions

View File

@@ -0,0 +1,119 @@
# OpenAI Responses API Guardrail Translation Handler
This module provides guardrail translation support for the OpenAI Responses API format.
## Overview
The `OpenAIResponsesHandler` class handles the translation of guardrail operations for both input and output of the Responses API. It follows the same pattern as the Chat Completions handler but is adapted for the Responses API's specific data structures.
## Responses API Format
### Input Format
The Responses API accepts input in two formats:
1. **String input**: Simple text string
```python
{"input": "Hello world", "model": "gpt-4"}
```
2. **List input**: Array of message objects (ResponseInputParam)
```python
{
"input": [
{
"role": "user",
"content": "Hello", # Can be string or list of content items
"type": "message"
}
],
"model": "gpt-4"
}
```
### Output Format
The Responses API returns a `ResponsesAPIResponse` object with:
```python
{
"id": "resp_123",
"output": [
{
"type": "message",
"id": "msg_123",
"status": "completed",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": "Assistant response",
"annotations": []
}
]
}
]
}
```
## Usage
The handler is automatically discovered and registered for `CallTypes.responses` and `CallTypes.aresponses`.
### Example
```python
from litellm.llms import get_guardrail_translation_mapping
from litellm.types.utils import CallTypes
# Get the handler
handler_class = get_guardrail_translation_mapping(CallTypes.responses)
handler = handler_class()
# Process input
data = {"input": "User message", "model": "gpt-4"}
processed_data = await handler.process_input_messages(data, guardrail_instance)
# Process output
response = await litellm.aresponses(**processed_data)
processed_response = await handler.process_output_response(response, guardrail_instance)
```
## Key Methods
### `process_input_messages(data, guardrail_to_apply)`
Processes input data by:
1. Handling both string and list input formats
2. Extracting text content from messages
3. Applying guardrails to all extracted text content in a single batched call
4. Mapping guardrail responses back to the original structure
### `process_output_response(response, guardrail_to_apply)`
Processes output response by:
1. Extracting text from output items' content
2. Applying guardrails to all extracted text content (and tool calls) in a single batched call
3. Replacing original text with guardrailed versions
## Extending the Handler
The handler can be customized by overriding these methods:
- `_extract_input_text_and_images()`: Customize input text and image extraction logic
- `_apply_guardrail_responses_to_input()`: Customize how guardrail responses are applied to input
- `_extract_output_text_and_images()`: Customize output text, image, and tool call extraction logic
- `_apply_guardrail_responses_to_output()`: Customize how guardrail responses are applied to output
- `_has_text_content()`: Customize text content detection
## Testing
Comprehensive tests are available in `tests/llm_translation/test_openai_responses_guardrail_handler.py`:
```bash
pytest tests/llm_translation/test_openai_responses_guardrail_handler.py -v
```
## Implementation Details
- **Batched Processing**: All extracted text (together with any images, tools, or tool calls) is sent to the guardrail in a single `apply_guardrail()` call
- **Mapping Tracking**: Uses tuples to track the location of each text segment for accurate replacement
- **Type Safety**: Handles both Pydantic objects and dict representations
- **Multimodal Support**: Properly handles mixed content with text and other media types

View File

@@ -0,0 +1,12 @@
"""OpenAI Responses API handler for Unified Guardrails."""
from litellm.llms.openai.responses.guardrail_translation.handler import (
OpenAIResponsesHandler,
)
from litellm.types.utils import CallTypes
# Maps both the sync and async Responses API call types to the same
# guardrail translation handler class.
guardrail_translation_mappings = {
CallTypes.responses: OpenAIResponsesHandler,
CallTypes.aresponses: OpenAIResponsesHandler,
}
# Only the mapping is exported from this package module.
__all__ = ["guardrail_translation_mappings"]

View File

@@ -0,0 +1,760 @@
"""
OpenAI Responses API Handler for Unified Guardrails
This module provides a class-based handler for OpenAI Responses API format.
The class methods can be overridden for custom behavior.
Pattern Overview:
-----------------
1. Extract text content from input/output (both string and list formats)
2. Create async tasks to apply guardrails to each text segment
3. Track mappings to know where each response belongs
4. Apply guardrail responses back to the original structure
Responses API Format:
---------------------
Input: Union[str, List[Dict]] where each dict has:
- role: str
- content: Union[str, List[Dict]] (can have text items)
- type: str (e.g., "message")
Output: response.output is List[GenericResponseOutputItem] where each has:
- type: str (e.g., "message")
- id: str
- status: str
- role: str
- content: List[OutputText] where OutputText has:
- type: str (e.g., "output_text")
- text: str
"""
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from pydantic import BaseModel
from litellm._logging import verbose_proxy_logger
from litellm.completion_extras.litellm_responses_transformation.transformation import (
LiteLLMResponsesTransformationHandler,
OpenAiResponsesToChatCompletionStreamIterator,
)
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.types.llms.openai import (
ChatCompletionToolCallChunk,
ChatCompletionToolParam,
)
from litellm.types.responses.main import (
GenericResponseOutputItem,
OutputFunctionToolCall,
OutputText,
)
from litellm.types.utils import GenericGuardrailAPIInputs
if TYPE_CHECKING:
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.llms.openai import ResponseInputParam
from litellm.types.utils import ResponsesAPIResponse
class OpenAIResponsesHandler(BaseTranslation):
"""
Handler for processing OpenAI Responses API with guardrails.
This class provides methods to:
1. Process input (pre-call hook)
2. Process output response (post-call hook)
Methods can be overridden to customize behavior for different message formats.
"""
async def process_input_messages(
self,
data: dict,
guardrail_to_apply: "CustomGuardrail",
litellm_logging_obj: Optional[Any] = None,
) -> Any:
"""
Process input by applying guardrails to text content.
Handles both string input and list of message objects.

Args:
data: Responses API request dict; "input", "tools" and "model" are read,
and "input" / "tools" may be rewritten in place.
guardrail_to_apply: Guardrail whose apply_guardrail() is awaited.
litellm_logging_obj: Optional logging object forwarded to the guardrail.

Returns:
The same `data` dict. It is returned untouched when "input" is missing
or is neither a string nor a list.
"""
input_data: Optional[Union[str, "ResponseInputParam"]] = data.get("input")
tools_to_check: List[ChatCompletionToolParam] = []
if input_data is None:
return data
# Chat-style view of the same input, passed to the guardrail as
# "structured_messages" alongside the raw texts.
structured_messages = (
LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input_data,
responses_api_request=data,
)
)
# Handle simple string input
if isinstance(input_data, str):
inputs = GenericGuardrailAPIInputs(texts=[input_data])
original_tools: List[Dict[str, Any]] = []
# Extract and transform tools if present
if "tools" in data and data["tools"]:
# Copy of the request tools, used later when merging guardrailed tools back.
original_tools = list(data["tools"])
self._extract_and_transform_tools(data["tools"], tools_to_check)
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages # type: ignore
# Include model information if available
model = data.get("model")
if model:
inputs["model"] = model
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=data,
input_type="request",
logging_obj=litellm_logging_obj,
)
guardrailed_texts = guardrailed_inputs.get("texts", [])
# Keep the original string when the guardrail returned no texts.
data["input"] = guardrailed_texts[0] if guardrailed_texts else input_data
self._apply_guardrailed_tools_to_data(
data, original_tools, guardrailed_inputs.get("tools")
)
verbose_proxy_logger.debug("OpenAI Responses API: Processed string input")
return data
# Handle list input (ResponseInputParam)
if not isinstance(input_data, list):
return data
texts_to_check: List[str] = []
images_to_check: List[str] = []
# One (message index, content index or None) entry per collected text, so
# guardrailed texts can be written back to where they came from.
task_mappings: List[Tuple[int, Optional[int]]] = []
original_tools_list: List[Dict[str, Any]] = list(data.get("tools") or [])
# Step 1: Extract all text content, images, and tools
for msg_idx, message in enumerate(input_data):
self._extract_input_text_and_images(
message=message,
msg_idx=msg_idx,
texts_to_check=texts_to_check,
images_to_check=images_to_check,
task_mappings=task_mappings,
)
# Extract and transform tools if present
if "tools" in data and data["tools"]:
self._extract_and_transform_tools(data["tools"], tools_to_check)
# Step 2: Apply guardrail to all texts in batch
# NOTE(review): the guardrail call is gated on texts_to_check; a request
# that carries only tools/images appears to skip it - confirm intended.
if texts_to_check:
inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
if images_to_check:
inputs["images"] = images_to_check
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages # type: ignore
# Include model information if available
model = data.get("model")
if model:
inputs["model"] = model
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=data,
input_type="request",
logging_obj=litellm_logging_obj,
)
guardrailed_texts = guardrailed_inputs.get("texts", [])
self._apply_guardrailed_tools_to_data(
data,
original_tools_list,
guardrailed_inputs.get("tools"),
)
# Step 3: Map guardrail responses back to original input structure
await self._apply_guardrail_responses_to_input(
messages=input_data,
responses=guardrailed_texts,
task_mappings=task_mappings,
)
verbose_proxy_logger.debug(
"OpenAI Responses API: Processed input messages: %s", input_data
)
return data
def extract_request_tool_names(self, data: dict) -> List[str]:
    """Collect tool identifiers from a Responses API request.

    Function tools contribute their ``name`` and MCP tools their
    ``server_label``; non-dict entries and tools without a truthy identifier
    are ignored.
    """
    # (tool type, key holding its identifier)
    identifier_keys = (("function", "name"), ("mcp", "server_label"))
    collected: List[str] = []
    for entry in data.get("tools") or []:
        if not isinstance(entry, dict):
            continue
        for tool_type, key in identifier_keys:
            if entry.get("type") == tool_type and entry.get(key):
                collected.append(str(entry[key]))
                break
    return collected
def _extract_and_transform_tools(
    self,
    tools: List[Dict[str, Any]],
    tools_to_check: List[ChatCompletionToolParam],
) -> None:
    """
    Convert Responses API tools to Chat Completion tools and collect them.

    The converted tools are appended to ``tools_to_check`` in place so they
    can be handed to guardrails. Anything that is not a list (including None)
    is ignored.
    """
    if not isinstance(tools, list):
        return

    to_chat_tools = (
        LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools
    )
    # Only the first element of the returned pair is used here.
    chat_tools, _unused = to_chat_tools(tools)  # type: ignore
    tools_to_check.extend(cast(List[ChatCompletionToolParam], chat_tools))
def _remap_tools_to_responses_api_format(
    self, guardrailed_tools: List[Any]
) -> List[Dict[str, Any]]:
    """
    Convert tools returned by a guardrail (Chat Completion format) back into
    the tool format used in Responses API requests.
    """
    to_responses_tools = (
        LiteLLMCompletionResponsesConfig.transform_chat_completion_tool_params_to_responses_api_tools
    )
    responses_tools = to_responses_tools(guardrailed_tools)  # type: ignore
    return responses_tools
def _merge_tools_after_guardrail(
self,
original_tools: List[Dict[str, Any]],
remapped: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""
Merge remapped guardrailed tools with original tools that were not sent
to the guardrail (e.g. web_search, web_search_preview), preserving order.
"""
if not original_tools:
return remapped
result: List[Dict[str, Any]] = []
j = 0
for tool in original_tools:
if isinstance(tool, dict) and tool.get("type") in (
"web_search",
"web_search_preview",
):
result.append(tool)
else:
if j < len(remapped):
result.append(remapped[j])
j += 1
return result
def _apply_guardrailed_tools_to_data(
self,
data: dict,
original_tools: List[Dict[str, Any]],
guardrailed_tools: Optional[List[Any]],
) -> None:
"""Remap guardrailed tools to Responses API format and merge with original, then set data['tools']."""
if guardrailed_tools is not None:
remapped = self._remap_tools_to_responses_api_format(guardrailed_tools)
data["tools"] = self._merge_tools_after_guardrail(original_tools, remapped)
def _extract_input_text_and_images(
self,
message: Any, # Can be Dict[str, Any] or ResponseInputParam
msg_idx: int,
texts_to_check: List[str],
images_to_check: List[str],
task_mappings: List[Tuple[int, Optional[int]]],
) -> None:
"""
Extract text content and images from an input message.
Override this method to customize text/image extraction logic.
"""
content = message.get("content", None)
if content is None:
return
if isinstance(content, str):
# Simple string content
texts_to_check.append(content)
task_mappings.append((msg_idx, None))
elif isinstance(content, list):
# List content (e.g., multimodal with text and images)
for content_idx, content_item in enumerate(content):
if isinstance(content_item, dict):
# Extract text
text_str = content_item.get("text", None)
if text_str is not None:
texts_to_check.append(text_str)
task_mappings.append((msg_idx, int(content_idx)))
# Extract images
if content_item.get("type") == "image_url":
image_url = content_item.get("image_url", {})
if isinstance(image_url, dict):
url = image_url.get("url")
if url:
images_to_check.append(url)
async def _apply_guardrail_responses_to_input(
self,
messages: Any, # Can be List[Dict[str, Any]] or ResponseInputParam
responses: List[str],
task_mappings: List[Tuple[int, Optional[int]]],
) -> None:
"""
Apply guardrail responses back to input messages.
Override this method to customize how responses are applied.
"""
for task_idx, guardrail_response in enumerate(responses):
mapping = task_mappings[task_idx]
msg_idx = cast(int, mapping[0])
content_idx_optional = cast(Optional[int], mapping[1])
content = messages[msg_idx].get("content", None)
if content is None:
continue
if isinstance(content, str) and content_idx_optional is None:
# Replace string content with guardrail response
messages[msg_idx]["content"] = guardrail_response
elif isinstance(content, list) and content_idx_optional is not None:
# Replace specific text item in list content
if isinstance(messages[msg_idx]["content"][content_idx_optional], dict):
messages[msg_idx]["content"][content_idx_optional][
"text"
] = guardrail_response
async def process_output_response(
self,
response: "ResponsesAPIResponse",
guardrail_to_apply: "CustomGuardrail",
litellm_logging_obj: Optional[Any] = None,
user_api_key_dict: Optional[Any] = None,
) -> Any:
"""
Process output response by applying guardrails to text content and tool calls.
Args:
response: LiteLLM ResponsesAPIResponse object
guardrail_to_apply: The guardrail instance to apply
litellm_logging_obj: Optional logging object
user_api_key_dict: User API key metadata to pass to guardrails
Returns:
Modified response with guardrail applied to content
Response Format Support:
- response.output is a list of output items
- Each output item can be:
* GenericResponseOutputItem with a content list of OutputText objects
* ResponseFunctionToolCall with tool call data
- Each OutputText object has a text field
Notes:
- The response is returned unchanged when it has no "output" or the
output is empty.
- Tool calls are sent to the guardrail for inspection, but only the
returned "texts" are written back into the response here.
"""
texts_to_check: List[str] = []
images_to_check: List[str] = []
tool_calls_to_check: List[ChatCompletionToolCallChunk] = []
task_mappings: List[Tuple[int, int]] = []
# Track (output_item_index, content_index) for each text
# Handle both dict and Pydantic object responses
if isinstance(response, dict):
response_output = response.get("output", [])
elif hasattr(response, "output"):
response_output = response.output or []
else:
verbose_proxy_logger.debug(
"OpenAI Responses API: No output found in response"
)
return response
if not response_output:
verbose_proxy_logger.debug("OpenAI Responses API: Empty output in response")
return response
# Step 1: Extract all text content and tool calls from response output
for output_idx, output_item in enumerate(response_output):
self._extract_output_text_and_images(
output_item=output_item,
output_idx=output_idx,
texts_to_check=texts_to_check,
images_to_check=images_to_check,
task_mappings=task_mappings,
tool_calls_to_check=tool_calls_to_check,
)
# Step 2: Apply guardrail to all texts in batch
# The guardrail runs when there is at least one text or one tool call.
if texts_to_check or tool_calls_to_check:
# Create a request_data dict with response info and user API key metadata
request_data: dict = {"response": response}
# Add user API key metadata with prefixed keys
user_metadata = self.transform_user_api_key_dict_to_metadata(
user_api_key_dict
)
if user_metadata:
request_data["litellm_metadata"] = user_metadata
inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
if images_to_check:
inputs["images"] = images_to_check
if tool_calls_to_check:
inputs["tool_calls"] = tool_calls_to_check
# Include model information from the response if available
response_model = None
if isinstance(response, dict):
response_model = response.get("model")
elif hasattr(response, "model"):
response_model = getattr(response, "model", None)
if response_model:
inputs["model"] = response_model
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=request_data,
input_type="response",
logging_obj=litellm_logging_obj,
)
guardrailed_texts = guardrailed_inputs.get("texts", [])
# Step 3: Map guardrail responses back to original response structure
await self._apply_guardrail_responses_to_output(
response=response,
responses=guardrailed_texts,
task_mappings=task_mappings,
)
verbose_proxy_logger.debug(
"OpenAI Responses API: Processed output response: %s", response
)
return response
async def process_output_streaming_response(
self,
responses_so_far: List[Any],
guardrail_to_apply: "CustomGuardrail",
litellm_logging_obj: Optional[Any] = None,
user_api_key_dict: Optional[Any] = None,
) -> List[Any]:
"""
Process output streaming response by applying guardrails to text content.

Only the last chunk's "type" decides what is sent to the guardrail:
- "response.output_item.done": tool calls translated from that chunk.
- "response.completed": text and tool calls of the first converted choice.
- anything else: the concatenated text of all chunks so far.

The value returned by apply_guardrail() is never applied to the chunks;
every path returns `responses_so_far` itself.
`user_api_key_dict` is accepted but not used in this method.

NOTE(review): the first two branches appear to fall through to the
text check when they find nothing to inspect - confirm against the
indented original.
"""
# Assumes responses_so_far is non-empty and chunks are dict-like (.get is used).
final_chunk = responses_so_far[-1]
if final_chunk.get("type") == "response.output_item.done":
# convert openai response to model response
model_response_stream = OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(
final_chunk
)
tool_calls = model_response_stream.choices[0].delta.tool_calls
if tool_calls:
inputs = GenericGuardrailAPIInputs()
inputs["tool_calls"] = cast(
List[ChatCompletionToolCallChunk], tool_calls
)
# Include model information if available
if (
hasattr(model_response_stream, "model")
and model_response_stream.model
):
inputs["model"] = model_response_stream.model
# Result is intentionally unused (underscore-prefixed name).
_guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data={},
input_type="response",
logging_obj=litellm_logging_obj,
)
return responses_so_far
elif final_chunk.get("type") == "response.completed":
# convert openai response to model response
outputs = final_chunk.get("response", {}).get("output", [])
model_response_choices = LiteLLMResponsesTransformationHandler._convert_response_output_to_choices(
output_items=outputs,
handle_raw_dict_callback=None,
)
if model_response_choices:
# Only the first choice is inspected.
tool_calls = model_response_choices[0].message.tool_calls
text = model_response_choices[0].message.content
guardrail_inputs = GenericGuardrailAPIInputs()
if text:
guardrail_inputs["texts"] = [text]
if tool_calls:
guardrail_inputs["tool_calls"] = cast(
List[ChatCompletionToolCallChunk], tool_calls
)
# Include model information from the response if available
response_model = final_chunk.get("response", {}).get("model")
if response_model:
guardrail_inputs["model"] = response_model
if tool_calls or text:
_guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=guardrail_inputs,
request_data={},
input_type="response",
logging_obj=litellm_logging_obj,
)
return responses_so_far
else:
verbose_proxy_logger.debug(
"Skipping output guardrail - model response has no choices"
)
# model_response_stream = OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(final_chunk)
# tool_calls = model_response_stream.choices[0].tool_calls
# convert openai response to model response
# Text check: run the guardrail on the text accumulated from all chunks.
string_so_far = self.get_streaming_string_so_far(responses_so_far)
inputs = GenericGuardrailAPIInputs(texts=[string_so_far])
# Try to get model from the final chunk if available
if isinstance(final_chunk, dict):
response_model = (
final_chunk.get("response", {}).get("model")
if isinstance(final_chunk.get("response"), dict)
else None
)
if response_model:
inputs["model"] = response_model
_guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data={},
input_type="response",
logging_obj=litellm_logging_obj,
)
return responses_so_far
def _check_streaming_has_ended(self, responses_so_far: List[Any]) -> bool:
"""
Check if the streaming has ended.
"""
return all(
response.choices[0].finish_reason is not None
for response in responses_so_far
)
def get_streaming_string_so_far(self, responses_so_far: List[Any]) -> str:
    """
    Concatenate the ``"text"`` value of every chunk received so far.

    Chunks without a ``"text"`` key contribute an empty string.
    """
    pieces = []
    for chunk in responses_so_far:
        pieces.append(chunk.get("text", ""))
    return "".join(pieces)
def _has_text_content(self, response: "ResponsesAPIResponse") -> bool:
"""
Check if response has any text content to process.
Override this method to customize text content detection.
"""
if not hasattr(response, "output") or response.output is None:
return False
for output_item in response.output:
if isinstance(output_item, BaseModel):
try:
generic_response_output_item = (
GenericResponseOutputItem.model_validate(
output_item.model_dump()
)
)
if generic_response_output_item.content:
output_item = generic_response_output_item
except Exception:
continue
if isinstance(output_item, (GenericResponseOutputItem, dict)):
content = (
output_item.content
if isinstance(output_item, GenericResponseOutputItem)
else output_item.get("content", [])
)
if content:
for content_item in content:
# Check if it's an OutputText with text
if isinstance(content_item, OutputText):
if content_item.text:
return True
elif isinstance(content_item, dict):
if content_item.get("text"):
return True
return False
def _extract_output_text_and_images(
    self,
    output_item: Any,
    output_idx: int,
    texts_to_check: List[str],
    images_to_check: List[str],
    task_mappings: List[Tuple[int, int]],
    tool_calls_to_check: Optional[List[ChatCompletionToolCallChunk]] = None,
) -> None:
    """
    Extract text content and tool calls from a response output item.

    Override this method to customize text/image/tool extraction logic.

    Args:
        output_item: One entry of the response ``output`` list. Pydantic
            models and dicts are handled; anything else is ignored.
        output_idx: Position of ``output_item`` in the response output.
        texts_to_check: Appended in place with every non-empty text found.
        images_to_check: Not used by this implementation.
        task_mappings: Appended in place with ``(output_idx, content_idx)``
            for each text added to ``texts_to_check``.
        tool_calls_to_check: When provided, function-call items are
            converted to chat-completion tool calls and appended here.
    """
    # Tool call given as a model: either the dedicated class or any Pydantic
    # model whose ``type`` is "function_call".
    is_tool_call_model = isinstance(output_item, OutputFunctionToolCall) or (
        isinstance(output_item, BaseModel)
        and getattr(output_item, "type", None) == "function_call"
    )
    if is_tool_call_model:
        if tool_calls_to_check is not None:
            tool_call_dict = LiteLLMCompletionResponsesConfig.convert_response_function_tool_call_to_chat_completion_tool_call(
                tool_call_item=output_item,
                index=output_idx,
            )
            tool_calls_to_check.append(
                cast(ChatCompletionToolCallChunk, tool_call_dict)
            )
        return

    # Tool call given as a plain dict: build the model first, then convert.
    if isinstance(output_item, dict) and output_item.get("type") == "function_call":
        if tool_calls_to_check is not None:
            try:
                tool_call_obj = ResponseFunctionToolCall(**output_item)
                tool_call_dict = LiteLLMCompletionResponsesConfig.convert_response_function_tool_call_to_chat_completion_tool_call(
                    tool_call_item=tool_call_obj,
                    index=output_idx,
                )
                tool_calls_to_check.append(
                    cast(ChatCompletionToolCallChunk, tool_call_dict)
                )
            except Exception as e:
                # Still best-effort (the item is skipped), but leave a trace
                # instead of dropping the failure silently.
                verbose_proxy_logger.debug(
                    "OpenAI Responses API: Skipping function_call output item %s, conversion failed: %s",
                    output_idx,
                    e,
                )
        return

    # Handle both GenericResponseOutputItem and dict
    content: Optional[Union[List[OutputText], List[dict]]] = None
    if isinstance(output_item, BaseModel):
        try:
            generic_response_output_item = GenericResponseOutputItem.model_validate(
                output_item.model_dump()
            )
            if generic_response_output_item.content:
                content = generic_response_output_item.content
        except Exception:
            # Validation failed: fall back to the item's own content, if any.
            if hasattr(output_item, "content") and output_item.content:  # type: ignore
                content = output_item.content  # type: ignore
            else:
                return
    elif isinstance(output_item, dict):
        content = output_item.get("content", [])
    else:
        return

    if not content:
        return

    verbose_proxy_logger.debug(
        "OpenAI Responses API: Processing output item: %s", output_item
    )

    for content_idx, content_item in enumerate(content):
        # Content entries are either OutputText objects or dicts.
        if isinstance(content_item, OutputText):
            text_content = content_item.text
        elif isinstance(content_item, dict):
            text_content = content_item.get("text")
        else:
            continue
        if text_content:
            texts_to_check.append(text_content)
            task_mappings.append((output_idx, content_idx))
async def _apply_guardrail_responses_to_output(
self,
response: "ResponsesAPIResponse",
responses: List[str],
task_mappings: List[Tuple[int, int]],
) -> None:
"""
Apply guardrail responses back to output response.
Override this method to customize how responses are applied.
"""
# Handle both dict and Pydantic object responses
if isinstance(response, dict):
response_output = response.get("output", [])
elif hasattr(response, "output"):
response_output = response.output or []
else:
return
for task_idx, guardrail_response in enumerate(responses):
mapping = task_mappings[task_idx]
output_idx = cast(int, mapping[0])
content_idx = cast(int, mapping[1])
if output_idx >= len(response_output):
continue
output_item = response_output[output_idx]
# Handle both GenericResponseOutputItem, BaseModel, and dict
if isinstance(output_item, GenericResponseOutputItem):
if output_item.content and content_idx < len(output_item.content):
content_item = output_item.content[content_idx]
if isinstance(content_item, OutputText):
content_item.text = guardrail_response
elif isinstance(content_item, dict):
content_item["text"] = guardrail_response
elif isinstance(output_item, BaseModel):
# Handle other Pydantic models by converting to GenericResponseOutputItem
try:
generic_item = GenericResponseOutputItem.model_validate(
output_item.model_dump()
)
if generic_item.content and content_idx < len(generic_item.content):
content_item = generic_item.content[content_idx]
if isinstance(content_item, OutputText):
content_item.text = guardrail_response
# Update the original response output
if hasattr(output_item, "content") and output_item.content: # type: ignore
original_content = output_item.content[content_idx] # type: ignore
if hasattr(original_content, "text"):
original_content.text = guardrail_response # type: ignore
except Exception:
pass
elif isinstance(output_item, dict):
content = output_item.get("content", [])
if content and content_idx < len(content):
if isinstance(content[content_idx], dict):
content[content_idx]["text"] = guardrail_response
elif hasattr(content[content_idx], "text"):
content[content_idx].text = guardrail_response

View File

@@ -0,0 +1,580 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast, get_type_hints
import httpx
from openai.types.responses import ResponseReasoningItem
from pydantic import BaseModel, ValidationError
import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.core_helpers import process_response_headers
from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
_safe_convert_created_field,
)
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import LlmProviders
from ..common_utils import OpenAIError
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
@property
def custom_llm_provider(self) -> LlmProviders:
    """Provider identifier for this config: always ``LlmProviders.OPENAI``."""
    return LlmProviders.OPENAI
def get_supported_openai_params(self, model: str) -> list:
    """
    List the request parameters accepted for the OpenAI Responses API.

    The result is the de-duplicated union of a fixed set of pass-through
    keys and the annotated fields of ``ResponsesAPIRequestParams``. A set
    is used for de-duplication, so the order of the list is not defined.
    """
    always_supported = {
        "input",
        "model",
        "extra_headers",
        "extra_query",
        "extra_body",
        "timeout",
    }
    declared_fields = get_type_hints(ResponsesAPIRequestParams)
    return list(always_supported.union(declared_fields))
def map_openai_params(
    self,
    response_api_optional_params: ResponsesAPIOptionalRequestParams,
    model: str,
    drop_params: bool,
) -> Dict:
    """
    Return the optional params as a new plain dict.

    No mapping is applied because the inputs already follow the OpenAI
    spec; ``model`` and ``drop_params`` are not consulted.
    """
    mapped_params: Dict = dict(response_api_optional_params)
    return mapped_params
def transform_responses_api_request(
    self,
    model: str,
    input: Union[str, ResponseInputParam],
    response_api_optional_request_params: Dict,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Dict:
    """
    Build the request body for the Responses API.

    The input is passed through ``_validate_input_param`` and then combined
    with ``model`` and the optional params into a
    ``ResponsesAPIRequestParams``, which is returned as a plain dict. No
    other transform is applied since inputs are in OpenAI spec already.
    """
    validated_input = self._validate_input_param(input)
    request_params = ResponsesAPIRequestParams(
        model=model,
        input=validated_input,
        **response_api_optional_request_params,
    )
    return dict(request_params)
def _validate_input_param(
self, input: Union[str, ResponseInputParam]
) -> Union[str, ResponseInputParam]:
"""
Ensure all input fields if pydantic are converted to dict
OpenAI API Fails when we try to JSON dumps specific input pydantic fields.
This function ensures all input fields are converted to dict.
"""
if isinstance(input, list):
validated_input = []
for item in input:
# if it's pydantic, convert to dict
if isinstance(item, BaseModel):
validated_input.append(item.model_dump(exclude_none=True))
elif isinstance(item, dict):
# Handle reasoning items specifically to filter out status=None
if item.get("type") == "reasoning":
verbose_logger.debug(f"Handling reasoning item: {item}")
# Type assertion since we know it's a dict at this point
dict_item = cast(Dict[str, Any], item)
filtered_item = self._handle_reasoning_item(dict_item)
else:
# For other dict items, just pass through
filtered_item = cast(Dict[str, Any], item)
validated_input.append(filtered_item)
else:
validated_input.append(item)
return validated_input # type: ignore
# Input is expected to be either str or List, no single BaseModel expected
return input
def _handle_reasoning_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
"""
Handle reasoning items specifically to filter out status=None using OpenAI's model.
Issue: https://github.com/BerriAI/litellm/issues/13484
OpenAI API does not accept ReasoningItem(status=None), so we need to:
1. Check if the item is a reasoning type
2. Create a ResponseReasoningItem object with the item data
3. Convert it back to dict with exclude_none=True to filter None values
"""
if item.get("type") == "reasoning":
try:
# Ensure required fields are present for ResponseReasoningItem
item_data = dict(item)
if "summary" not in item_data:
item_data["summary"] = (
item_data.get("reasoning_content", "")[:100] + "..."
if len(item_data.get("reasoning_content", "")) > 100
else item_data.get("reasoning_content", "")
)
# Create ResponseReasoningItem object from the item data
reasoning_item = ResponseReasoningItem(**item_data)
# Convert back to dict with exclude_none=True to exclude None fields
dict_reasoning_item = reasoning_item.model_dump(exclude_none=True)
return dict_reasoning_item
except Exception as e:
verbose_logger.debug(
f"Failed to create ResponseReasoningItem, falling back to manual filtering: {e}"
)
# Fallback: manually filter out known None fields
filtered_item = {
k: v
for k, v in item.items()
if v is not None
or k not in {"status", "content", "encrypted_content"}
}
return filtered_item
return item
def transform_response_api_response(
    self,
    model: str,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Turn the raw HTTP response into a ``ResponsesAPIResponse``.

    No transform is applied to the payload beyond normalising
    ``created_at``, since outputs are in OpenAI spec already.

    Raises:
        OpenAIError: If post-call logging, JSON decoding or the
            ``created_at`` conversion fails; carries the raw text and
            status code.
    """
    try:
        logging_obj.post_call(
            original_response=raw_response.text,
            additional_args={"complete_input_dict": {}},
        )
        payload = raw_response.json()
        payload["created_at"] = _safe_convert_created_field(payload["created_at"])
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )

    header_map = dict(raw_response.headers)
    processed_headers = process_response_headers(header_map)

    try:
        response = ResponsesAPIResponse(**payload)
    except Exception:
        # Validation failed: build the object without validation instead.
        verbose_logger.debug(
            f"Error constructing ResponsesAPIResponse: {payload}, using model_construct"
        )
        response = ResponsesAPIResponse.model_construct(**payload)

    # Store processed headers in additional_headers so they get returned to the client
    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = header_map
    return response
def validate_environment(
    self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams]
) -> dict:
    """
    Add the ``Authorization`` bearer header and return ``headers``.

    The key is the first truthy value among ``litellm_params.api_key``,
    ``litellm.api_key``, ``litellm.openai_key`` and the ``OPENAI_API_KEY``
    secret. ``headers`` is updated in place.
    """
    params = litellm_params or GenericLiteLLMParams()
    api_key = (
        params.api_key
        or litellm.api_key
        or litellm.openai_key
        or get_secret_str("OPENAI_API_KEY")
    )
    bearer_value = f"Bearer {api_key}"
    headers.update({"Authorization": bearer_value})
    return headers
def get_complete_url(
    self,
    api_base: Optional[str],
    litellm_params: dict,
) -> str:
    """
    Get the endpoint for OpenAI responses API.

    The base is the first truthy value among the ``api_base`` argument,
    ``litellm.api_base``, the ``OPENAI_BASE_URL`` and ``OPENAI_API_BASE``
    secrets, and finally ``https://api.openai.com/v1``. Trailing slashes
    are removed before ``/responses`` is appended.
    """
    resolved_base = api_base
    if not resolved_base:
        # Only consult global settings / secrets when no base was passed in.
        resolved_base = (
            litellm.api_base
            or get_secret_str("OPENAI_BASE_URL")
            or get_secret_str("OPENAI_API_BASE")
            or "https://api.openai.com/v1"
        )
    trimmed_base = resolved_base.rstrip("/")
    return f"{trimmed_base}/responses"
def transform_streaming_response(
    self,
    model: str,
    parsed_chunk: dict,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIStreamingResponse:
    """
    Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse.

    The event class is chosen from the chunk's ``type``. A dict-valued
    ``error`` whose ``code`` is missing or None gets the code
    ``"unknown_error"`` (on a copy of the chunk). If Pydantic validation of
    the chunk fails, the event is built with ``model_construct`` instead.
    """
    verbose_logger.debug("Raw OpenAI Chunk=%s", parsed_chunk)
    event_cls = OpenAIResponsesAPIConfig.get_event_model_class(
        event_type=str(parsed_chunk.get("type"))
    )

    # Some OpenAI-compatible providers send error.code: null; coalesce so validation succeeds.
    try:
        error_payload = parsed_chunk.get("error")
        if isinstance(error_payload, dict) and error_payload.get("code") is None:
            # Rebuild both levels so the caller's dicts are left untouched.
            parsed_chunk = {
                **parsed_chunk,
                "error": {**error_payload, "code": "unknown_error"},
            }
    except Exception:
        verbose_logger.debug("Failed to coalesce error.code in parsed_chunk")

    try:
        return event_cls(**parsed_chunk)
    except ValidationError:
        verbose_logger.debug(
            "Pydantic validation failed for %s with chunk %s, "
            "falling back to model_construct",
            event_cls.__name__,
            parsed_chunk,
        )
        return event_cls.model_construct(**parsed_chunk)
@staticmethod
def get_event_model_class(event_type: str) -> Any:
    """
    Returns the appropriate event model class based on the event type.

    Args:
        event_type (str): The type of event from the response chunk

    Returns:
        Any: The corresponding event model class. Event types without an
        entry in the table resolve to ``GenericEvent``; nothing is raised
        for unknown types.
    """
    event_models = {
        ResponsesAPIStreamEvents.RESPONSE_CREATED: ResponseCreatedEvent,
        ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS: ResponseInProgressEvent,
        ResponsesAPIStreamEvents.RESPONSE_COMPLETED: ResponseCompletedEvent,
        ResponsesAPIStreamEvents.RESPONSE_FAILED: ResponseFailedEvent,
        ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE: ResponseIncompleteEvent,
        ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED: OutputItemAddedEvent,
        ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE: OutputItemDoneEvent,
        ResponsesAPIStreamEvents.CONTENT_PART_ADDED: ContentPartAddedEvent,
        ResponsesAPIStreamEvents.CONTENT_PART_DONE: ContentPartDoneEvent,
        ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA: OutputTextDeltaEvent,
        ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED: OutputTextAnnotationAddedEvent,
        ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE: OutputTextDoneEvent,
        ResponsesAPIStreamEvents.REFUSAL_DELTA: RefusalDeltaEvent,
        ResponsesAPIStreamEvents.REFUSAL_DONE: RefusalDoneEvent,
        ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA: FunctionCallArgumentsDeltaEvent,
        ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE: FunctionCallArgumentsDoneEvent,
        ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS: FileSearchCallInProgressEvent,
        ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING: FileSearchCallSearchingEvent,
        ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED: FileSearchCallCompletedEvent,
        ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS: WebSearchCallInProgressEvent,
        ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING: WebSearchCallSearchingEvent,
        ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED: WebSearchCallCompletedEvent,
        ResponsesAPIStreamEvents.MCP_LIST_TOOLS_IN_PROGRESS: MCPListToolsInProgressEvent,
        ResponsesAPIStreamEvents.MCP_LIST_TOOLS_COMPLETED: MCPListToolsCompletedEvent,
        ResponsesAPIStreamEvents.MCP_LIST_TOOLS_FAILED: MCPListToolsFailedEvent,
        ResponsesAPIStreamEvents.MCP_CALL_IN_PROGRESS: MCPCallInProgressEvent,
        ResponsesAPIStreamEvents.MCP_CALL_ARGUMENTS_DELTA: MCPCallArgumentsDeltaEvent,
        ResponsesAPIStreamEvents.MCP_CALL_ARGUMENTS_DONE: MCPCallArgumentsDoneEvent,
        ResponsesAPIStreamEvents.MCP_CALL_COMPLETED: MCPCallCompletedEvent,
        ResponsesAPIStreamEvents.MCP_CALL_FAILED: MCPCallFailedEvent,
        ResponsesAPIStreamEvents.IMAGE_GENERATION_PARTIAL_IMAGE: ImageGenerationPartialImageEvent,
        ResponsesAPIStreamEvents.ERROR: ErrorEvent,
        # Shell tool events: passthrough as GenericEvent so payload is preserved
        ResponsesAPIStreamEvents.SHELL_CALL_IN_PROGRESS: GenericEvent,
        ResponsesAPIStreamEvents.SHELL_CALL_COMPLETED: GenericEvent,
        ResponsesAPIStreamEvents.SHELL_CALL_OUTPUT: GenericEvent,
    }
    # Unknown event types fall back to GenericEvent rather than failing.
    return event_models.get(
        cast(ResponsesAPIStreamEvents, event_type), GenericEvent
    )
def should_fake_stream(
    self,
    model: Optional[str],
    stream: Optional[bool],
    custom_llm_provider: Optional[str] = None,
) -> bool:
    """
    Decide whether streaming has to be simulated for this request.

    Returns True only when ``stream`` is exactly True, a model is given,
    and ``litellm.utils.supports_native_streaming`` reports False for it.
    Lookup errors are logged at debug level and treated as "do not fake".
    """
    if stream is not True or model is None:
        return False

    try:
        natively_streams = litellm.utils.supports_native_streaming(
            model=model,
            custom_llm_provider=custom_llm_provider,
        )
        # Only an explicit False counts; None/unknown keeps real streaming.
        if natively_streams is False:
            return True
    except Exception as e:
        verbose_logger.debug(
            f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
        )
    return False
def supports_native_websocket(self) -> bool:
    """Always True: OpenAI supports native WebSocket for Responses API."""
    return True
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
def transform_delete_response_api_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and (empty) body for deleting a stored response.

    OpenAI API expects the following request
    - DELETE /v1/responses/{response_id}

    Returns:
        ``(url, data)`` where ``url`` is ``api_base`` with the response id
        appended as a path segment and ``data`` is an empty dict.
    """
    empty_body: Dict = {}
    return f"{api_base}/{response_id}", empty_body
def transform_delete_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> DeleteResponseResult:
    """
    Transform the delete response API response into a DeleteResponseResult.

    Raises:
        OpenAIError: If the body cannot be decoded as JSON; carries the raw
            text and status code.
    """
    try:
        payload = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )
    else:
        return DeleteResponseResult(**payload)
#########################################################
########## GET RESPONSE API TRANSFORMATION ###############
#########################################################
def transform_get_response_api_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and (empty) body for fetching a stored response.

    OpenAI API expects the following request
    - GET /v1/responses/{response_id}

    Returns:
        ``(url, data)`` where ``url`` is ``api_base`` with the response id
        appended as a path segment and ``data`` is an empty dict.
    """
    empty_body: Dict = {}
    return f"{api_base}/{response_id}", empty_body
def transform_get_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Transform the get response API response into a ResponsesAPIResponse.

    The raw and processed response headers are stored in the result's
    ``_hidden_params`` under "headers" and "additional_headers".

    Raises:
        OpenAIError: If the body cannot be decoded as JSON; carries the raw
            text and status code.
    """
    try:
        payload = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )

    header_map = dict(raw_response.headers)
    processed_headers = process_response_headers(header_map)

    response = ResponsesAPIResponse(**payload)
    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = header_map
    return response
#########################################################
########## LIST INPUT ITEMS TRANSFORMATION #############
#########################################################
def transform_list_input_items_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
    after: Optional[str] = None,
    before: Optional[str] = None,
    include: Optional[List[str]] = None,
    limit: int = 20,
    order: Literal["asc", "desc"] = "desc",
) -> Tuple[str, Dict]:
    """
    Build the URL and query params for listing a response's input items.

    Returns:
        ``(url, params)`` where ``url`` is
        ``{api_base}/{response_id}/input_items`` and ``params`` holds only
        the arguments that were supplied: ``after``, ``before``, ``limit``
        and ``order`` when not None, and ``include`` (comma-joined) when
        non-empty.
    """
    url = f"{api_base}/{response_id}/input_items"
    # An empty/None include list is omitted; the rest are omitted on None.
    joined_include = ",".join(include) if include else None
    candidates = (
        ("after", after),
        ("before", before),
        ("include", joined_include),
        ("limit", limit),
        ("order", order),
    )
    params: Dict[str, Any] = {
        key: value for key, value in candidates if value is not None
    }
    return url, params
def transform_list_input_items_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> Dict:
    """
    Return the decoded JSON body of a list-input-items response.

    Raises:
        OpenAIError: If the body cannot be decoded as JSON; carries the raw
            text and status code.
    """
    try:
        parsed_body = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )
    return parsed_body
#########################################################
########## CANCEL RESPONSE API TRANSFORMATION ##########
#########################################################
def transform_cancel_response_api_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and (empty) body for cancelling a response.

    OpenAI API expects the following request
    - POST /v1/responses/{response_id}/cancel

    Returns:
        ``(url, data)`` where ``url`` is
        ``{api_base}/{response_id}/cancel`` and ``data`` is an empty dict.
    """
    empty_body: Dict = {}
    return f"{api_base}/{response_id}/cancel", empty_body
def transform_cancel_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Transform the cancel response API response into a ResponsesAPIResponse.

    The raw and processed response headers are stored in the result's
    ``_hidden_params`` under "headers" and "additional_headers".

    Raises:
        OpenAIError: If the body cannot be decoded as JSON; carries the raw
            text and status code.
    """
    try:
        payload = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )

    header_map = dict(raw_response.headers)
    processed_headers = process_response_headers(header_map)

    response = ResponsesAPIResponse(**payload)
    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = header_map
    return response
#########################################################
########## COMPACT RESPONSE API TRANSFORMATION ##########
#########################################################
def transform_compact_response_api_request(
    self,
    model: str,
    input: Union[str, ResponseInputParam],
    response_api_optional_request_params: Dict,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and body for compacting a response.

    OpenAI API expects the following request
    - POST /v1/responses/compact

    Returns:
        ``(url, data)`` where ``url`` is ``api_base`` with ``/compact``
        appended to its path and ``data`` is the request body built from
        ``model``, the validated input and the optional params.
    """
    # Preserve query params (e.g., api-version) while appending /compact.
    base_url = httpx.URL(api_base)
    url = str(base_url.copy_with(path=f"{base_url.path.rstrip('/')}/compact"))

    validated_input = self._validate_input_param(input)
    request_params = ResponsesAPIRequestParams(
        model=model,
        input=validated_input,
        **response_api_optional_request_params,
    )
    return url, dict(request_params)
def transform_compact_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Transform the compact response API response into a ResponsesAPIResponse.

    ``created_at`` is normalised before the object is built. If validated
    construction fails, the object is built with ``model_construct``. The
    raw and processed headers are stored in ``_hidden_params``.

    Raises:
        OpenAIError: If post-call logging, JSON decoding or the
            ``created_at`` conversion fails; carries the raw text and
            status code.
    """
    try:
        logging_obj.post_call(
            original_response=raw_response.text,
            additional_args={"complete_input_dict": {}},
        )
        payload = raw_response.json()
        payload["created_at"] = _safe_convert_created_field(payload["created_at"])
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )

    header_map = dict(raw_response.headers)
    processed_headers = process_response_headers(header_map)

    try:
        response = ResponsesAPIResponse(**payload)
    except Exception:
        # Validation failed: build the object without validation instead.
        verbose_logger.debug(
            f"Error constructing ResponsesAPIResponse: {payload}, using model_construct"
        )
        response = ResponsesAPIResponse.model_construct(**payload)

    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = header_map
    return response