chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
OpenAI Responses API token counting implementation.
|
||||
"""
|
||||
|
||||
from litellm.llms.openai.responses.count_tokens.handler import (
|
||||
OpenAICountTokensHandler,
|
||||
)
|
||||
from litellm.llms.openai.responses.count_tokens.token_counter import (
|
||||
OpenAITokenCounter,
|
||||
)
|
||||
from litellm.llms.openai.responses.count_tokens.transformation import (
|
||||
OpenAICountTokensConfig,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"OpenAICountTokensHandler",
|
||||
"OpenAICountTokensConfig",
|
||||
"OpenAITokenCounter",
|
||||
]
|
||||
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
OpenAI Responses API token counting handler.
|
||||
|
||||
Uses httpx for HTTP requests to OpenAI's /v1/responses/input_tokens endpoint.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
from litellm.llms.openai.common_utils import OpenAIError
|
||||
from litellm.llms.openai.responses.count_tokens.transformation import (
|
||||
OpenAICountTokensConfig,
|
||||
)
|
||||
|
||||
|
||||
class OpenAICountTokensHandler(OpenAICountTokensConfig):
    """
    Handler for OpenAI Responses API token counting requests.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        input: Union[str, List[Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Handle a token counting request to OpenAI's Responses API.

        Returns:
            Dictionary containing {"input_tokens": <number>}

        Raises:
            OpenAIError: If the API request fails
        """
        try:
            # Fail fast on missing model/input before doing any network work.
            self.validate_request(model, input)

            verbose_logger.debug(
                f"Processing OpenAI CountTokens request for model: {model}"
            )

            payload = self.transform_request_to_count_tokens(
                model=model,
                input=input,
                tools=tools,
                instructions=instructions,
            )

            url = self.get_openai_count_tokens_endpoint(api_base)
            verbose_logger.debug(f"Making request to: {url}")

            client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.OPENAI
            )

            # Caller-supplied timeout wins; otherwise the global litellm default.
            effective_timeout = (
                litellm.request_timeout if timeout is None else timeout
            )

            response = await client.post(
                url,
                headers=self.get_required_headers(api_key),
                json=payload,
                timeout=effective_timeout,
            )

            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                body_text = response.text
                verbose_logger.error(f"OpenAI API error: {body_text}")
                raise OpenAIError(
                    status_code=response.status_code,
                    message=body_text,
                )

            parsed = response.json()
            verbose_logger.debug(f"OpenAI response: {parsed}")
            return parsed

        except OpenAIError:
            # Already normalized above - propagate unchanged.
            raise
        except httpx.HTTPStatusError as e:
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=e.response.status_code,
                message=e.response.text,
            )
        except (httpx.RequestError, json.JSONDecodeError, ValueError) as e:
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )
|
||||
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
OpenAI Token Counter implementation using the Responses API /input_tokens endpoint.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.base_llm.base_utils import BaseTokenCounter
|
||||
from litellm.llms.openai.common_utils import OpenAIError
|
||||
from litellm.llms.openai.responses.count_tokens.handler import (
|
||||
OpenAICountTokensHandler,
|
||||
)
|
||||
from litellm.llms.openai.responses.count_tokens.transformation import (
|
||||
OpenAICountTokensConfig,
|
||||
)
|
||||
from litellm.types.utils import LlmProviders, TokenCountResponse
|
||||
|
||||
# Global handler instance - reuse across all token counting requests
# (stateless aside from config, so a single shared instance is safe here)
openai_count_tokens_handler = OpenAICountTokensHandler()
|
||||
|
||||
|
||||
class OpenAITokenCounter(BaseTokenCounter):
    """Token counter implementation for OpenAI provider using the Responses API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only the native OpenAI provider exposes the /input_tokens endpoint.
        return custom_llm_provider == LlmProviders.OPENAI.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using OpenAI's Responses API /input_tokens endpoint.
        """
        if not messages:
            return None

        params = (deployment or {}).get("litellm_params", {})

        # Resolve credentials: deployment config first, then environment.
        api_key = params.get("api_key")
        if not api_key:
            api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            verbose_logger.warning("No OpenAI API key found for token counting")
            return None

        api_base = params.get("api_base")

        # Convert chat messages into Responses API `input` items plus any
        # instructions harvested from system/developer messages.
        input_items, instructions = (
            OpenAICountTokensConfig.messages_to_responses_input(messages)
        )

        # Fall back to the `system` argument when the messages carried none.
        if instructions is None and system is not None:
            instructions = system if isinstance(system, str) else str(system)

        # Nothing to send (e.g. system-only messages) -> defer to local counting.
        if not input_items:
            return None

        try:
            api_result = await openai_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                input=input_items if input_items is not None else [],
                api_key=api_key,
                api_base=api_base,
                tools=tools,
                instructions=instructions,
            )

            if api_result is not None:
                return TokenCountResponse(
                    total_tokens=api_result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="openai_api",
                    original_response=api_result,
                )
        except OpenAIError as e:
            verbose_logger.warning(
                f"OpenAI CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=e.message,
                status_code=e.status_code,
            )
        except Exception as e:
            verbose_logger.warning(f"Error calling OpenAI CountTokens API: {e}")
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=str(e),
                status_code=500,
            )

        return None
|
||||
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
OpenAI Responses API token counting transformation logic.
|
||||
|
||||
This module handles the transformation of requests to OpenAI's /v1/responses/input_tokens endpoint.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
|
||||
class OpenAICountTokensConfig:
    """
    Configuration and transformation logic for OpenAI Responses API token counting.

    OpenAI Responses API Token Counting Specification:
    - Endpoint: POST https://api.openai.com/v1/responses/input_tokens
    - Response: {"input_tokens": <number>}
    """

    def get_openai_count_tokens_endpoint(self, api_base: Optional[str] = None) -> str:
        """Build the /responses/input_tokens URL, defaulting to api.openai.com."""
        root = (api_base or "https://api.openai.com/v1").rstrip("/")
        return root + "/responses/input_tokens"

    def transform_request_to_count_tokens(
        self,
        model: str,
        input: Union[str, List[Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform request to OpenAI Responses API token counting format.

        The Responses API uses `input` (not `messages`) and `instructions` (not `system`).
        """
        body: Dict[str, Any] = {"model": model, "input": input}
        if instructions is not None:
            body["instructions"] = instructions
        if tools is not None:
            body["tools"] = self._transform_tools_for_responses_api(tools)
        return body

    def get_required_headers(self, api_key: str) -> Dict[str, str]:
        """Headers required by the endpoint: JSON body plus bearer auth."""
        return {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def validate_request(self, model: str, input: Union[str, List[Any]]) -> None:
        """Raise ValueError when `model` or `input` is missing/empty."""
        if not model:
            raise ValueError("model parameter is required")
        if not input:
            raise ValueError("input parameter is required")

    @staticmethod
    def _transform_tools_for_responses_api(
        tools: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Transform OpenAI chat tools format to Responses API tools format.

        Chat format: {"type": "function", "function": {"name": "...", "parameters": {...}}}
        Responses format: {"type": "function", "name": "...", "parameters": {...}}
        """
        out: List[Dict[str, Any]] = []
        for entry in tools:
            if entry.get("type") != "function" or "function" not in entry:
                # Non-function tools (e.g. web_search, file_search) pass through as-is.
                out.append(entry)
                continue
            fn = entry["function"]
            flattened: Dict[str, Any] = {
                "type": "function",
                "name": fn.get("name", ""),
                "description": fn.get("description", ""),
                "parameters": fn.get("parameters", {}),
            }
            if "strict" in fn:
                flattened["strict"] = fn["strict"]
            out.append(flattened)
        return out

    @staticmethod
    def messages_to_responses_input(
        messages: List[Dict[str, Any]],
    ) -> tuple:
        """
        Convert standard chat messages format to OpenAI Responses API input format.

        Returns:
            (input_items, instructions) tuple where instructions is extracted
            from system/developer messages.
        """

        def _flatten_blocks(blocks: List[Any]) -> str:
            # Join text-type blocks (and bare strings) with newlines.
            pieces: List[str] = []
            for part in blocks:
                if isinstance(part, dict) and part.get("type") == "text":
                    pieces.append(part.get("text", ""))
                elif isinstance(part, str):
                    pieces.append(part)
            return "\n".join(pieces)

        items: List[Dict[str, Any]] = []
        system_chunks: List[str] = []

        for message in messages:
            role = message.get("role", "")
            content = message.get("content") or ""

            if role in ("system", "developer"):
                # System/developer messages become `instructions`, not input items.
                if isinstance(content, str):
                    system_chunks.append(content)
                elif isinstance(content, list):
                    system_chunks.append(_flatten_blocks(content))
            elif role == "user":
                if isinstance(content, list):
                    content = _flatten_blocks(content)
                items.append({"role": "user", "content": content})
            elif role == "assistant":
                if content:
                    items.append({"role": "assistant", "content": content})
                tool_calls = message.get("tool_calls")
                if tool_calls:
                    # Each chat tool call maps to a Responses API function_call item.
                    for call in tool_calls:
                        fn = call.get("function", {})
                        items.append(
                            {
                                "type": "function_call",
                                "call_id": call.get("id", ""),
                                "name": fn.get("name", ""),
                                "arguments": fn.get("arguments", ""),
                            }
                        )
                elif not content:
                    # Keep a placeholder for an assistant turn with no text/tools.
                    items.append({"role": "assistant", "content": content})
            elif role == "tool":
                items.append(
                    {
                        "type": "function_call_output",
                        "call_id": message.get("tool_call_id", ""),
                        "output": content if isinstance(content, str) else str(content),
                    }
                )

        joined = "\n".join(system_chunks) if system_chunks else None
        return items, joined
|
||||
@@ -0,0 +1,119 @@
|
||||
# OpenAI Responses API Guardrail Translation Handler
|
||||
|
||||
This module provides guardrail translation support for the OpenAI Responses API format.
|
||||
|
||||
## Overview
|
||||
|
||||
The `OpenAIResponsesHandler` class handles the translation of guardrail operations for both input and output of the Responses API. It follows the same pattern as the Chat Completions handler but is adapted for the Responses API's specific data structures.
|
||||
|
||||
## Responses API Format
|
||||
|
||||
### Input Format
|
||||
The Responses API accepts input in two formats:
|
||||
|
||||
1. **String input**: Simple text string
|
||||
```python
|
||||
{"input": "Hello world", "model": "gpt-4"}
|
||||
```
|
||||
|
||||
2. **List input**: Array of message objects (ResponseInputParam)
|
||||
```python
|
||||
{
|
||||
"input": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello", # Can be string or list of content items
|
||||
"type": "message"
|
||||
}
|
||||
],
|
||||
"model": "gpt-4"
|
||||
}
|
||||
```
|
||||
|
||||
### Output Format
|
||||
The Responses API returns a `ResponsesAPIResponse` object with:
|
||||
|
||||
```python
|
||||
{
|
||||
"id": "resp_123",
|
||||
"output": [
|
||||
{
|
||||
"type": "message",
|
||||
"id": "msg_123",
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "Assistant response",
|
||||
"annotations": []
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
The handler is automatically discovered and registered for `CallTypes.responses` and `CallTypes.aresponses`.
|
||||
|
||||
### Example
|
||||
|
||||
```python
|
||||
from litellm.llms import get_guardrail_translation_mapping
|
||||
from litellm.types.utils import CallTypes
|
||||
|
||||
# Get the handler
|
||||
handler_class = get_guardrail_translation_mapping(CallTypes.responses)
|
||||
handler = handler_class()
|
||||
|
||||
# Process input
|
||||
data = {"input": "User message", "model": "gpt-4"}
|
||||
processed_data = await handler.process_input_messages(data, guardrail_instance)
|
||||
|
||||
# Process output
|
||||
response = await litellm.aresponses(**processed_data)
|
||||
processed_response = await handler.process_output_response(response, guardrail_instance)
|
||||
```
|
||||
|
||||
## Key Methods
|
||||
|
||||
### `process_input_messages(data, guardrail_to_apply)`
|
||||
Processes input data by:
|
||||
1. Handling both string and list input formats
|
||||
2. Extracting text content from messages
|
||||
3. Applying guardrails to text content in parallel
|
||||
4. Mapping guardrail responses back to the original structure
|
||||
|
||||
### `process_output_response(response, guardrail_to_apply)`
|
||||
Processes output response by:
|
||||
1. Extracting text from output items' content
|
||||
2. Applying guardrails to all text content in parallel
|
||||
3. Replacing original text with guardrailed versions
|
||||
|
||||
## Extending the Handler
|
||||
|
||||
The handler can be customized by overriding these methods:
|
||||
|
||||
- `_extract_input_text_and_create_tasks()`: Customize input text extraction logic
|
||||
- `_apply_guardrail_responses_to_input()`: Customize how guardrail responses are applied to input
|
||||
- `_extract_output_text_and_create_tasks()`: Customize output text extraction logic
|
||||
- `_apply_guardrail_responses_to_output()`: Customize how guardrail responses are applied to output
|
||||
- `_has_text_content()`: Customize text content detection
|
||||
|
||||
## Testing
|
||||
|
||||
Comprehensive tests are available in `tests/llm_translation/test_openai_responses_guardrail_handler.py`:
|
||||
|
||||
```bash
|
||||
pytest tests/llm_translation/test_openai_responses_guardrail_handler.py -v
|
||||
```
|
||||
|
||||
## Implementation Details
|
||||
|
||||
- **Parallel Processing**: All text content is processed in parallel using `asyncio.gather()`
|
||||
- **Mapping Tracking**: Uses tuples to track the location of each text segment for accurate replacement
|
||||
- **Type Safety**: Handles both Pydantic objects and dict representations
|
||||
- **Multimodal Support**: Properly handles mixed content with text and other media types
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
"""OpenAI Responses API handler for Unified Guardrails."""
|
||||
|
||||
from litellm.llms.openai.responses.guardrail_translation.handler import (
|
||||
OpenAIResponsesHandler,
|
||||
)
|
||||
from litellm.types.utils import CallTypes
|
||||
|
||||
# Maps the Responses API call types (sync and async variants) to the
# handler class that translates guardrail operations for this provider.
guardrail_translation_mappings = {
    CallTypes.responses: OpenAIResponsesHandler,
    CallTypes.aresponses: OpenAIResponsesHandler,
}

__all__ = ["guardrail_translation_mappings"]
|
||||
@@ -0,0 +1,760 @@
|
||||
"""
|
||||
OpenAI Responses API Handler for Unified Guardrails
|
||||
|
||||
This module provides a class-based handler for OpenAI Responses API format.
|
||||
The class methods can be overridden for custom behavior.
|
||||
|
||||
Pattern Overview:
|
||||
-----------------
|
||||
1. Extract text content from input/output (both string and list formats)
|
||||
2. Create async tasks to apply guardrails to each text segment
|
||||
3. Track mappings to know where each response belongs
|
||||
4. Apply guardrail responses back to the original structure
|
||||
|
||||
Responses API Format:
|
||||
---------------------
|
||||
Input: Union[str, List[Dict]] where each dict has:
|
||||
- role: str
|
||||
- content: Union[str, List[Dict]] (can have text items)
|
||||
- type: str (e.g., "message")
|
||||
|
||||
Output: response.output is List[GenericResponseOutputItem] where each has:
|
||||
- type: str (e.g., "message")
|
||||
- id: str
|
||||
- status: str
|
||||
- role: str
|
||||
- content: List[OutputText] where OutputText has:
|
||||
- type: str (e.g., "output_text")
|
||||
- text: str
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
|
||||
from pydantic import BaseModel
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.completion_extras.litellm_responses_transformation.transformation import (
|
||||
LiteLLMResponsesTransformationHandler,
|
||||
OpenAiResponsesToChatCompletionStreamIterator,
|
||||
)
|
||||
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
|
||||
from litellm.responses.litellm_completion_transformation.transformation import (
|
||||
LiteLLMCompletionResponsesConfig,
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolParam,
|
||||
)
|
||||
from litellm.types.responses.main import (
|
||||
GenericResponseOutputItem,
|
||||
OutputFunctionToolCall,
|
||||
OutputText,
|
||||
)
|
||||
from litellm.types.utils import GenericGuardrailAPIInputs
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||
from litellm.types.llms.openai import ResponseInputParam
|
||||
from litellm.types.utils import ResponsesAPIResponse
|
||||
|
||||
|
||||
class OpenAIResponsesHandler(BaseTranslation):
|
||||
"""
|
||||
Handler for processing OpenAI Responses API with guardrails.
|
||||
|
||||
This class provides methods to:
|
||||
1. Process input (pre-call hook)
|
||||
2. Process output response (post-call hook)
|
||||
|
||||
Methods can be overridden to customize behavior for different message formats.
|
||||
"""
|
||||
|
||||
    async def process_input_messages(
        self,
        data: dict,
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
    ) -> Any:
        """
        Process input by applying guardrails to text content.

        Handles both string input and list of message objects.

        Args:
            data: Responses API request body. ``data["input"]`` is read and
                mutated in place; ``data["tools"]`` may be replaced with
                guardrailed tools.
            guardrail_to_apply: Guardrail whose ``apply_guardrail`` is invoked
                on the extracted texts/images/tools.
            litellm_logging_obj: Optional logging object forwarded to the
                guardrail call.

        Returns:
            The (possibly mutated) ``data`` dict.
        """
        input_data: Optional[Union[str, "ResponseInputParam"]] = data.get("input")
        tools_to_check: List[ChatCompletionToolParam] = []
        if input_data is None:
            return data

        # Chat-completion-shaped view of the input, for guardrails that want
        # structured messages alongside the raw texts.
        structured_messages = (
            LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
                input=input_data,
                responses_api_request=data,
            )
        )

        # Handle simple string input
        if isinstance(input_data, str):
            inputs = GenericGuardrailAPIInputs(texts=[input_data])
            original_tools: List[Dict[str, Any]] = []

            # Extract and transform tools if present
            if "tools" in data and data["tools"]:
                original_tools = list(data["tools"])
                self._extract_and_transform_tools(data["tools"], tools_to_check)
            if tools_to_check:
                inputs["tools"] = tools_to_check
            if structured_messages:
                inputs["structured_messages"] = structured_messages  # type: ignore
            # Include model information if available
            model = data.get("model")
            if model:
                inputs["model"] = model

            guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
                inputs=inputs,
                request_data=data,
                input_type="request",
                logging_obj=litellm_logging_obj,
            )
            # Single-text case: the guardrailed text replaces the input string;
            # fall back to the original if the guardrail returned no texts.
            guardrailed_texts = guardrailed_inputs.get("texts", [])
            data["input"] = guardrailed_texts[0] if guardrailed_texts else input_data
            self._apply_guardrailed_tools_to_data(
                data, original_tools, guardrailed_inputs.get("tools")
            )
            verbose_proxy_logger.debug("OpenAI Responses API: Processed string input")
            return data

        # Handle list input (ResponseInputParam)
        if not isinstance(input_data, list):
            return data

        texts_to_check: List[str] = []
        images_to_check: List[str] = []
        # (message index, content index or None) for each extracted text, so
        # guardrail responses can be written back to the right location.
        task_mappings: List[Tuple[int, Optional[int]]] = []
        original_tools_list: List[Dict[str, Any]] = list(data.get("tools") or [])

        # Step 1: Extract all text content, images, and tools
        for msg_idx, message in enumerate(input_data):
            self._extract_input_text_and_images(
                message=message,
                msg_idx=msg_idx,
                texts_to_check=texts_to_check,
                images_to_check=images_to_check,
                task_mappings=task_mappings,
            )

        # Extract and transform tools if present
        if "tools" in data and data["tools"]:
            self._extract_and_transform_tools(data["tools"], tools_to_check)

        # Step 2: Apply guardrail to all texts in batch
        # NOTE(review): if no texts were extracted, tools/images are not sent
        # to the guardrail either - confirm this is intended.
        if texts_to_check:
            inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
            if images_to_check:
                inputs["images"] = images_to_check
            if tools_to_check:
                inputs["tools"] = tools_to_check
            if structured_messages:
                inputs["structured_messages"] = structured_messages  # type: ignore
            # Include model information if available
            model = data.get("model")
            if model:
                inputs["model"] = model
            guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
                inputs=inputs,
                request_data=data,
                input_type="request",
                logging_obj=litellm_logging_obj,
            )

            guardrailed_texts = guardrailed_inputs.get("texts", [])
            self._apply_guardrailed_tools_to_data(
                data,
                original_tools_list,
                guardrailed_inputs.get("tools"),
            )

            # Step 3: Map guardrail responses back to original input structure
            await self._apply_guardrail_responses_to_input(
                messages=input_data,
                responses=guardrailed_texts,
                task_mappings=task_mappings,
            )

        verbose_proxy_logger.debug(
            "OpenAI Responses API: Processed input messages: %s", input_data
        )

        return data
|
||||
|
||||
def extract_request_tool_names(self, data: dict) -> List[str]:
|
||||
"""Extract tool names from Responses API request (tools[].name for function, tools[].server_label for mcp)."""
|
||||
names: List[str] = []
|
||||
for tool in data.get("tools") or []:
|
||||
if not isinstance(tool, dict):
|
||||
continue
|
||||
if tool.get("type") == "function" and tool.get("name"):
|
||||
names.append(str(tool["name"]))
|
||||
elif tool.get("type") == "mcp" and tool.get("server_label"):
|
||||
names.append(str(tool["server_label"]))
|
||||
return names
|
||||
|
||||
def _extract_and_transform_tools(
|
||||
self,
|
||||
tools: List[Dict[str, Any]],
|
||||
tools_to_check: List[ChatCompletionToolParam],
|
||||
) -> None:
|
||||
"""
|
||||
Extract and transform tools from Responses API format to Chat Completion format.
|
||||
|
||||
Uses the LiteLLM transformation function to convert Responses API tools
|
||||
to Chat Completion tools that can be passed to guardrails.
|
||||
"""
|
||||
if tools is not None and isinstance(tools, list):
|
||||
# Transform Responses API tools to Chat Completion tools
|
||||
(
|
||||
transformed_tools,
|
||||
_,
|
||||
) = LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
|
||||
tools # type: ignore
|
||||
)
|
||||
tools_to_check.extend(
|
||||
cast(List[ChatCompletionToolParam], transformed_tools)
|
||||
)
|
||||
|
||||
def _remap_tools_to_responses_api_format(
|
||||
self, guardrailed_tools: List[Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Remap guardrail-returned tools (Chat Completion format) back to
|
||||
Responses API request tool format.
|
||||
"""
|
||||
return LiteLLMCompletionResponsesConfig.transform_chat_completion_tool_params_to_responses_api_tools(
|
||||
guardrailed_tools # type: ignore
|
||||
)
|
||||
|
||||
def _merge_tools_after_guardrail(
|
||||
self,
|
||||
original_tools: List[Dict[str, Any]],
|
||||
remapped: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Merge remapped guardrailed tools with original tools that were not sent
|
||||
to the guardrail (e.g. web_search, web_search_preview), preserving order.
|
||||
"""
|
||||
if not original_tools:
|
||||
return remapped
|
||||
result: List[Dict[str, Any]] = []
|
||||
j = 0
|
||||
for tool in original_tools:
|
||||
if isinstance(tool, dict) and tool.get("type") in (
|
||||
"web_search",
|
||||
"web_search_preview",
|
||||
):
|
||||
result.append(tool)
|
||||
else:
|
||||
if j < len(remapped):
|
||||
result.append(remapped[j])
|
||||
j += 1
|
||||
return result
|
||||
|
||||
def _apply_guardrailed_tools_to_data(
|
||||
self,
|
||||
data: dict,
|
||||
original_tools: List[Dict[str, Any]],
|
||||
guardrailed_tools: Optional[List[Any]],
|
||||
) -> None:
|
||||
"""Remap guardrailed tools to Responses API format and merge with original, then set data['tools']."""
|
||||
if guardrailed_tools is not None:
|
||||
remapped = self._remap_tools_to_responses_api_format(guardrailed_tools)
|
||||
data["tools"] = self._merge_tools_after_guardrail(original_tools, remapped)
|
||||
|
||||
def _extract_input_text_and_images(
|
||||
self,
|
||||
message: Any, # Can be Dict[str, Any] or ResponseInputParam
|
||||
msg_idx: int,
|
||||
texts_to_check: List[str],
|
||||
images_to_check: List[str],
|
||||
task_mappings: List[Tuple[int, Optional[int]]],
|
||||
) -> None:
|
||||
"""
|
||||
Extract text content and images from an input message.
|
||||
|
||||
Override this method to customize text/image extraction logic.
|
||||
"""
|
||||
content = message.get("content", None)
|
||||
if content is None:
|
||||
return
|
||||
|
||||
if isinstance(content, str):
|
||||
# Simple string content
|
||||
texts_to_check.append(content)
|
||||
task_mappings.append((msg_idx, None))
|
||||
|
||||
elif isinstance(content, list):
|
||||
# List content (e.g., multimodal with text and images)
|
||||
for content_idx, content_item in enumerate(content):
|
||||
if isinstance(content_item, dict):
|
||||
# Extract text
|
||||
text_str = content_item.get("text", None)
|
||||
if text_str is not None:
|
||||
texts_to_check.append(text_str)
|
||||
task_mappings.append((msg_idx, int(content_idx)))
|
||||
|
||||
# Extract images
|
||||
if content_item.get("type") == "image_url":
|
||||
image_url = content_item.get("image_url", {})
|
||||
if isinstance(image_url, dict):
|
||||
url = image_url.get("url")
|
||||
if url:
|
||||
images_to_check.append(url)
|
||||
|
||||
async def _apply_guardrail_responses_to_input(
|
||||
self,
|
||||
messages: Any, # Can be List[Dict[str, Any]] or ResponseInputParam
|
||||
responses: List[str],
|
||||
task_mappings: List[Tuple[int, Optional[int]]],
|
||||
) -> None:
|
||||
"""
|
||||
Apply guardrail responses back to input messages.
|
||||
|
||||
Override this method to customize how responses are applied.
|
||||
"""
|
||||
for task_idx, guardrail_response in enumerate(responses):
|
||||
mapping = task_mappings[task_idx]
|
||||
msg_idx = cast(int, mapping[0])
|
||||
content_idx_optional = cast(Optional[int], mapping[1])
|
||||
|
||||
content = messages[msg_idx].get("content", None)
|
||||
if content is None:
|
||||
continue
|
||||
|
||||
if isinstance(content, str) and content_idx_optional is None:
|
||||
# Replace string content with guardrail response
|
||||
messages[msg_idx]["content"] = guardrail_response
|
||||
|
||||
elif isinstance(content, list) and content_idx_optional is not None:
|
||||
# Replace specific text item in list content
|
||||
if isinstance(messages[msg_idx]["content"][content_idx_optional], dict):
|
||||
messages[msg_idx]["content"][content_idx_optional][
|
||||
"text"
|
||||
] = guardrail_response
|
||||
|
||||
async def process_output_response(
    self,
    response: "ResponsesAPIResponse",
    guardrail_to_apply: "CustomGuardrail",
    litellm_logging_obj: Optional[Any] = None,
    user_api_key_dict: Optional[Any] = None,
) -> Any:
    """
    Process output response by applying guardrails to text content and tool calls.

    The response object is mutated in place (texts are overwritten with the
    guardrail's output) and the same object is returned.

    Args:
        response: LiteLLM ResponsesAPIResponse object
        guardrail_to_apply: The guardrail instance to apply
        litellm_logging_obj: Optional logging object
        user_api_key_dict: User API key metadata to pass to guardrails

    Returns:
        Modified response with guardrail applied to content

    Response Format Support:
        - response.output is a list of output items
        - Each output item can be:
            * GenericResponseOutputItem with a content list of OutputText objects
            * ResponseFunctionToolCall with tool call data
        - Each OutputText object has a text field
    """

    # Accumulators filled by _extract_output_text_and_images below.
    texts_to_check: List[str] = []
    images_to_check: List[str] = []
    tool_calls_to_check: List[ChatCompletionToolCallChunk] = []
    task_mappings: List[Tuple[int, int]] = []
    # Track (output_item_index, content_index) for each text

    # Handle both dict and Pydantic object responses
    if isinstance(response, dict):
        response_output = response.get("output", [])
    elif hasattr(response, "output"):
        response_output = response.output or []
    else:
        # Unknown response shape: pass through untouched rather than fail.
        verbose_proxy_logger.debug(
            "OpenAI Responses API: No output found in response"
        )
        return response

    if not response_output:
        verbose_proxy_logger.debug("OpenAI Responses API: Empty output in response")
        return response

    # Step 1: Extract all text content and tool calls from response output
    for output_idx, output_item in enumerate(response_output):
        self._extract_output_text_and_images(
            output_item=output_item,
            output_idx=output_idx,
            texts_to_check=texts_to_check,
            images_to_check=images_to_check,
            task_mappings=task_mappings,
            tool_calls_to_check=tool_calls_to_check,
        )

    # Step 2: Apply guardrail to all texts in batch
    if texts_to_check or tool_calls_to_check:
        # Create a request_data dict with response info and user API key metadata
        request_data: dict = {"response": response}

        # Add user API key metadata with prefixed keys
        user_metadata = self.transform_user_api_key_dict_to_metadata(
            user_api_key_dict
        )
        if user_metadata:
            request_data["litellm_metadata"] = user_metadata

        inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
        if images_to_check:
            inputs["images"] = images_to_check
        if tool_calls_to_check:
            inputs["tool_calls"] = tool_calls_to_check
        # Include model information from the response if available
        response_model = None
        if isinstance(response, dict):
            response_model = response.get("model")
        elif hasattr(response, "model"):
            response_model = getattr(response, "model", None)
        if response_model:
            inputs["model"] = response_model

        # Single batched guardrail call for all texts/tool calls.
        guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
            inputs=inputs,
            request_data=request_data,
            input_type="response",
            logging_obj=litellm_logging_obj,
        )

        guardrailed_texts = guardrailed_inputs.get("texts", [])

        # Step 3: Map guardrail responses back to original response structure
        await self._apply_guardrail_responses_to_output(
            response=response,
            responses=guardrailed_texts,
            task_mappings=task_mappings,
        )

    verbose_proxy_logger.debug(
        "OpenAI Responses API: Processed output response: %s", response
    )

    return response
|
||||
|
||||
async def process_output_streaming_response(
    self,
    responses_so_far: List[Any],
    guardrail_to_apply: "CustomGuardrail",
    litellm_logging_obj: Optional[Any] = None,
    user_api_key_dict: Optional[Any] = None,
) -> List[Any]:
    """
    Process output streaming response by applying guardrails to text content.

    Dispatches on the type of the most recent chunk:
    - "response.output_item.done": guardrail any tool calls in that item.
    - "response.completed": guardrail the final text and tool calls.
    - anything else: guardrail the concatenated text streamed so far.

    Note: guardrail results are not written back into the chunks here; a
    blocking guardrail is expected to raise from apply_guardrail.
    """

    final_chunk = responses_so_far[-1]

    if final_chunk.get("type") == "response.output_item.done":
        # convert openai response to model response
        model_response_stream = OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(
            final_chunk
        )

        tool_calls = model_response_stream.choices[0].delta.tool_calls
        if tool_calls:
            inputs = GenericGuardrailAPIInputs()
            inputs["tool_calls"] = cast(
                List[ChatCompletionToolCallChunk], tool_calls
            )
            # Include model information if available
            if (
                hasattr(model_response_stream, "model")
                and model_response_stream.model
            ):
                inputs["model"] = model_response_stream.model
            _guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
                inputs=inputs,
                request_data={},
                input_type="response",
                logging_obj=litellm_logging_obj,
            )
        # NOTE(review): control flow reconstructed from a mangled source —
        # confirm this early return also covers the no-tool-call case.
        return responses_so_far
    elif final_chunk.get("type") == "response.completed":
        # convert openai response to model response
        outputs = final_chunk.get("response", {}).get("output", [])

        model_response_choices = LiteLLMResponsesTransformationHandler._convert_response_output_to_choices(
            output_items=outputs,
            handle_raw_dict_callback=None,
        )

        if model_response_choices:
            tool_calls = model_response_choices[0].message.tool_calls
            text = model_response_choices[0].message.content
            guardrail_inputs = GenericGuardrailAPIInputs()
            if text:
                guardrail_inputs["texts"] = [text]
            if tool_calls:
                guardrail_inputs["tool_calls"] = cast(
                    List[ChatCompletionToolCallChunk], tool_calls
                )
            # Include model information from the response if available
            response_model = final_chunk.get("response", {}).get("model")
            if response_model:
                guardrail_inputs["model"] = response_model
            if tool_calls or text:
                _guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
                    inputs=guardrail_inputs,
                    request_data={},
                    input_type="response",
                    logging_obj=litellm_logging_obj,
                )
            return responses_so_far
        else:
            # No choices to guardrail; fall through to the generic
            # text-so-far path below.
            verbose_proxy_logger.debug(
                "Skipping output guardrail - model response has no choices"
            )
    # model_response_stream = OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(final_chunk)
    # tool_calls = model_response_stream.choices[0].tool_calls
    # convert openai response to model response
    # Generic path: guardrail the full text accumulated across all chunks.
    string_so_far = self.get_streaming_string_so_far(responses_so_far)
    inputs = GenericGuardrailAPIInputs(texts=[string_so_far])
    # Try to get model from the final chunk if available
    if isinstance(final_chunk, dict):
        response_model = (
            final_chunk.get("response", {}).get("model")
            if isinstance(final_chunk.get("response"), dict)
            else None
        )
        if response_model:
            inputs["model"] = response_model
    _guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
        inputs=inputs,
        request_data={},
        input_type="response",
        logging_obj=litellm_logging_obj,
    )
    return responses_so_far
|
||||
|
||||
def _check_streaming_has_ended(self, responses_so_far: List[Any]) -> bool:
|
||||
"""
|
||||
Check if the streaming has ended.
|
||||
"""
|
||||
return all(
|
||||
response.choices[0].finish_reason is not None
|
||||
for response in responses_so_far
|
||||
)
|
||||
|
||||
def get_streaming_string_so_far(self, responses_so_far: List[Any]) -> str:
    """
    Concatenate the "text" field of every streamed chunk received so far.

    Chunks without a "text" key contribute the empty string.
    """
    pieces: List[str] = []
    for chunk in responses_so_far:
        pieces.append(chunk.get("text", ""))
    return "".join(pieces)
|
||||
|
||||
def _has_text_content(self, response: "ResponsesAPIResponse") -> bool:
    """
    Check if response has any text content to process.

    Returns True on the first output item that contains a non-empty
    text field, either as an OutputText object or a plain dict.

    Override this method to customize text content detection.
    """
    if not hasattr(response, "output") or response.output is None:
        return False

    for output_item in response.output:
        if isinstance(output_item, BaseModel):
            # Normalize arbitrary pydantic output items into
            # GenericResponseOutputItem so content access is uniform.
            try:
                generic_response_output_item = (
                    GenericResponseOutputItem.model_validate(
                        output_item.model_dump()
                    )
                )
                if generic_response_output_item.content:
                    output_item = generic_response_output_item
            except Exception:
                # Item doesn't fit the generic shape (e.g. a tool call);
                # skip it rather than fail the whole check.
                continue
        if isinstance(output_item, (GenericResponseOutputItem, dict)):
            content = (
                output_item.content
                if isinstance(output_item, GenericResponseOutputItem)
                else output_item.get("content", [])
            )
            if content:
                for content_item in content:
                    # Check if it's an OutputText with text
                    if isinstance(content_item, OutputText):
                        if content_item.text:
                            return True
                    elif isinstance(content_item, dict):
                        if content_item.get("text"):
                            return True
    return False
|
||||
|
||||
def _extract_output_text_and_images(
    self,
    output_item: Any,
    output_idx: int,
    texts_to_check: List[str],
    images_to_check: List[str],
    task_mappings: List[Tuple[int, int]],
    tool_calls_to_check: Optional[List[ChatCompletionToolCallChunk]] = None,
) -> None:
    """
    Extract text content, images, and tool calls from a response output item.

    Mutates the accumulator lists in place; for every text appended, a
    matching (output_idx, content_idx) pair is appended to task_mappings
    so results can be written back later.

    Note: despite the name, this body never appends to images_to_check —
    the parameter is kept for subclass overrides.

    Override this method to customize text/image/tool extraction logic.
    """

    # Check if this is a tool call (OutputFunctionToolCall)
    if isinstance(output_item, OutputFunctionToolCall):
        if tool_calls_to_check is not None:
            tool_call_dict = LiteLLMCompletionResponsesConfig.convert_response_function_tool_call_to_chat_completion_tool_call(
                tool_call_item=output_item,
                index=output_idx,
            )
            tool_calls_to_check.append(
                cast(ChatCompletionToolCallChunk, tool_call_dict)
            )
        return
    elif (
        isinstance(output_item, BaseModel)
        and hasattr(output_item, "type")
        and getattr(output_item, "type") == "function_call"
    ):
        # Pydantic models that duck-type as tool calls (type == "function_call")
        # but are not OutputFunctionToolCall instances.
        if tool_calls_to_check is not None:
            tool_call_dict = LiteLLMCompletionResponsesConfig.convert_response_function_tool_call_to_chat_completion_tool_call(
                tool_call_item=output_item,
                index=output_idx,
            )
            tool_calls_to_check.append(
                cast(ChatCompletionToolCallChunk, tool_call_dict)
            )
        return
    elif (
        isinstance(output_item, dict) and output_item.get("type") == "function_call"
    ):
        # Handle dict representation of tool call
        if tool_calls_to_check is not None:
            # Convert dict to ResponseFunctionToolCall for processing
            try:
                tool_call_obj = ResponseFunctionToolCall(**output_item)
                tool_call_dict = LiteLLMCompletionResponsesConfig.convert_response_function_tool_call_to_chat_completion_tool_call(
                    tool_call_item=tool_call_obj,
                    index=output_idx,
                )
                tool_calls_to_check.append(
                    cast(ChatCompletionToolCallChunk, tool_call_dict)
                )
            except Exception:
                # Malformed tool-call dict: skip silently (best-effort).
                pass
        return

    # Handle both GenericResponseOutputItem and dict
    content: Optional[Union[List[OutputText], List[dict]]] = None
    if isinstance(output_item, BaseModel):
        try:
            output_item_dump = output_item.model_dump()
            generic_response_output_item = GenericResponseOutputItem.model_validate(
                output_item_dump
            )
            if generic_response_output_item.content:
                content = generic_response_output_item.content
        except Exception:
            # Try to extract content directly from output_item if validation fails
            if hasattr(output_item, "content") and output_item.content:  # type: ignore
                content = output_item.content  # type: ignore
            else:
                return
    elif isinstance(output_item, dict):
        content = output_item.get("content", [])
    else:
        # Unsupported item type: nothing to extract.
        return

    if not content:
        return

    verbose_proxy_logger.debug(
        "OpenAI Responses API: Processing output item: %s", output_item
    )

    # Iterate through content items (list of OutputText objects)
    for content_idx, content_item in enumerate(content):
        # Handle both OutputText objects and dicts
        if isinstance(content_item, OutputText):
            text_content = content_item.text
        elif isinstance(content_item, dict):
            text_content = content_item.get("text")
        else:
            continue

        if text_content:
            texts_to_check.append(text_content)
            task_mappings.append((output_idx, int(content_idx)))
|
||||
|
||||
async def _apply_guardrail_responses_to_output(
    self,
    response: "ResponsesAPIResponse",
    responses: List[str],
    task_mappings: List[Tuple[int, int]],
) -> None:
    """
    Apply guardrail responses back to output response.

    Each guardrail text in ``responses`` is written to the location named
    by the matching (output_index, content_index) pair in ``task_mappings``.
    Out-of-range indices are skipped silently. The response is mutated in
    place; nothing is returned.

    Override this method to customize how responses are applied.
    """
    # Handle both dict and Pydantic object responses
    if isinstance(response, dict):
        response_output = response.get("output", [])
    elif hasattr(response, "output"):
        response_output = response.output or []
    else:
        return

    for task_idx, guardrail_response in enumerate(responses):
        mapping = task_mappings[task_idx]
        output_idx = cast(int, mapping[0])
        content_idx = cast(int, mapping[1])

        if output_idx >= len(response_output):
            # Mapping points past the output list: skip defensively.
            continue

        output_item = response_output[output_idx]

        # Handle both GenericResponseOutputItem, BaseModel, and dict
        if isinstance(output_item, GenericResponseOutputItem):
            if output_item.content and content_idx < len(output_item.content):
                content_item = output_item.content[content_idx]
                if isinstance(content_item, OutputText):
                    content_item.text = guardrail_response
                elif isinstance(content_item, dict):
                    content_item["text"] = guardrail_response
        elif isinstance(output_item, BaseModel):
            # Handle other Pydantic models by converting to GenericResponseOutputItem
            # NOTE(review): the validated copy below is mutated but then
            # discarded; only the write-back to output_item takes effect.
            # The validation mainly serves as a shape/bounds check — confirm.
            try:
                generic_item = GenericResponseOutputItem.model_validate(
                    output_item.model_dump()
                )
                if generic_item.content and content_idx < len(generic_item.content):
                    content_item = generic_item.content[content_idx]
                    if isinstance(content_item, OutputText):
                        content_item.text = guardrail_response
                    # Update the original response output
                    if hasattr(output_item, "content") and output_item.content:  # type: ignore
                        original_content = output_item.content[content_idx]  # type: ignore
                        if hasattr(original_content, "text"):
                            original_content.text = guardrail_response  # type: ignore
            except Exception:
                # Best-effort: leave this output item unmodified on failure.
                pass
        elif isinstance(output_item, dict):
            content = output_item.get("content", [])
            if content and content_idx < len(content):
                if isinstance(content[content_idx], dict):
                    content[content_idx]["text"] = guardrail_response
                elif hasattr(content[content_idx], "text"):
                    content[content_idx].text = guardrail_response
|
||||
@@ -0,0 +1,580 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast, get_type_hints
|
||||
|
||||
import httpx
|
||||
from openai.types.responses import ResponseReasoningItem
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.core_helpers import process_response_headers
|
||||
from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
|
||||
_safe_convert_created_field,
|
||||
)
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.llms.openai import *
|
||||
from litellm.types.responses.main import *
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.utils import LlmProviders
|
||||
|
||||
from ..common_utils import OpenAIError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
|
||||
|
||||
LiteLLMLoggingObj = _LiteLLMLoggingObj
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
|
||||
|
||||
class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
|
||||
@property
def custom_llm_provider(self) -> LlmProviders:
    # This config always targets the OpenAI provider.
    return LlmProviders.OPENAI
|
||||
|
||||
def get_supported_openai_params(self, model: str) -> list:
    """
    All OpenAI Responses API params are supported.

    Returns the union of every field declared on ResponsesAPIRequestParams
    plus the generic passthrough params (extra_headers/query/body, timeout).
    Order is unspecified (set-based).
    """
    passthrough_params = {
        "input",
        "model",
        "extra_headers",
        "extra_query",
        "extra_body",
        "timeout",
    }
    typed_params = set(get_type_hints(ResponsesAPIRequestParams).keys())
    return list(passthrough_params | typed_params)
|
||||
|
||||
def map_openai_params(
    self,
    response_api_optional_params: ResponsesAPIOptionalRequestParams,
    model: str,
    drop_params: bool,
) -> Dict:
    """
    Return a shallow dict copy of the optional params.

    No mapping is applied since the inputs are already in OpenAI's own
    spec; `model` and `drop_params` are accepted for interface parity.
    """
    return {**response_api_optional_params}
|
||||
|
||||
def transform_responses_api_request(
    self,
    model: str,
    input: Union[str, ResponseInputParam],
    response_api_optional_request_params: Dict,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Dict:
    """
    Assemble the request payload for the OpenAI Responses API.

    Inputs are already in OpenAI's own format, so no provider mapping is
    needed — pydantic items inside `input` are just normalized to plain
    dicts before the payload is built.
    """
    normalized_input = self._validate_input_param(input)
    request = ResponsesAPIRequestParams(
        model=model,
        input=normalized_input,
        **response_api_optional_request_params,
    )
    return dict(request)
|
||||
|
||||
def _validate_input_param(
    self, input: Union[str, ResponseInputParam]
) -> Union[str, ResponseInputParam]:
    """
    Ensure all input fields if pydantic are converted to dict

    OpenAI API Fails when we try to JSON dumps specific input pydantic fields.
    This function ensures all input fields are converted to dict.

    Non-list inputs (plain strings) are returned unchanged.
    """
    if isinstance(input, list):
        validated_input = []
        for item in input:
            # if it's pydantic, convert to dict (drop None fields so they
            # aren't serialized as explicit nulls)
            if isinstance(item, BaseModel):
                validated_input.append(item.model_dump(exclude_none=True))
            elif isinstance(item, dict):
                # Handle reasoning items specifically to filter out status=None
                if item.get("type") == "reasoning":
                    verbose_logger.debug(f"Handling reasoning item: {item}")
                    # Type assertion since we know it's a dict at this point
                    dict_item = cast(Dict[str, Any], item)
                    filtered_item = self._handle_reasoning_item(dict_item)
                else:
                    # For other dict items, just pass through
                    filtered_item = cast(Dict[str, Any], item)
                validated_input.append(filtered_item)
            else:
                # Unknown item type (e.g. already-serializable primitive):
                # pass through untouched.
                validated_input.append(item)
        return validated_input  # type: ignore
    # Input is expected to be either str or List, no single BaseModel expected
    return input
|
||||
|
||||
def _handle_reasoning_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
    """
    Handle reasoning items specifically to filter out status=None using OpenAI's model.
    Issue: https://github.com/BerriAI/litellm/issues/13484
    OpenAI API does not accept ReasoningItem(status=None), so we need to:
    1. Check if the item is a reasoning type
    2. Create a ResponseReasoningItem object with the item data
    3. Convert it back to dict with exclude_none=True to filter None values

    Non-reasoning items are returned unchanged.
    """
    if item.get("type") == "reasoning":
        try:
            # Ensure required fields are present for ResponseReasoningItem
            # (work on a copy so the caller's dict is not mutated)
            item_data = dict(item)
            if "summary" not in item_data:
                # Synthesize a summary from reasoning_content, truncated
                # to 100 chars with an ellipsis.
                # NOTE(review): assumes reasoning_content, when present,
                # is a string — a None value would raise here (caught below).
                item_data["summary"] = (
                    item_data.get("reasoning_content", "")[:100] + "..."
                    if len(item_data.get("reasoning_content", "")) > 100
                    else item_data.get("reasoning_content", "")
                )

            # Create ResponseReasoningItem object from the item data
            reasoning_item = ResponseReasoningItem(**item_data)

            # Convert back to dict with exclude_none=True to exclude None fields
            dict_reasoning_item = reasoning_item.model_dump(exclude_none=True)

            return dict_reasoning_item
        except Exception as e:
            verbose_logger.debug(
                f"Failed to create ResponseReasoningItem, falling back to manual filtering: {e}"
            )
            # Fallback: manually filter out known None fields
            # (drop a key only when its value is None AND it is one of the
            # fields OpenAI rejects as null)
            filtered_item = {
                k: v
                for k, v in item.items()
                if v is not None
                or k not in {"status", "content", "encrypted_content"}
            }
            return filtered_item
    return item
|
||||
|
||||
def transform_response_api_response(
    self,
    model: str,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """No transform applied since outputs are in OpenAI spec already

    Parses the raw HTTP response into a ResponsesAPIResponse, normalizing
    the created_at field, and surfaces response headers via _hidden_params.

    Raises:
        OpenAIError: when the body is not valid JSON (e.g. an error page).
    """
    try:
        logging_obj.post_call(
            original_response=raw_response.text,
            additional_args={"complete_input_dict": {}},
        )
        raw_response_json = raw_response.json()
        # Providers disagree on the created_at format; coerce it to the
        # shape ResponsesAPIResponse expects.
        raw_response_json["created_at"] = _safe_convert_created_field(
            raw_response_json["created_at"]
        )
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )
    raw_response_headers = dict(raw_response.headers)
    processed_headers = process_response_headers(raw_response_headers)
    try:
        response = ResponsesAPIResponse(**raw_response_json)
    except Exception:
        # Loosely-conforming provider payloads: skip pydantic validation
        # rather than fail the whole request.
        verbose_logger.debug(
            f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
        )
        response = ResponsesAPIResponse.model_construct(**raw_response_json)

    # Store processed headers in additional_headers so they get returned to the client
    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = raw_response_headers
    return response
|
||||
|
||||
def validate_environment(
    self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams]
) -> dict:
    """
    Attach the OpenAI Authorization header to `headers` (mutated in place).

    The API key is resolved in priority order: litellm_params.api_key,
    litellm.api_key, litellm.openai_key, then the OPENAI_API_KEY secret.
    """
    params = litellm_params or GenericLiteLLMParams()
    resolved_key = (
        params.api_key
        or litellm.api_key
        or litellm.openai_key
        or get_secret_str("OPENAI_API_KEY")
    )
    headers["Authorization"] = f"Bearer {resolved_key}"
    return headers
|
||||
|
||||
def get_complete_url(
    self,
    api_base: Optional[str],
    litellm_params: dict,
) -> str:
    """
    Build the full OpenAI Responses API endpoint URL.

    Falls back through litellm.api_base and the OPENAI_BASE_URL /
    OPENAI_API_BASE secrets before defaulting to the public OpenAI host.
    """
    resolved_base = (
        api_base
        or litellm.api_base
        or get_secret_str("OPENAI_BASE_URL")
        or get_secret_str("OPENAI_API_BASE")
        or "https://api.openai.com/v1"
    )
    # Normalize trailing slashes so the path joins cleanly.
    return f"{resolved_base.rstrip('/')}/responses"
|
||||
|
||||
def transform_streaming_response(
    self,
    model: str,
    parsed_chunk: dict,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIStreamingResponse:
    """
    Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse

    Picks the pydantic event class from the chunk's "type" field, patches
    known provider quirks, and validates; on validation failure the chunk
    is wrapped unvalidated via model_construct so streaming never breaks.
    """
    # Convert the dictionary to a properly typed ResponsesAPIStreamingResponse
    verbose_logger.debug("Raw OpenAI Chunk=%s", parsed_chunk)
    event_type = str(parsed_chunk.get("type"))
    event_pydantic_model = OpenAIResponsesAPIConfig.get_event_model_class(
        event_type=event_type
    )
    # Some OpenAI-compatible providers send error.code: null; coalesce so validation succeeds.
    try:
        error_obj = parsed_chunk.get("error")
        if isinstance(error_obj, dict) and error_obj.get("code") is None:
            # Copy before patching so the caller's dict is not mutated.
            parsed_chunk = dict(parsed_chunk)
            parsed_chunk["error"] = dict(error_obj)
            parsed_chunk["error"]["code"] = "unknown_error"
    except Exception:
        verbose_logger.debug("Failed to coalesce error.code in parsed_chunk")

    try:
        return event_pydantic_model(**parsed_chunk)
    except ValidationError:
        # Non-conforming chunk: keep the stream alive with an
        # unvalidated instance instead of raising.
        verbose_logger.debug(
            "Pydantic validation failed for %s with chunk %s, "
            "falling back to model_construct",
            event_pydantic_model.__name__,
            parsed_chunk,
        )
        return event_pydantic_model.model_construct(**parsed_chunk)
|
||||
|
||||
@staticmethod
def get_event_model_class(event_type: str) -> Any:
    """
    Returns the appropriate event model class based on the event type.

    Args:
        event_type (str): The type of event from the response chunk

    Returns:
        Any: The corresponding event model class; unknown event types
        fall back to GenericEvent (no exception is raised).
    """
    event_models = {
        ResponsesAPIStreamEvents.RESPONSE_CREATED: ResponseCreatedEvent,
        ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS: ResponseInProgressEvent,
        ResponsesAPIStreamEvents.RESPONSE_COMPLETED: ResponseCompletedEvent,
        ResponsesAPIStreamEvents.RESPONSE_FAILED: ResponseFailedEvent,
        ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE: ResponseIncompleteEvent,
        ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED: OutputItemAddedEvent,
        ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE: OutputItemDoneEvent,
        ResponsesAPIStreamEvents.CONTENT_PART_ADDED: ContentPartAddedEvent,
        ResponsesAPIStreamEvents.CONTENT_PART_DONE: ContentPartDoneEvent,
        ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA: OutputTextDeltaEvent,
        ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED: OutputTextAnnotationAddedEvent,
        ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE: OutputTextDoneEvent,
        ResponsesAPIStreamEvents.REFUSAL_DELTA: RefusalDeltaEvent,
        ResponsesAPIStreamEvents.REFUSAL_DONE: RefusalDoneEvent,
        ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA: FunctionCallArgumentsDeltaEvent,
        ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE: FunctionCallArgumentsDoneEvent,
        ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS: FileSearchCallInProgressEvent,
        ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING: FileSearchCallSearchingEvent,
        ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED: FileSearchCallCompletedEvent,
        ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS: WebSearchCallInProgressEvent,
        ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING: WebSearchCallSearchingEvent,
        ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED: WebSearchCallCompletedEvent,
        ResponsesAPIStreamEvents.MCP_LIST_TOOLS_IN_PROGRESS: MCPListToolsInProgressEvent,
        ResponsesAPIStreamEvents.MCP_LIST_TOOLS_COMPLETED: MCPListToolsCompletedEvent,
        ResponsesAPIStreamEvents.MCP_LIST_TOOLS_FAILED: MCPListToolsFailedEvent,
        ResponsesAPIStreamEvents.MCP_CALL_IN_PROGRESS: MCPCallInProgressEvent,
        ResponsesAPIStreamEvents.MCP_CALL_ARGUMENTS_DELTA: MCPCallArgumentsDeltaEvent,
        ResponsesAPIStreamEvents.MCP_CALL_ARGUMENTS_DONE: MCPCallArgumentsDoneEvent,
        ResponsesAPIStreamEvents.MCP_CALL_COMPLETED: MCPCallCompletedEvent,
        ResponsesAPIStreamEvents.MCP_CALL_FAILED: MCPCallFailedEvent,
        ResponsesAPIStreamEvents.IMAGE_GENERATION_PARTIAL_IMAGE: ImageGenerationPartialImageEvent,
        ResponsesAPIStreamEvents.ERROR: ErrorEvent,
        # Shell tool events: passthrough as GenericEvent so payload is preserved
        ResponsesAPIStreamEvents.SHELL_CALL_IN_PROGRESS: GenericEvent,
        ResponsesAPIStreamEvents.SHELL_CALL_COMPLETED: GenericEvent,
        ResponsesAPIStreamEvents.SHELL_CALL_OUTPUT: GenericEvent,
    }

    model_class = event_models.get(cast(ResponsesAPIStreamEvents, event_type))
    if not model_class:
        # Unknown/new event types are passed through untyped rather than
        # failing the stream.
        return GenericEvent

    return model_class
|
||||
|
||||
def should_fake_stream(
    self,
    model: Optional[str],
    stream: Optional[bool],
    custom_llm_provider: Optional[str] = None,
) -> bool:
    """
    Decide whether streaming must be simulated client-side.

    Returns True only when streaming was requested (stream is True) and
    the model is known not to support native streaming. Any lookup
    failure is treated as "supports streaming" so we never fake
    unnecessarily.
    """
    if stream is not True:
        # No streaming requested: nothing to fake.
        return False
    if model is None:
        return False
    try:
        native_support = litellm.utils.supports_native_streaming(
            model=model,
            custom_llm_provider=custom_llm_provider,
        )
        if native_support is False:
            return True
    except Exception as e:
        verbose_logger.debug(
            f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
        )
    return False
|
||||
|
||||
def supports_native_websocket(self) -> bool:
    """OpenAI supports native WebSocket for Responses API"""
    return True
|
||||
|
||||
#########################################################
|
||||
########## DELETE RESPONSE API TRANSFORMATION ##############
|
||||
#########################################################
|
||||
def transform_delete_response_api_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and body for deleting a stored response.

    OpenAI expects: DELETE /v1/responses/{response_id} with an empty body.
    """
    delete_url = f"{api_base}/{response_id}"
    empty_body: Dict = {}
    return delete_url, empty_body
|
||||
|
||||
def transform_delete_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> DeleteResponseResult:
    """
    Parse the DELETE /responses/{id} HTTP response into a DeleteResponseResult.

    Raises:
        OpenAIError: when the body is not valid JSON (e.g. an HTML error page).
    """
    try:
        payload = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )
    return DeleteResponseResult(**payload)
|
||||
|
||||
#########################################################
|
||||
########## GET RESPONSE API TRANSFORMATION ###############
|
||||
#########################################################
|
||||
def transform_get_response_api_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and body for retrieving a stored response.

    OpenAI expects: GET /v1/responses/{response_id} with no body.
    """
    get_url = f"{api_base}/{response_id}"
    empty_body: Dict = {}
    return get_url, empty_body
|
||||
|
||||
def transform_get_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Transform the get response API response into a ResponsesAPIResponse.

    Response headers are surfaced to the client via _hidden_params.

    Raises:
        OpenAIError: when the body is not valid JSON (e.g. an error page).
    """
    try:
        raw_response_json = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )
    raw_response_headers = dict(raw_response.headers)
    processed_headers = process_response_headers(raw_response_headers)
    # Consistency fix: mirror transform_response_api_response() by falling
    # back to model_construct() when strict pydantic validation fails,
    # instead of surfacing a ValidationError for loosely-conforming
    # provider payloads.
    try:
        response = ResponsesAPIResponse(**raw_response_json)
    except Exception:
        verbose_logger.debug(
            f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
        )
        response = ResponsesAPIResponse.model_construct(**raw_response_json)
    # Store processed headers so they get returned to the client.
    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = raw_response_headers

    return response
|
||||
|
||||
#########################################################
|
||||
########## LIST INPUT ITEMS TRANSFORMATION #############
|
||||
#########################################################
|
||||
def transform_list_input_items_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
    after: Optional[str] = None,
    before: Optional[str] = None,
    include: Optional[List[str]] = None,
    limit: int = 20,
    order: Literal["asc", "desc"] = "desc",
) -> Tuple[str, Dict]:
    """
    Build the URL and query parameters for listing a response's input items.

    OpenAI exposes this operation as:
    - GET /v1/responses/{response_id}/input_items

    Only the pagination/filter options that were actually supplied are
    added to the query dict.
    """
    list_url = f"{api_base}/{response_id}/input_items"
    query: Dict[str, Any] = {}
    # Cursor-style pagination markers are optional.
    for key, value in (("after", after), ("before", before)):
        if value is not None:
            query[key] = value
    if include:
        # The API expects a single comma-separated string.
        query["include"] = ",".join(include)
    for key, value in (("limit", limit), ("order", order)):
        if value is not None:
            query[key] = value
    return list_url, query
|
||||
|
||||
def transform_list_input_items_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> Dict:
    """
    Return the parsed JSON body of a list-input-items response.

    Raises:
        OpenAIError: if the response body is not valid JSON.
    """
    try:
        parsed: Dict = raw_response.json()
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )
    return parsed
#########################################################
########## CANCEL RESPONSE API TRANSFORMATION ##########
#########################################################
def transform_cancel_response_api_request(
    self,
    response_id: str,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and request body for cancelling an in-flight response.

    OpenAI exposes this operation as:
    - POST /v1/responses/{response_id}/cancel

    Returns:
        Tuple of (request URL, empty request body).
    """
    cancel_url = f"{api_base}/{response_id}/cancel"
    request_body: Dict = {}
    return cancel_url, request_body
|
||||
|
||||
def transform_cancel_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Convert the raw HTTP response of POST /v1/responses/{id}/cancel into a
    ResponsesAPIResponse, attaching response headers as hidden params.

    Raises:
        OpenAIError: if the response body is not valid JSON.
    """
    try:
        parsed_body = raw_response.json()
    except Exception:
        # Non-JSON body: surface the raw text together with the status code.
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )

    response_headers = dict(raw_response.headers)
    processed = process_response_headers(response_headers)

    result = ResponsesAPIResponse(**parsed_body)
    result._hidden_params["additional_headers"] = processed
    result._hidden_params["headers"] = response_headers

    return result
|
||||
|
||||
#########################################################
########## COMPACT RESPONSE API TRANSFORMATION ##########
#########################################################
|
||||
def transform_compact_response_api_request(
    self,
    model: str,
    input: Union[str, ResponseInputParam],
    response_api_optional_request_params: Dict,
    api_base: str,
    litellm_params: GenericLiteLLMParams,
    headers: dict,
) -> Tuple[str, Dict]:
    """
    Build the URL and request body for compacting a response.

    OpenAI exposes this operation as:
    - POST /v1/responses/compact

    The "/compact" suffix is appended to the URL *path* (not the raw
    string) so that any query string on api_base — e.g. Azure's
    api-version — is preserved.
    """
    base_url = httpx.URL(api_base)
    target_path = base_url.path.rstrip("/") + "/compact"
    compact_url = str(base_url.copy_with(path=target_path))

    validated_input = self._validate_input_param(input)
    payload = dict(
        ResponsesAPIRequestParams(
            model=model,
            input=validated_input,
            **response_api_optional_request_params,
        )
    )

    return compact_url, payload
|
||||
|
||||
def transform_compact_response_api_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
    """
    Transform the compact response API response into a ResponsesAPIResponse.

    Logs the raw response via ``logging_obj.post_call``, parses the JSON
    body, normalizes ``created_at``, and attaches response headers as
    hidden params.

    Raises:
        OpenAIError: if the body cannot be parsed as JSON (or another
            error occurs while logging/normalizing it).
    """
    try:
        logging_obj.post_call(
            original_response=raw_response.text,
            additional_args={"complete_input_dict": {}},
        )
        raw_response_json = raw_response.json()
        # Normalize created_at only when the field is present. Previously
        # a missing key raised KeyError here, and the blanket except below
        # misreported a perfectly valid JSON body as an OpenAIError.
        if "created_at" in raw_response_json:
            raw_response_json["created_at"] = _safe_convert_created_field(
                raw_response_json["created_at"]
            )
    except Exception:
        raise OpenAIError(
            message=raw_response.text, status_code=raw_response.status_code
        )

    raw_response_headers = dict(raw_response.headers)
    processed_headers = process_response_headers(raw_response_headers)

    try:
        response = ResponsesAPIResponse(**raw_response_json)
    except Exception:
        # Fall back to model_construct so unexpected/partial payloads still
        # produce a usable object instead of failing pydantic validation.
        verbose_logger.debug(
            f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
        )
        response = ResponsesAPIResponse.model_construct(**raw_response_json)

    response._hidden_params["additional_headers"] = processed_headers
    response._hidden_params["headers"] = raw_response_headers

    return response
|
||||
Reference in New Issue
Block a user