chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,178 @@
# OpenAI Text-to-Speech Guardrail Translation Handler
Handler for processing OpenAI's text-to-speech endpoint (`/v1/audio/speech`) with guardrails.
## Overview
This handler processes text-to-speech requests by:
1. Extracting the input text from the request
2. Applying guardrails to the input text
3. Updating the request with the guardrailed text
4. Returning the output unchanged (audio is binary, not text)
## Data Format
### Input Format
```json
{
"model": "tts-1",
"input": "The quick brown fox jumped over the lazy dog.",
"voice": "alloy",
"response_format": "mp3",
"speed": 1.0
}
```
### Output Format
The output is binary audio data (MP3, WAV, etc.), not text, so it cannot be guardrailed.
## Usage
The handler is automatically discovered and applied when guardrails are used with the text-to-speech endpoint.
### Example: Using Guardrails with Text-to-Speech
```bash
curl -X POST 'http://localhost:4000/v1/audio/speech' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer your-api-key' \
-d '{
"model": "tts-1",
"input": "The quick brown fox jumped over the lazy dog.",
"voice": "alloy",
"guardrails": ["content_moderation"]
}' \
--output speech.mp3
```
The guardrail will be applied to the input text before the text-to-speech conversion.
### Example: PII Masking in TTS Input
```bash
curl -X POST 'http://localhost:4000/v1/audio/speech' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer your-api-key' \
-d '{
"model": "tts-1",
"input": "Please call John Doe at john@example.com",
"voice": "nova",
"guardrails": ["mask_pii"]
}' \
--output speech.mp3
```
The audio will say: "Please call [NAME_REDACTED] at [EMAIL_REDACTED]"
### Example: Content Filtering Before TTS
```bash
curl -X POST 'http://localhost:4000/v1/audio/speech' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer your-api-key' \
-d '{
"model": "tts-1-hd",
"input": "This is the text that will be spoken",
"voice": "shimmer",
"guardrails": ["content_filter"]
}' \
--output speech.mp3
```
## Implementation Details
### Input Processing
- **Field**: `input` (string)
- **Processing**: Applies guardrail to input text
- **Result**: Updated input text in request
### Output Processing
- **Processing**: Not applicable (audio is binary data)
- **Result**: Response returned unchanged
## Use Cases
1. **PII Protection**: Remove personally identifiable information before converting to speech
2. **Content Filtering**: Remove inappropriate content before TTS conversion
3. **Compliance**: Ensure text meets requirements before voice synthesis
4. **Text Sanitization**: Clean up text before audio generation
## Extension
Override these methods to customize behavior:
- `process_input_messages()`: Customize how input text is processed
- `process_output_response()`: Currently a no-op, but can be overridden if needed
## Supported Call Types
- `CallTypes.speech` - Synchronous text-to-speech
- `CallTypes.aspeech` - Asynchronous text-to-speech
## Notes
- Only the input text is processed by guardrails
- Output processing is a no-op since audio cannot be text-guardrailed
- Both sync and async call types use the same handler
- Works with all TTS models (tts-1, tts-1-hd, etc.)
- Works with all voice options
## Common Patterns
### Remove PII Before TTS
```python
import litellm
from pathlib import Path
speech_file_path = Path(__file__).parent / "speech.mp3"
response = litellm.speech(
model="tts-1",
voice="alloy",
input="Hi, this is John Doe calling from john@company.com",
guardrails=["mask_pii"],
)
response.stream_to_file(speech_file_path)
# Audio will have PII masked
```
### Content Moderation Before TTS
```python
import litellm
from pathlib import Path
speech_file_path = Path(__file__).parent / "speech.mp3"
response = litellm.speech(
model="tts-1-hd",
voice="nova",
input="Your text here",
guardrails=["content_moderation"],
)
response.stream_to_file(speech_file_path)
```
### Async TTS with Guardrails
```python
import litellm
import asyncio
from pathlib import Path
async def generate_speech():
speech_file_path = Path(__file__).parent / "speech.mp3"
response = await litellm.aspeech(
model="tts-1",
voice="echo",
input="Text to convert to speech",
        guardrails=["mask_pii"],
)
response.stream_to_file(speech_file_path)
asyncio.run(generate_speech())
```

View File

@@ -0,0 +1,13 @@
"""OpenAI Text-to-Speech handler for Unified Guardrails."""
from litellm.llms.openai.speech.guardrail_translation.handler import (
OpenAITextToSpeechHandler,
)
from litellm.types.utils import CallTypes
# Both the sync and async text-to-speech call types share one handler class,
# since the request/response shape is identical for the two entry points.
guardrail_translation_mappings = {
    call_type: OpenAITextToSpeechHandler
    for call_type in (CallTypes.speech, CallTypes.aspeech)
}

# Public API of this module.
__all__ = ["guardrail_translation_mappings", "OpenAITextToSpeechHandler"]

View File

@@ -0,0 +1,108 @@
"""
OpenAI Text-to-Speech Handler for Unified Guardrails
This module provides guardrail translation support for OpenAI's text-to-speech endpoint.
The handler processes the 'input' text parameter (output is audio, so no text to guardrail).
"""
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_proxy_logger
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.types.utils import GenericGuardrailAPIInputs
if TYPE_CHECKING:
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.llms.openai import HttpxBinaryResponseContent
class OpenAITextToSpeechHandler(BaseTranslation):
    """
    Guardrail translation handler for OpenAI text-to-speech requests.

    Responsibilities:
    1. Pre-call: run the configured guardrail over the request's ``input`` text
       and write the (possibly modified) text back into the request.
    2. Post-call: nothing — the response is binary audio, so there is no text
       for a guardrail to inspect; the response passes through untouched.
    """

    async def process_input_messages(
        self,
        data: dict,
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
    ) -> Any:
        """
        Apply the guardrail to the request's ``input`` text.

        Args:
            data: Request body; the TTS text lives under the ``input`` key.
            guardrail_to_apply: Guardrail instance whose ``apply_guardrail``
                is awaited with the extracted text.
            litellm_logging_obj: Optional logging object forwarded to the
                guardrail call.

        Returns:
            The same ``data`` dict, with ``data["input"]`` replaced by the
            guardrailed text when the guardrail returned one.
        """
        text_to_guard = data.get("input")

        # Guard clause: nothing to do when the request carries no input text.
        if text_to_guard is None:
            verbose_proxy_logger.debug(
                "OpenAI Text-to-Speech: No input text found in request data"
            )
            return data

        # Guard clause: only plain strings are supported; anything else is
        # logged and passed through unchanged.
        if not isinstance(text_to_guard, str):
            verbose_proxy_logger.debug(
                "OpenAI Text-to-Speech: Unexpected input type: %s. Expected string.",
                type(text_to_guard),
            )
            return data

        guardrail_inputs = GenericGuardrailAPIInputs(texts=[text_to_guard])

        # Attach the model name (e.g. "tts-1") when present so the guardrail
        # can factor it into its decision.
        model_name = data.get("model")
        if model_name:
            guardrail_inputs["model"] = model_name

        guardrail_result = await guardrail_to_apply.apply_guardrail(
            inputs=guardrail_inputs,
            request_data=data,
            input_type="request",
            logging_obj=litellm_logging_obj,
        )

        # Fall back to the original text if the guardrail returned no texts.
        returned_texts = guardrail_result.get("texts", [])
        data["input"] = returned_texts[0] if returned_texts else text_to_guard

        verbose_proxy_logger.debug(
            "OpenAI Text-to-Speech: Applied guardrail to input text. "
            "Original length: %d, New length: %d",
            len(text_to_guard),
            len(data["input"]),
        )
        return data

    async def process_output_response(
        self,
        response: "HttpxBinaryResponseContent",
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
    ) -> Any:
        """
        Return the TTS response unchanged.

        The response body is binary audio, not text, so guardrails cannot be
        applied to it. This is an intentional no-op kept for interface parity.

        Args:
            response: Binary audio response.
            guardrail_to_apply: Guardrail instance (unused).
            litellm_logging_obj: Optional logging object (unused).
            user_api_key_dict: User API key metadata (unused).

        Returns:
            The ``response`` exactly as received.
        """
        verbose_proxy_logger.debug(
            "OpenAI Text-to-Speech: Output processing not applicable "
            "(output is audio data, not text)"
        )
        return response