chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,41 @@
from typing import List
from litellm.llms.base_llm.audio_transcription.transformation import (
AudioTranscriptionRequestData,
)
from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams
from litellm.types.utils import FileTypes
from .whisper_transformation import OpenAIWhisperAudioTranscriptionConfig
class OpenAIGPTAudioTranscriptionConfig(OpenAIWhisperAudioTranscriptionConfig):
    """Audio transcription config for OpenAI's `gpt-4o-transcribe` model family.

    Reuses the Whisper config's behaviour, but advertises the parameter set
    accepted by the `gpt-4o-transcribe` models and builds the request payload
    without any response-format rewriting.
    """

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI params supported by the `gpt-4o-transcribe` models."""
        supported: List[OpenAIAudioTranscriptionOptionalParams] = [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "include",
        ]
        return supported

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """Assemble the request payload for the transcription call.

        `optional_params` entries take precedence over the base `model`/`file`
        keys, matching the original merge order.
        """
        request_body: dict = {"model": model, "file": audio_file}
        request_body.update(optional_params)
        return AudioTranscriptionRequestData(data=request_body)

View File

@@ -0,0 +1,159 @@
# OpenAI Audio Transcription Guardrail Translation Handler
Handler for processing OpenAI's audio transcription endpoint (`/v1/audio/transcriptions`) with guardrails.
## Overview
This handler processes audio transcription responses by:
1. Applying guardrails to the transcribed text output
2. Returning the input unchanged (since input is an audio file, not text)
## Data Format
### Input Format
The input is an audio file, which cannot be guardrailed (it's binary data, not text).
```json
{
"model": "whisper-1",
"file": "<audio file>",
"response_format": "json",
"language": "en"
}
```
### Output Format
```json
{
"text": "This is the transcribed text from the audio file."
}
```
Or with additional metadata:
```json
{
"text": "This is the transcribed text from the audio file.",
"duration": 3.5,
"language": "en"
}
```
## Usage
The handler is automatically discovered and applied when guardrails are used with the audio transcription endpoint.
### Example: Using Guardrails with Audio Transcription
```bash
curl -X POST 'http://localhost:4000/v1/audio/transcriptions' \
-H 'Authorization: Bearer your-api-key' \
-F 'file=@audio.mp3' \
-F 'model=whisper-1' \
-F 'guardrails=["pii_mask"]'
```
The guardrail will be applied to the **output** transcribed text only.
### Example: PII Masking in Transcribed Text
```bash
curl -X POST 'http://localhost:4000/v1/audio/transcriptions' \
-H 'Authorization: Bearer your-api-key' \
-F 'file=@meeting_recording.mp3' \
-F 'model=whisper-1' \
-F 'guardrails=["mask_pii"]' \
-F 'response_format=json'
```
If the audio contains: "My name is John Doe and my email is john@example.com"
The transcription output will be: "My name is [NAME_REDACTED] and my email is [EMAIL_REDACTED]"
### Example: Content Moderation on Transcriptions
```bash
curl -X POST 'http://localhost:4000/v1/audio/transcriptions' \
-H 'Authorization: Bearer your-api-key' \
-F 'file=@audio.wav' \
-F 'model=whisper-1' \
-F 'guardrails=["content_moderation"]'
```
## Implementation Details
### Input Processing
- **Status**: Not applicable
- **Reason**: Input is an audio file (binary data), not text
- **Result**: Request data returned unchanged
### Output Processing
- **Field**: `text` (string)
- **Processing**: Applies guardrail to the transcribed text
- **Result**: Updated text in response
## Use Cases
1. **PII Protection**: Automatically redact personally identifiable information from transcriptions
2. **Content Filtering**: Remove or flag inappropriate content in transcribed audio
3. **Compliance**: Ensure transcriptions meet regulatory requirements
4. **Data Sanitization**: Clean up transcriptions before storage or further processing
## Extension
Override these methods to customize behavior:
- `process_output_response()`: Customize how transcribed text is processed
- `process_input_messages()`: Currently a no-op, but can be overridden if needed
## Supported Call Types
- `CallTypes.transcription` - Synchronous audio transcription
- `CallTypes.atranscription` - Asynchronous audio transcription
## Notes
- Input processing is a no-op since audio files cannot be text-guardrailed
- Only the transcribed text output is processed
- Guardrails apply after transcription is complete
- Both sync and async call types use the same handler
- Works with all Whisper models and response formats
## Common Patterns
### Transcribe and Redact PII
```python
import litellm
response = litellm.transcription(
model="whisper-1",
file=open("interview.mp3", "rb"),
guardrails=["mask_pii"],
)
# response.text will have PII redacted
print(response.text)
```
### Async Transcription with Guardrails
```python
import litellm
import asyncio
async def transcribe_with_guardrails():
response = await litellm.atranscription(
model="whisper-1",
file=open("audio.mp3", "rb"),
guardrails=["content_filter"],
)
return response.text
text = asyncio.run(transcribe_with_guardrails())
```

View File

@@ -0,0 +1,13 @@
"""OpenAI Audio Transcription handler for Unified Guardrails."""
from litellm.llms.openai.transcriptions.guardrail_translation.handler import (
OpenAIAudioTranscriptionHandler,
)
from litellm.types.utils import CallTypes
# Route both transcription call types to the same handler: the guardrail
# logic only touches the transcribed text *output*, which is identical for
# the sync (`transcription`) and async (`atranscription`) entry points.
guardrail_translation_mappings = {
    CallTypes.transcription: OpenAIAudioTranscriptionHandler,
    CallTypes.atranscription: OpenAIAudioTranscriptionHandler,
}

# Explicit public API of this module.
__all__ = ["guardrail_translation_mappings", "OpenAIAudioTranscriptionHandler"]

View File

@@ -0,0 +1,117 @@
"""
OpenAI Audio Transcription Handler for Unified Guardrails
This module provides guardrail translation support for OpenAI's audio transcription endpoint.
The handler processes the output transcribed text (input is audio, so no text to guardrail).
"""
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_proxy_logger
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.types.utils import GenericGuardrailAPIInputs
if TYPE_CHECKING:
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.utils import TranscriptionResponse
class OpenAIAudioTranscriptionHandler(BaseTranslation):
    """
    Handler for processing OpenAI audio transcription responses with guardrails.

    Only the transcribed text on the *output* side is guardrailed: the input
    to the endpoint is a binary audio file, so there is no input text to
    process and ``process_input_messages`` is a no-op.
    """

    async def process_input_messages(
        self,
        data: dict,
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
    ) -> Any:
        """
        No-op input hook: the request carries an audio file, not text.

        Args:
            data: Request data dictionary containing the audio file.
            guardrail_to_apply: The guardrail instance (unused).

        Returns:
            The request data, unchanged.
        """
        verbose_proxy_logger.debug(
            "OpenAI Audio Transcription: Input processing not applicable "
            "(input is audio file, not text)"
        )
        return data

    async def process_output_response(
        self,
        response: "TranscriptionResponse",
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
    ) -> Any:
        """
        Apply the guardrail to the transcribed text of *response*.

        Args:
            response: Transcription response object containing transcribed text.
            guardrail_to_apply: The guardrail instance to apply.
            litellm_logging_obj: Optional logging object.
            user_api_key_dict: User API key metadata to pass to guardrails.

        Returns:
            The response, with the guardrail applied to its transcribed text.
        """
        text = getattr(response, "text", None)
        if text is None:
            verbose_proxy_logger.debug(
                "OpenAI Audio Transcription: No text in response to process"
            )
            return response

        if not isinstance(text, str):
            verbose_proxy_logger.debug(
                "OpenAI Audio Transcription: Unexpected text type: %s. Expected string.",
                type(text),
            )
            return response

        # Build the request_data payload handed to the guardrail, attaching
        # user API key metadata (with prefixed keys) when available.
        request_data: dict = {"response": response}
        user_metadata = self.transform_user_api_key_dict_to_metadata(
            user_api_key_dict
        )
        if user_metadata:
            request_data["litellm_metadata"] = user_metadata

        inputs = GenericGuardrailAPIInputs(texts=[text])
        # Forward the model name when the response exposes a truthy one.
        if getattr(response, "model", None):
            inputs["model"] = response.model

        guardrailed = await guardrail_to_apply.apply_guardrail(
            inputs=inputs,
            request_data=request_data,
            input_type="response",
            logging_obj=litellm_logging_obj,
        )
        new_texts = guardrailed.get("texts", [])
        # Fall back to the original text if the guardrail returned nothing.
        response.text = new_texts[0] if new_texts else text
        verbose_proxy_logger.debug(
            "OpenAI Audio Transcription: Applied guardrail to transcribed text. "
            "Original length: %d, New length: %d",
            len(text),
            len(response.text),
        )
        return response

View File

@@ -0,0 +1,231 @@
from typing import TYPE_CHECKING, Optional, Union, cast
import httpx
from openai import AsyncOpenAI, OpenAI
from pydantic import BaseModel
import litellm
if TYPE_CHECKING:
from aiohttp import ClientSession
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.audio_transcription.transformation import (
BaseAudioTranscriptionConfig,
)
from litellm.types.utils import FileTypes
from litellm.utils import (
TranscriptionResponse,
convert_to_model_response_object,
extract_duration_from_srt_or_vtt,
)
from ..openai import OpenAIChatCompletion
class OpenAIAudioTranscription(OpenAIChatCompletion):
    """Handler for OpenAI's `/v1/audio/transcriptions` endpoint (sync + async)."""

    # Audio Transcriptions
    async def make_openai_audio_transcriptions_request(
        self,
        openai_aclient: AsyncOpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Call `openai_aclient.audio.transcriptions.with_raw_response.create`.

        Always uses `with_raw_response` so the response headers are available
        to the caller (fixed docstring: the previous one claimed a
        `litellm.return_response_headers` branch that did not exist here).

        Returns:
            Tuple of (headers dict, parsed transcription response).
        """
        raw_response = (
            await openai_aclient.audio.transcriptions.with_raw_response.create(
                **data, timeout=timeout
            )
        )  # type: ignore
        headers = dict(raw_response.headers)
        response = raw_response.parse()
        return headers, response

    def make_sync_openai_audio_transcriptions_request(
        self,
        openai_client: OpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Call the sync transcriptions API.

        Uses `with_raw_response` when `litellm.return_response_headers` is True
        (so headers can be surfaced); otherwise calls `.create` directly.

        Returns:
            Tuple of (headers dict or None, transcription response).
        """
        if litellm.return_response_headers is True:
            raw_response = (
                openai_client.audio.transcriptions.with_raw_response.create(
                    **data, timeout=timeout
                )
            )  # type: ignore
            headers = dict(raw_response.headers)
            response = raw_response.parse()
            return headers, response
        response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
        return None, response

    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str],
        api_base: Optional[str],
        client=None,
        atranscription: bool = False,
        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> TranscriptionResponse:
        """
        Handle an audio transcription request.

        Dispatches to `async_audio_transcriptions` when `atranscription=True`;
        otherwise performs the call synchronously.

        Args:
            model: Model name to send to the provider.
            audio_file: Audio file to transcribe.
            optional_params: Provider-mapped optional params.
            litellm_params: litellm-specific params.
            model_response: Response object to populate.
            timeout: Request timeout.
            max_retries: Retry budget for the client.
            logging_obj: litellm logging object for pre/post call hooks.
            api_key / api_base: Credentials and endpoint override.
            client: Optional pre-built OpenAI client.
            atranscription: When True, return the async coroutine instead.
            provider_config: Optional provider transformation config.
            shared_session: Optional shared aiohttp session (async path only).
        """
        if provider_config is not None:
            transformed_data = provider_config.transform_audio_transcription_request(
                model=model,
                audio_file=audio_file,
                optional_params=optional_params,
                litellm_params=litellm_params,
            )
            data = cast(dict, transformed_data.data)
        else:
            data = {"model": model, "file": audio_file, **optional_params}

        if atranscription is True:
            return self.async_audio_transcriptions(  # type: ignore
                audio_file=audio_file,
                data=data,
                model_response=model_response,
                timeout=timeout,
                api_key=api_key,
                api_base=api_base,
                client=client,
                max_retries=max_retries,
                logging_obj=logging_obj,
                shared_session=shared_session,
            )

        openai_client: OpenAI = self._get_openai_client(  # type: ignore
            is_async=False,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            client=client,
        )

        ## LOGGING
        logging_obj.pre_call(
            input=None,
            api_key=openai_client.api_key,
            additional_args={
                "api_base": openai_client._base_url._uri_reference,
                # Fix: this is the sync path, so atranscription is False
                # (was hardcoded True, making logs misleading).
                "atranscription": False,
                "complete_input_dict": data,
            },
        )

        _, response = self.make_sync_openai_audio_transcriptions_request(
            openai_client=openai_client,
            data=data,
            timeout=timeout,
        )

        if isinstance(response, BaseModel):
            stringified_response = response.model_dump()
        else:
            # Mirror the async path: extract duration from srt/vtt string
            # responses so downstream cost calculation has access to it.
            duration = extract_duration_from_srt_or_vtt(response)
            stringified_response = TranscriptionResponse(text=response).model_dump()
            stringified_response["_audio_transcription_duration"] = duration

        ## LOGGING
        logging_obj.post_call(
            input=get_audio_file_name(audio_file),
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=stringified_response,
        )
        hidden_params = {"model": model, "custom_llm_provider": "openai"}
        final_response: TranscriptionResponse = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        return final_response

    async def async_audio_transcriptions(
        self,
        audio_file: FileTypes,
        data: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        client=None,
        max_retries=None,
        shared_session: Optional["ClientSession"] = None,
    ):
        """
        Async variant of `audio_transcriptions`.

        Raises:
            Exception: Re-raises any provider/client error after logging it.
        """
        try:
            openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                is_async=True,
                api_key=api_key,
                api_base=api_base,
                timeout=timeout,
                max_retries=max_retries,
                client=client,
                shared_session=shared_session,
            )

            ## LOGGING
            logging_obj.pre_call(
                input=None,
                api_key=openai_aclient.api_key,
                additional_args={
                    "api_base": openai_aclient._base_url._uri_reference,
                    "atranscription": True,
                    "complete_input_dict": data,
                },
            )
            headers, response = await self.make_openai_audio_transcriptions_request(
                openai_aclient=openai_aclient,
                data=data,
                timeout=timeout,
            )
            logging_obj.model_call_details["response_headers"] = headers
            if isinstance(response, BaseModel):
                stringified_response = response.model_dump()
            else:
                # srt/vtt string response: recover duration for cost calculation.
                duration = extract_duration_from_srt_or_vtt(response)
                stringified_response = TranscriptionResponse(text=response).model_dump()
                stringified_response["_audio_transcription_duration"] = duration

            ## LOGGING
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=stringified_response,
            )
            # Extract the actual model from data instead of hardcoding "whisper-1"
            actual_model = data.get("model", "whisper-1")
            hidden_params = {"model": actual_model, "custom_llm_provider": "openai"}
            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        except Exception as e:
            ## LOGGING
            # Fix: previously passed the *builtin* `input` function here,
            # which logged "<built-in function input>" instead of the file name.
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                original_response=str(e),
            )
            raise e

View File

@@ -0,0 +1,150 @@
from typing import List, Optional, Union
from httpx import Headers, Response
from litellm.llms.base_llm.audio_transcription.transformation import (
AudioTranscriptionRequestData,
BaseAudioTranscriptionConfig,
)
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import FileTypes, TranscriptionResponse
from ..common_utils import OpenAIError
class OpenAIWhisperAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """Audio transcription config for OpenAI's `whisper-1` model family."""

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        OPTIONAL

        Build the complete `/audio/transcriptions` URL for the request.

        Appends `v1/audio/transcriptions` to `api_base`, skipping the extra
        `/v1` segment when the base already ends with one. An empty/None
        `api_base` yields a bare `/v1/audio/transcriptions` path, matching
        prior behavior.
        """
        # Normalize: drop any trailing slash so the suffix joins cleanly.
        api_base = api_base.rstrip("/") if api_base else ""
        # "".endswith("/v1") is False, so no separate emptiness guard is needed
        # (removed the redundant `api_base and ...` check and dead `or ""`).
        if api_base.endswith("/v1"):
            return f"{api_base}/audio/transcriptions"
        return f"{api_base}/v1/audio/transcriptions"

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the supported OpenAI params for the `whisper-1` models."""
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "timestamp_granularities",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy supported OpenAI params into `optional_params`; unsupported keys
        are silently dropped (regardless of `drop_params`).
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Attach the Bearer auth header, falling back to OPENAI_API_KEY."""
        api_key = api_key or get_secret_str("OPENAI_API_KEY")
        headers.update({"Authorization": f"Bearer {api_key}"})
        return headers

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """
        Build the transcription request payload.

        `response_format` is upgraded to `verbose_json` when unset or set to
        `text`/`json`, because only `verbose_json` returns `duration`, which
        is needed for cost calculation.
        """
        data = {"model": model, "file": audio_file, **optional_params}
        if "response_format" not in data or data["response_format"] in ("text", "json"):
            data["response_format"] = "verbose_json"  # ensures 'duration' is received - used for cost calculation
        return AudioTranscriptionRequestData(
            data=data,
        )

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        """Wrap provider errors in an OpenAIError."""
        return OpenAIError(
            status_code=status_code,
            message=error_message,
            headers=headers,
        )

    def transform_audio_transcription_response(
        self,
        raw_response: Response,
    ) -> TranscriptionResponse:
        """
        Parse the raw HTTP response into a TranscriptionResponse.

        Raises:
            ValueError: If the body isn't JSON, or if the JSON shares no keys
                with TranscriptionResponse's fields.
        """
        try:
            raw_response_json = raw_response.json()
        except Exception as e:
            raise ValueError(
                f"Error transforming response to json: {str(e)}\nResponse: {raw_response.text}"
            )
        if any(
            key in raw_response_json
            for key in TranscriptionResponse.model_fields.keys()
        ):
            return TranscriptionResponse(**raw_response_json)
        # Fix: previously raised ValueError with a 2-tuple of args, producing
        # a confusing tuple repr; format a single readable message instead.
        raise ValueError(
            "Invalid response format. Received response does not match the "
            f"expected format. Got: {raw_response_json}"
        )
)