chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/watsonx/audio_transcription/init.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/watsonx/audio_transcription/init.py
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/watsonx/audio_transcription/transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/watsonx/audio_transcription/transformation.py
@@ -0,0 +1,215 @@
+"""
+Translates from OpenAI's `/v1/audio/transcriptions` to IBM WatsonX's `/ml/v1/audio/transcriptions`
+
+WatsonX follows the OpenAI spec for audio transcription.
+"""
+
+from typing import Any, Dict, List, Optional
+
+import litellm
+from httpx import Response
+from litellm.litellm_core_utils.audio_utils.utils import process_audio_file
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    OpenAIAudioTranscriptionOptionalParams,
+)
+from litellm.types.llms.watsonx import WatsonXAudioTranscriptionRequestBody
+from litellm.types.utils import FileTypes, TranscriptionResponse
+
+from ...base_llm.audio_transcription.transformation import (
+    AudioTranscriptionRequestData,
+)
+from ...openai.transcriptions.whisper_transformation import (
+    OpenAIWhisperAudioTranscriptionConfig,
+)
+from ..common_utils import IBMWatsonXMixin
+
+
+class IBMWatsonXAudioTranscriptionConfig(
+    IBMWatsonXMixin, OpenAIWhisperAudioTranscriptionConfig
+):
+    """
+    IBM WatsonX Audio Transcription Config
+
+    WatsonX follows the OpenAI spec for audio transcription, so this class
+    inherits from OpenAIWhisperAudioTranscriptionConfig and uses IBMWatsonXMixin
+    for authentication and URL construction.
+    """
+
+    def validate_environment(
+        self,
+        headers: Dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: Dict,
+        litellm_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> Dict:
+        """
+        Validate environment for audio transcription.
+
+        Removes Content-Type header so httpx can set multipart/form-data automatically.
+        """
+        result = IBMWatsonXMixin.validate_environment(
+            self,
+            headers=headers,
+            model=model,
+            messages=messages,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            api_key=api_key,
+            api_base=api_base,
+        )
+        # Remove Content-Type so httpx sets multipart/form-data automatically
+        result.pop("Content-Type", None)
+        return result
+
+    def get_supported_openai_params(
+        self, model: str
+    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
+        """
+        Get the supported OpenAI params for WatsonX audio transcription.
+        """
+        return [
+            "language",
+            "prompt",
+            "response_format",
+            "temperature",
+            "timestamp_granularities",
+        ]
+
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> AudioTranscriptionRequestData:
+        """
+        Transform the audio transcription request for WatsonX.
+
+        WatsonX expects multipart/form-data with:
+        - file: the audio file
+        - model: the model name (without watsonx/ prefix)
+        - project_id: the project ID (as form field, not query param)
+        - space_id: the space ID (as form field, not query param)
+        - other optional params
+        """
+        # Use common utility to process the audio file
+        processed_audio = process_audio_file(audio_file)
+        project_id = optional_params.get("project_id") or optional_params.get(
+            "watsonx_project"
+        )
+        space_id = optional_params.get("space_id")
+        # api_params = _get_api_params(params=optional_params, model=model)
+
+        # Initialize form data with required fields
+        form_data: WatsonXAudioTranscriptionRequestBody = {"model": model}
+
+        # Only add project_id or space_id if they were explicitly provided by the user
+        if project_id:
+            form_data["project_id"] = project_id
+        elif space_id:
+            form_data["space_id"] = space_id
+
+        # Add supported OpenAI params to form data
+        supported_params = self.get_supported_openai_params(model)
+        for key, value in optional_params.items():
+            if key in supported_params and value is not None:
+                form_data[key] = value  # type: ignore
+
+        # Prepare files dict with the audio file
+        files = {
+            "file": (
+                processed_audio.filename,
+                processed_audio.file_content,
+                processed_audio.content_type,
+            )
+        }
+
+        # Convert TypedDict to regular dict for AudioTranscriptionRequestData
+        form_data_dict: Dict[str, Any] = dict(form_data)
+
+        return AudioTranscriptionRequestData(data=form_data_dict, files=files)
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: dict,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """
+        Construct the complete URL for WatsonX audio transcription.
+
+        URL format: {api_base}/ml/v1/audio/transcriptions?version={version}
+
+        Note: project_id or space_id is sent as form data, not as a query parameter
+        """
+        # Get base URL
+        url = self._get_base_url(api_base=api_base)
+        url = url.rstrip("/")
+
+        # Add the audio transcription endpoint
+        url = f"{url}/ml/v1/audio/transcriptions"
+
+        # Add version parameter (only version in query string, not project_id)
+        api_version = (
+            optional_params.get("api_version", None)
+            or litellm.WATSONX_DEFAULT_API_VERSION
+        )
+        url = f"{url}?version={api_version}"
+
+        return url
+
+    def transform_audio_transcription_response(
+        self,
+        raw_response: Response,
+    ) -> TranscriptionResponse:
+        """
+        Transform the audio transcription response from WatsonX.
+
+        WatsonX may include a 'model' field in the response, which needs to be
+        removed before creating the TranscriptionResponse object.
+        """
+        try:
+            raw_response_json = raw_response.json()
+        except Exception as e:
+            raise ValueError(
+                f"Error transforming response to json: {str(e)}\nResponse: {raw_response.text}"
+            )
+
+        # Extract only valid fields for TranscriptionResponse.__init__()
+        # TranscriptionResponse only accepts 'text' and 'usage' in __init__()
+        text = raw_response_json.get("text")
+        usage = raw_response_json.get("usage")
+
+        # Create response with only valid fields
+        response_kwargs = {}
+        if text is not None:
+            response_kwargs["text"] = text
+        if usage is not None:
+            response_kwargs["usage"] = usage
+
+        if not response_kwargs:
+            raise ValueError(
+                "Invalid response format. Received response does not match the expected format. Got: ",
+                raw_response_json,
+            )
+
+        response = TranscriptionResponse(**response_kwargs)
+
+        # Add other fields using dictionary-style assignment (like duration, task, etc.)
+        # Skip fields that TranscriptionResponse doesn't accept in __init__()
+        for key, value in raw_response_json.items():
+            if key not in [
+                "text",
+                "usage",
+                "model",
+            ]:  # text/usage already set, model should be excluded
+                response[key] = value
+
+        return response