chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,210 @@
|
||||
import asyncio
|
||||
import base64
|
||||
from typing import Any, Coroutine, Optional, Tuple, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm import LlmProviders
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
from litellm.types.llms.openai import (
|
||||
FileContentRequest,
|
||||
HttpxBinaryResponseContent,
|
||||
)
|
||||
from litellm.types.utils import SpecialEnums
|
||||
|
||||
from ..base_aws_llm import BaseAWSLLM
|
||||
|
||||
|
||||
class BedrockFilesHandler(BaseAWSLLM):
    """
    Handles downloading files from S3 for Bedrock batch processing.

    This implementation downloads files from S3 buckets where Bedrock
    stores batch output files.
    """

    def __init__(self):
        # Reuse AWS credential/region helpers from BaseAWSLLM.
        super().__init__()
        # Shared async HTTP client registered for the Bedrock provider.
        self.async_httpx_client = get_async_httpx_client(
            llm_provider=LlmProviders.BEDROCK,
        )
def _extract_s3_uri_from_file_id(self, file_id: str) -> str:
|
||||
"""
|
||||
Extract S3 URI from encoded file ID.
|
||||
|
||||
The file ID can be in two formats:
|
||||
1. Base64-encoded unified file ID containing: llm_output_file_id,s3://bucket/path
|
||||
2. Direct S3 URI: s3://bucket/path
|
||||
|
||||
Args:
|
||||
file_id: Encoded file ID or direct S3 URI
|
||||
|
||||
Returns:
|
||||
S3 URI (e.g., "s3://bucket-name/path/to/file")
|
||||
"""
|
||||
# First, try to decode if it's a base64-encoded unified file ID
|
||||
try:
|
||||
# Add padding if needed
|
||||
padded = file_id + "=" * (-len(file_id) % 4)
|
||||
decoded = base64.urlsafe_b64decode(padded).decode()
|
||||
|
||||
# Check if it's a unified file ID format
|
||||
if decoded.startswith(SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value):
|
||||
# Extract llm_output_file_id from the decoded string
|
||||
if "llm_output_file_id," in decoded:
|
||||
s3_uri = decoded.split("llm_output_file_id,")[1].split(";")[0]
|
||||
return s3_uri
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If not base64 encoded or doesn't contain llm_output_file_id, assume it's already an S3 URI
|
||||
if file_id.startswith("s3://"):
|
||||
return file_id
|
||||
|
||||
# If it doesn't start with s3://, assume it's a direct S3 URI and add the prefix
|
||||
return f"s3://{file_id}"
|
||||
|
||||
def _parse_s3_uri(self, s3_uri: str) -> Tuple[str, str]:
|
||||
"""
|
||||
Parse S3 URI to extract bucket name and object key.
|
||||
|
||||
Args:
|
||||
s3_uri: S3 URI (e.g., "s3://bucket-name/path/to/file")
|
||||
|
||||
Returns:
|
||||
Tuple of (bucket_name, object_key)
|
||||
"""
|
||||
if not s3_uri.startswith("s3://"):
|
||||
raise ValueError(
|
||||
f"Invalid S3 URI format: {s3_uri}. Expected format: s3://bucket-name/path/to/file"
|
||||
)
|
||||
|
||||
# Remove 's3://' prefix
|
||||
path = s3_uri[5:]
|
||||
|
||||
if "/" in path:
|
||||
bucket_name, object_key = path.split("/", 1)
|
||||
else:
|
||||
bucket_name = path
|
||||
object_key = ""
|
||||
|
||||
return bucket_name, object_key
|
||||
|
||||
    async def afile_content(
        self,
        file_content_request: FileContentRequest,
        optional_params: dict,
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
    ) -> HttpxBinaryResponseContent:
        """
        Download file content from S3 bucket for Bedrock files.

        Args:
            file_content_request: Contains file_id (encoded or S3 URI)
            optional_params: Optional parameters containing AWS credentials
            timeout: Request timeout (currently unused; boto3 defaults apply)
            max_retries: Max retry attempts (currently unused; boto3 defaults apply)

        Returns:
            HttpxBinaryResponseContent: Binary content wrapped in compatible response format

        Raises:
            ValueError: if file_id is missing or the S3 download fails.

        NOTE(review): s3_client.get_object below is a blocking boto3 call
        running inside an async method - it will block the event loop for
        large files; consider offloading to a thread if this matters.
        """
        import boto3
        from botocore.credentials import Credentials

        file_id = file_content_request.get("file_id")
        if not file_id:
            raise ValueError("file_id is required in file_content_request")

        # Extract S3 URI from file ID
        s3_uri = self._extract_s3_uri_from_file_id(file_id)
        bucket_name, object_key = self._parse_s3_uri(s3_uri)

        # Get AWS credentials; model is irrelevant for S3 access, so pass "".
        aws_region_name = self._get_aws_region_name(
            optional_params=optional_params, model=""
        )
        credentials: Credentials = self.get_credentials(
            aws_access_key_id=optional_params.get("aws_access_key_id"),
            aws_secret_access_key=optional_params.get("aws_secret_access_key"),
            aws_session_token=optional_params.get("aws_session_token"),
            aws_region_name=aws_region_name,
            aws_session_name=optional_params.get("aws_session_name"),
            aws_profile_name=optional_params.get("aws_profile_name"),
            aws_role_name=optional_params.get("aws_role_name"),
            aws_web_identity_token=optional_params.get("aws_web_identity_token"),
            aws_sts_endpoint=optional_params.get("aws_sts_endpoint"),
        )

        # Create S3 client
        s3_client = boto3.client(
            "s3",
            aws_access_key_id=credentials.access_key,
            aws_secret_access_key=credentials.secret_key,
            aws_session_token=credentials.token,
            region_name=aws_region_name,
            verify=self._get_ssl_verify(),
        )

        # Download file from S3
        try:
            response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
            file_content = response["Body"].read()
        except Exception as e:
            raise ValueError(
                f"Failed to download file from S3: {s3_uri}. Error: {str(e)}"
            )

        # Wrap the bytes in a synthetic httpx.Response so callers receive
        # the same HttpxBinaryResponseContent shape as other providers.
        mock_response = httpx.Response(
            status_code=200,
            content=file_content,
            headers={"content-type": "application/octet-stream"},
            request=httpx.Request(method="GET", url=s3_uri),
        )

        return HttpxBinaryResponseContent(response=mock_response)
    def file_content(
        self,
        _is_async: bool,
        file_content_request: FileContentRequest,
        api_base: Optional[str],
        optional_params: dict,
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
    ) -> Union[
        HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
    ]:
        """
        Download file content from S3 bucket for Bedrock files.
        Supports both sync and async operations.

        Args:
            _is_async: Whether to run asynchronously
            file_content_request: Contains file_id (encoded or S3 URI)
            api_base: API base (unused for S3 operations)
            optional_params: Optional parameters containing AWS credentials
            timeout: Request timeout
            max_retries: Max retry attempts

        Returns:
            HttpxBinaryResponseContent or Coroutine: Binary content wrapped in compatible response format

        NOTE(review): the sync path uses asyncio.run(), which raises
        RuntimeError when called from a thread that already has a running
        event loop - confirm sync callers are loop-free.
        """
        if _is_async:
            # Return the coroutine for the caller to await.
            return self.afile_content(
                file_content_request=file_content_request,
                optional_params=optional_params,
                timeout=timeout,
                max_retries=max_retries,
            )
        else:
            # Sync path: drive the coroutine to completion on a fresh loop.
            return asyncio.run(
                self.afile_content(
                    file_content_request=file_content_request,
                    optional_params=optional_params,
                    timeout=timeout,
                    max_retries=max_retries,
                )
            )
@@ -0,0 +1,772 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import httpx
|
||||
from httpx import Headers, Response
|
||||
from openai.types.file_deleted import FileDeleted
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm._uuid import uuid
|
||||
from litellm.files.utils import FilesAPIUtils
|
||||
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.llms.base_llm.files.transformation import (
|
||||
BaseFilesConfig,
|
||||
LiteLLMLoggingObj,
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
CreateFileRequest,
|
||||
FileTypes,
|
||||
HttpxBinaryResponseContent,
|
||||
OpenAICreateFileRequestOptionalParams,
|
||||
OpenAIFileObject,
|
||||
PathLike,
|
||||
)
|
||||
from litellm.types.utils import ExtractedFileData, LlmProviders
|
||||
from litellm.utils import get_llm_provider
|
||||
|
||||
from ..base_aws_llm import BaseAWSLLM
|
||||
from ..common_utils import BedrockError
|
||||
|
||||
|
||||
class BedrockFilesConfig(BaseAWSLLM, BaseFilesConfig):
    """
    Config for Bedrock Files - handles S3 uploads for Bedrock batch processing
    """

    def __init__(self):
        # JSONL helper: converts OpenAI batch records to Bedrock batch records.
        self.jsonl_transformation = BedrockJsonlFilesTransformation()
        super().__init__()
    @property
    def custom_llm_provider(self) -> LlmProviders:
        # Identifies this config as belonging to the Bedrock provider.
        return LlmProviders.BEDROCK
    @property
    def file_upload_http_method(self) -> str:
        """
        Bedrock files are uploaded to S3, which requires PUT requests
        (rather than the POST used by OpenAI's files API).
        """
        return "PUT"
    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Return headers unchanged - S3 auth happens via SigV4 request
        signing, not HTTP auth headers."""
        # No additional headers needed for S3 uploads - AWS credentials handled by BaseAWSLLM
        return headers
def _get_content_from_openai_file(self, openai_file_content: FileTypes) -> str:
|
||||
"""
|
||||
Helper to extract content from various OpenAI file types and return as string.
|
||||
|
||||
Handles:
|
||||
- Direct content (str, bytes, IO[bytes])
|
||||
- Tuple formats: (filename, content, [content_type], [headers])
|
||||
- PathLike objects
|
||||
"""
|
||||
content: Union[str, bytes] = b""
|
||||
# Extract file content from tuple if necessary
|
||||
if isinstance(openai_file_content, tuple):
|
||||
# Take the second element which is always the file content
|
||||
file_content = openai_file_content[1]
|
||||
else:
|
||||
file_content = openai_file_content
|
||||
|
||||
# Handle different file content types
|
||||
if isinstance(file_content, str):
|
||||
# String content can be used directly
|
||||
content = file_content
|
||||
elif isinstance(file_content, bytes):
|
||||
# Bytes content can be decoded
|
||||
content = file_content
|
||||
elif isinstance(file_content, PathLike): # PathLike
|
||||
with open(str(file_content), "rb") as f:
|
||||
content = f.read()
|
||||
elif hasattr(file_content, "read"): # IO[bytes]
|
||||
# File-like objects need to be read
|
||||
content = file_content.read()
|
||||
|
||||
# Ensure content is string
|
||||
if isinstance(content, bytes):
|
||||
content = content.decode("utf-8")
|
||||
|
||||
return content
|
||||
|
||||
def _get_s3_object_name_from_batch_jsonl(
|
||||
self,
|
||||
openai_jsonl_content: List[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""
|
||||
Gets a unique S3 object name for the Bedrock batch processing job
|
||||
|
||||
named as: litellm-bedrock-files/{model}/{uuid}
|
||||
"""
|
||||
_model = openai_jsonl_content[0].get("body", {}).get("model", "")
|
||||
# Remove bedrock/ prefix if present
|
||||
if _model.startswith("bedrock/"):
|
||||
_model = _model[8:]
|
||||
|
||||
# Replace colons with hyphens for Bedrock S3 URI compliance
|
||||
_model = _model.replace(":", "-")
|
||||
|
||||
object_name = f"litellm-bedrock-files-{_model}-{uuid.uuid4()}.jsonl"
|
||||
return object_name
|
||||
|
||||
def get_object_name(
|
||||
self, extracted_file_data: ExtractedFileData, purpose: str
|
||||
) -> str:
|
||||
"""
|
||||
Get the object name for the request
|
||||
"""
|
||||
extracted_file_data_content = extracted_file_data.get("content")
|
||||
|
||||
if extracted_file_data_content is None:
|
||||
raise ValueError("file content is required")
|
||||
|
||||
if purpose == "batch":
|
||||
## 1. If jsonl, check if there's a model name
|
||||
file_content = self._get_content_from_openai_file(
|
||||
extracted_file_data_content
|
||||
)
|
||||
|
||||
# Split into lines and parse each line as JSON
|
||||
openai_jsonl_content = [
|
||||
json.loads(line) for line in file_content.splitlines() if line.strip()
|
||||
]
|
||||
if len(openai_jsonl_content) > 0:
|
||||
return self._get_s3_object_name_from_batch_jsonl(openai_jsonl_content)
|
||||
|
||||
## 2. If not jsonl, return the filename
|
||||
filename = extracted_file_data.get("filename")
|
||||
if filename:
|
||||
return filename
|
||||
## 3. If no file name, return timestamp
|
||||
return str(int(time.time()))
|
||||
|
||||
    def get_complete_file_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: Dict,
        litellm_params: Dict,
        data: CreateFileRequest,
    ) -> str:
        """
        Get the complete S3 URL for the file upload request.

        The bucket comes from ``litellm_params["s3_bucket_name"]`` or the
        ``AWS_S3_BUCKET_NAME`` env var; the object name is derived from
        the file content via ``get_object_name``.

        Raises:
            ValueError: when no bucket is configured, or file/purpose are missing.
        """
        bucket_name = litellm_params.get("s3_bucket_name") or os.getenv(
            "AWS_S3_BUCKET_NAME"
        )
        if not bucket_name:
            raise ValueError(
                "S3 bucket_name is required. Set 's3_bucket_name' in litellm_params or AWS_S3_BUCKET_NAME env var"
            )

        aws_region_name = self._get_aws_region_name(optional_params, model)

        file_data = data.get("file")
        purpose = data.get("purpose")
        if file_data is None:
            raise ValueError("file is required")
        if purpose is None:
            raise ValueError("purpose is required")
        extracted_file_data = extract_file_data(file_data)
        object_name = self.get_object_name(extracted_file_data, purpose)

        # Default to the regional S3 endpoint unless an explicit endpoint
        # (e.g. VPC endpoint or S3-compatible store) is supplied.
        s3_endpoint_url = (
            optional_params.get("s3_endpoint_url")
            or f"https://s3.{aws_region_name}.amazonaws.com"
        )

        return f"{s3_endpoint_url}/{bucket_name}/{object_name}"
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAICreateFileRequestOptionalParams]:
        # Bedrock S3 uploads accept no optional OpenAI create-file params.
        return []
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        # Nothing to map for S3 uploads - pass optional params through.
        return optional_params
    # Providers whose InvokeModel body uses the Converse API format
    # (messages + inferenceConfig + image blocks). Nova is the primary
    # example; add others here as they adopt the same schema.
    CONVERSE_INVOKE_PROVIDERS = ("nova",)
    def _map_openai_to_bedrock_params(
        self,
        openai_request_body: Dict[str, Any],
        provider: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform OpenAI request body to Bedrock-compatible modelInput
        parameters using existing transformation logic.

        Routes to the correct per-provider transformation so that the
        resulting dict matches the InvokeModel body that Bedrock expects
        for batch inference.

        Args:
            openai_request_body: One OpenAI chat-completions request body.
            provider: Bedrock invoke provider (e.g. "anthropic", "nova").
                None falls through to the passthrough branch.
        """
        from litellm.types.utils import LlmProviders

        _model = openai_request_body.get("model", "")
        messages = openai_request_body.get("messages", [])
        # Everything other than model/messages is treated as an optional param.
        optional_params = {
            k: v
            for k, v in openai_request_body.items()
            if k not in ["model", "messages"]
        }

        # --- Anthropic: use existing AmazonAnthropicClaudeConfig ---
        if provider == LlmProviders.ANTHROPIC:
            from litellm.llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
                AmazonAnthropicClaudeConfig,
            )

            config = AmazonAnthropicClaudeConfig()
            mapped_params = config.map_openai_params(
                non_default_params={},
                optional_params=optional_params,
                model=_model,
                drop_params=False,
            )
            return config.transform_request(
                model=_model,
                messages=messages,
                optional_params=mapped_params,
                litellm_params={},
                headers={},
            )

        # --- Converse API providers (e.g. Nova): use AmazonConverseConfig
        # to correctly convert image_url blocks to Bedrock image format
        # and wrap inference params inside inferenceConfig. ---
        if provider in self.CONVERSE_INVOKE_PROVIDERS:
            from litellm.llms.bedrock.chat.converse_transformation import (
                AmazonConverseConfig,
            )

            converse_config = AmazonConverseConfig()
            mapped_params = converse_config.map_openai_params(
                non_default_params=optional_params,
                optional_params={},
                model=_model,
                drop_params=False,
            )
            return converse_config.transform_request(
                model=_model,
                messages=messages,
                optional_params=mapped_params,
                litellm_params={},
                headers={},
            )

        # --- All other providers: passthrough (OpenAI-compatible models
        # like openai.gpt-oss-*, qwen, deepseek, etc.) ---
        return {
            "messages": messages,
            **optional_params,
        }
    def _transform_openai_jsonl_content_to_bedrock_jsonl_content(
        self, openai_jsonl_content: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Transforms OpenAI JSONL content to Bedrock batch format

        Bedrock batch format: { "recordId": "alphanumeric string", "modelInput": {JSON body} }
        Example:
        {
            "recordId": "CALL0000001",
            "modelInput": {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 1024,
                "messages": [
                    {
                        "role": "user",
                        "content": [{"type": "text", "text": "Hello"}]
                    }
                ]
            }
        }
        """

        bedrock_jsonl_content = []
        for idx, _openai_jsonl_content in enumerate(openai_jsonl_content):
            # Extract the request body from OpenAI format
            openai_body = _openai_jsonl_content.get("body", {})
            model = openai_body.get("model", "")

            try:
                # Normalize the model name via get_llm_provider (best-effort:
                # on failure we keep the raw model name and just log).
                model, _, _, _ = get_llm_provider(
                    model=model,
                    custom_llm_provider=None,
                )
            except Exception as e:
                verbose_logger.exception(
                    f"litellm.llms.bedrock.files.transformation.py::_transform_openai_jsonl_content_to_bedrock_jsonl_content() - Error inferring custom_llm_provider - {str(e)}"
                )

            # Determine provider from model name
            provider = self.get_bedrock_invoke_provider(model)

            # Transform to Bedrock modelInput format
            model_input = self._map_openai_to_bedrock_params(
                openai_request_body=openai_body, provider=provider
            )

            # Create Bedrock batch record; fall back to a zero-padded
            # sequential id when the record carries no custom_id.
            record_id = _openai_jsonl_content.get(
                "custom_id", f"CALL{str(idx).zfill(7)}"
            )
            bedrock_record = {"recordId": record_id, "modelInput": model_input}

            bedrock_jsonl_content.append(bedrock_record)
        return bedrock_jsonl_content
    def transform_create_file_request(
        self,
        model: str,
        create_file_data: CreateFileRequest,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[bytes, str, dict]:
        """
        Transform file request and return a pre-signed request for S3.
        This keeps the HTTP handler clean by doing all the signing here.

        Returns:
            dict with "method"/"url"/"headers"/"data" describing the exact
            PUT request the HTTP handler should execute.

        Raises:
            ValueError: when the file or its content is missing, or the
                content type is unsupported.
        """
        file_data = create_file_data.get("file")
        if file_data is None:
            raise ValueError("file is required")
        extracted_file_data = extract_file_data(file_data)
        extracted_file_data_content = extracted_file_data.get("content")

        if extracted_file_data_content is None:
            raise ValueError("file content is required")

        # Get and transform the file content
        if FilesAPIUtils.is_batch_jsonl_file(
            create_file_data=create_file_data,
            extracted_file_data=extracted_file_data,
        ):
            ## Transform JSONL content to Bedrock format
            original_file_content = self._get_content_from_openai_file(
                extracted_file_data_content
            )
            openai_jsonl_content = [
                json.loads(line)
                for line in original_file_content.splitlines()
                if line.strip()
            ]
            bedrock_jsonl_content = (
                self._transform_openai_jsonl_content_to_bedrock_jsonl_content(
                    openai_jsonl_content
                )
            )
            file_content = "\n".join(json.dumps(item) for item in bedrock_jsonl_content)
        elif isinstance(extracted_file_data_content, bytes):
            file_content = extracted_file_data_content.decode("utf-8")
        elif isinstance(extracted_file_data_content, str):
            file_content = extracted_file_data_content
        else:
            raise ValueError("Unsupported file content type")

        # Get the S3 URL for upload
        api_base = self.get_complete_file_url(
            api_base=None,
            api_key=None,
            model=model,
            optional_params=optional_params,
            litellm_params=litellm_params,
            data=create_file_data,
        )

        # Sign the request and return a pre-signed request object
        signed_headers, signed_body = self._sign_s3_request(
            content=file_content,
            api_base=api_base,
            optional_params=optional_params,
        )

        # Stash the upload URL so transform_create_file_response can later
        # derive the s3:// file id from it.
        litellm_params["upload_url"] = api_base

        # Return a dict that tells the HTTP handler exactly what to do
        return {
            "method": "PUT",
            "url": api_base,
            "headers": signed_headers,
            "data": signed_body or file_content,
        }
    def _sign_s3_request(
        self,
        content: str,
        api_base: str,
        optional_params: dict,
    ) -> Tuple[dict, str]:
        """
        Sign S3 PUT request using the same proven logic as S3Logger.
        Reuses the exact pattern from litellm/integrations/s3_v2.py

        Args:
            content: UTF-8 payload to upload.
            api_base: Full S3 object URL the PUT is addressed to.
            optional_params: May carry explicit AWS credentials / region.

        Returns:
            Tuple of (signed request headers, signed request body).

        Raises:
            ImportError: when boto3/botocore (or requests) is unavailable.
        """
        try:
            import hashlib

            import requests
            from botocore.auth import SigV4Auth
            from botocore.awsrequest import AWSRequest
        except ImportError:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")

        # Get AWS credentials using existing methods
        aws_region_name = self._get_aws_region_name(
            optional_params=optional_params, model=""
        )
        credentials = self.get_credentials(
            aws_access_key_id=optional_params.get("aws_access_key_id"),
            aws_secret_access_key=optional_params.get("aws_secret_access_key"),
            aws_session_token=optional_params.get("aws_session_token"),
            aws_region_name=aws_region_name,
            aws_session_name=optional_params.get("aws_session_name"),
            aws_profile_name=optional_params.get("aws_profile_name"),
            aws_role_name=optional_params.get("aws_role_name"),
            aws_web_identity_token=optional_params.get("aws_web_identity_token"),
            aws_sts_endpoint=optional_params.get("aws_sts_endpoint"),
        )

        # Calculate SHA256 hash of the content (REQUIRED for S3)
        content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()

        # Prepare headers with required S3 headers (same as s3_v2.py)
        request_headers = {
            "Content-Type": "application/json",  # JSONL files are JSON content
            "x-amz-content-sha256": content_hash,  # REQUIRED by S3
            "Content-Language": "en",
            "Cache-Control": "private, immutable, max-age=31536000, s-maxage=0",
        }

        # Use requests.Request to prepare the request (same pattern as s3_v2.py)
        req = requests.Request("PUT", api_base, data=content, headers=request_headers)
        prepped = req.prepare()

        # Sign the request with S3 service
        aws_request = AWSRequest(
            method=prepped.method,
            url=prepped.url,
            data=prepped.body,
            headers=prepped.headers,
        )

        # Get region name for non-LLM API calls (same as s3_v2.py)
        signing_region = self.get_aws_region_name_for_non_llm_api_calls(
            aws_region_name=aws_region_name
        )

        SigV4Auth(credentials, "s3", signing_region).add_auth(aws_request)

        # Return signed headers and body
        signed_body = aws_request.body
        if isinstance(signed_body, bytes):
            signed_body = signed_body.decode("utf-8")
        elif signed_body is None:
            signed_body = content  # Fallback to original content

        return dict(aws_request.headers), signed_body
def _convert_https_url_to_s3_uri(self, https_url: str) -> tuple[str, str]:
|
||||
"""
|
||||
Convert HTTPS S3 URL to s3:// URI format.
|
||||
|
||||
Args:
|
||||
https_url: HTTPS S3 URL (e.g., "https://s3.us-west-2.amazonaws.com/bucket/key")
|
||||
|
||||
Returns:
|
||||
Tuple of (s3_uri, filename)
|
||||
|
||||
Example:
|
||||
Input: "https://s3.us-west-2.amazonaws.com/litellm-proxy/file.jsonl"
|
||||
Output: ("s3://litellm-proxy/file.jsonl", "file.jsonl")
|
||||
"""
|
||||
import re
|
||||
|
||||
# Match HTTPS S3 URL patterns
|
||||
# Pattern 1: https://s3.region.amazonaws.com/bucket/key
|
||||
# Pattern 2: https://bucket.s3.region.amazonaws.com/key
|
||||
|
||||
pattern1 = r"https://s3\.([^.]+)\.amazonaws\.com/([^/]+)/(.+)"
|
||||
pattern2 = r"https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)"
|
||||
|
||||
match1 = re.match(pattern1, https_url)
|
||||
match2 = re.match(pattern2, https_url)
|
||||
|
||||
if match1:
|
||||
# Pattern: https://s3.region.amazonaws.com/bucket/key
|
||||
region, bucket, key = match1.groups()
|
||||
s3_uri = f"s3://{bucket}/{key}"
|
||||
elif match2:
|
||||
# Pattern: https://bucket.s3.region.amazonaws.com/key
|
||||
bucket, region, key = match2.groups()
|
||||
s3_uri = f"s3://{bucket}/{key}"
|
||||
else:
|
||||
# Fallback: try to extract bucket and key from URL path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(https_url)
|
||||
path_parts = parsed.path.lstrip("/").split("/", 1)
|
||||
if len(path_parts) >= 2:
|
||||
bucket, key = path_parts[0], path_parts[1]
|
||||
s3_uri = f"s3://{bucket}/{key}"
|
||||
else:
|
||||
raise ValueError(f"Unable to parse S3 URL: {https_url}")
|
||||
|
||||
# Extract filename from key
|
||||
filename = key.split("/")[-1] if "/" in key else key
|
||||
|
||||
return s3_uri, filename
|
||||
|
||||
    def transform_create_file_response(
        self,
        model: Optional[str],
        raw_response: Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        """
        Transform S3 File upload response into OpenAI-style FileObject.

        The file id is the s3:// URI derived from the upload URL that
        transform_create_file_request stashed in litellm_params.
        """
        # For S3 uploads, we typically get an ETag and other metadata
        response_headers = raw_response.headers
        # Extract S3 object information from the response
        # S3 PUT object returns ETag and other metadata in headers
        content_length = response_headers.get("Content-Length", "0")

        # Use the actual upload URL that was used for the S3 upload
        upload_url = litellm_params.get("upload_url")
        file_id: str = ""
        filename: str = ""
        if upload_url:
            # Convert HTTPS S3 URL to s3:// URI format
            file_id, filename = self._convert_https_url_to_s3_uri(upload_url)

        return OpenAIFileObject(
            purpose="batch",  # Default purpose for Bedrock files
            id=file_id,
            filename=filename,
            created_at=int(time.time()),  # Current timestamp
            status="uploaded",
            bytes=int(content_length) if content_length.isdigit() else 0,
            object="file",
        )
    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[Dict, Headers]
    ) -> BaseLLMException:
        # Wrap provider errors in BedrockError so callers see a consistent
        # exception type across Bedrock endpoints.
        return BedrockError(
            status_code=status_code, message=error_message, headers=headers
        )
    # ------------------------------------------------------------------
    # Unsupported OpenAI Files operations.
    # A Bedrock "file" is just an S3 object, so retrieve/delete/list and
    # direct content retrieval are not implemented by this config
    # (content download is handled by the dedicated S3 handler instead).
    # ------------------------------------------------------------------

    def transform_retrieve_file_request(
        self,
        file_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        raise NotImplementedError("BedrockFilesConfig does not support file retrieval")

    def transform_retrieve_file_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        raise NotImplementedError("BedrockFilesConfig does not support file retrieval")

    def transform_delete_file_request(
        self,
        file_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        raise NotImplementedError("BedrockFilesConfig does not support file deletion")

    def transform_delete_file_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> FileDeleted:
        raise NotImplementedError("BedrockFilesConfig does not support file deletion")

    def transform_list_files_request(
        self,
        purpose: Optional[str],
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        raise NotImplementedError("BedrockFilesConfig does not support file listing")

    def transform_list_files_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> List[OpenAIFileObject]:
        raise NotImplementedError("BedrockFilesConfig does not support file listing")

    def transform_file_content_request(
        self,
        file_content_request,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        raise NotImplementedError(
            "BedrockFilesConfig does not support file content retrieval"
        )

    def transform_file_content_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> HttpxBinaryResponseContent:
        raise NotImplementedError(
            "BedrockFilesConfig does not support file content retrieval"
        )
class BedrockJsonlFilesTransformation:
    """
    Transforms OpenAI /v1/files/* requests to Bedrock S3 file uploads for batch processing
    """

    def transform_openai_file_content_to_bedrock_file_content(
        self, openai_file_content: Optional[FileTypes] = None
    ) -> Tuple[str, str]:
        """
        Transforms OpenAI FileContentRequest to Bedrock S3 file format.

        Returns:
            Tuple of (bedrock_jsonl_string, s3_object_name).

        Raises:
            ValueError: when no file content is supplied.
        """

        if openai_file_content is None:
            raise ValueError("contents of file are None")
        # Read the content of the file
        file_content = self._get_content_from_openai_file(openai_file_content)

        # Split into lines and parse each line as JSON
        openai_jsonl_content = [
            json.loads(line) for line in file_content.splitlines() if line.strip()
        ]
        # Re-shape each record into Bedrock's {recordId, modelInput} format.
        bedrock_jsonl_content = (
            self._transform_openai_jsonl_content_to_bedrock_jsonl_content(
                openai_jsonl_content
            )
        )
        bedrock_jsonl_string = "\n".join(
            json.dumps(item) for item in bedrock_jsonl_content
        )
        # NOTE(review): assumes at least one JSONL record -
        # _get_s3_object_name indexes record [0]; confirm empty files are
        # rejected upstream.
        object_name = self._get_s3_object_name(
            openai_jsonl_content=openai_jsonl_content
        )
        return bedrock_jsonl_string, object_name
    def _transform_openai_jsonl_content_to_bedrock_jsonl_content(
        self, openai_jsonl_content: List[Dict[str, Any]]
    ):
        """
        Delegate to the main BedrockFilesConfig transformation method so
        both classes share a single OpenAI -> Bedrock record mapping.
        """
        config = BedrockFilesConfig()
        return config._transform_openai_jsonl_content_to_bedrock_jsonl_content(
            openai_jsonl_content
        )
def _get_s3_object_name(
|
||||
self,
|
||||
openai_jsonl_content: List[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""
|
||||
Gets a unique S3 object name for the Bedrock batch processing job
|
||||
|
||||
named as: litellm-bedrock-files-{model}-{uuid}
|
||||
"""
|
||||
_model = openai_jsonl_content[0].get("body", {}).get("model", "")
|
||||
# Remove bedrock/ prefix if present
|
||||
if _model.startswith("bedrock/"):
|
||||
_model = _model[8:]
|
||||
object_name = f"litellm-bedrock-files-{_model}-{uuid.uuid4()}.jsonl"
|
||||
return object_name
|
||||
|
||||
def _get_content_from_openai_file(self, openai_file_content: FileTypes) -> str:
|
||||
"""
|
||||
Helper to extract content from various OpenAI file types and return as string.
|
||||
|
||||
Handles:
|
||||
- Direct content (str, bytes, IO[bytes])
|
||||
- Tuple formats: (filename, content, [content_type], [headers])
|
||||
- PathLike objects
|
||||
"""
|
||||
content: Union[str, bytes] = b""
|
||||
# Extract file content from tuple if necessary
|
||||
if isinstance(openai_file_content, tuple):
|
||||
# Take the second element which is always the file content
|
||||
file_content = openai_file_content[1]
|
||||
else:
|
||||
file_content = openai_file_content
|
||||
|
||||
# Handle different file content types
|
||||
if isinstance(file_content, str):
|
||||
# String content can be used directly
|
||||
content = file_content
|
||||
elif isinstance(file_content, bytes):
|
||||
# Bytes content can be decoded
|
||||
content = file_content
|
||||
elif isinstance(file_content, PathLike): # PathLike
|
||||
with open(str(file_content), "rb") as f:
|
||||
content = f.read()
|
||||
elif hasattr(file_content, "read"): # IO[bytes]
|
||||
# File-like objects need to be read
|
||||
content = file_content.read()
|
||||
|
||||
# Ensure content is string
|
||||
if isinstance(content, bytes):
|
||||
content = content.decode("utf-8")
|
||||
|
||||
return content
|
||||
|
||||
def transform_s3_bucket_response_to_openai_file_object(
|
||||
self, create_file_data: CreateFileRequest, s3_upload_response: Dict[str, Any]
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
Transforms S3 Bucket upload file response to OpenAI FileObject
|
||||
"""
|
||||
# S3 response typically contains ETag, key, etc.
|
||||
object_key = s3_upload_response.get("Key", "")
|
||||
bucket_name = s3_upload_response.get("Bucket", "")
|
||||
|
||||
# Extract filename from object key
|
||||
filename = object_key.split("/")[-1] if "/" in object_key else object_key
|
||||
|
||||
return OpenAIFileObject(
|
||||
purpose=create_file_data.get("purpose", "batch"),
|
||||
id=f"s3://{bucket_name}/{object_key}",
|
||||
filename=filename,
|
||||
created_at=int(time.time()), # Current timestamp
|
||||
status="uploaded",
|
||||
bytes=s3_upload_response.get("ContentLength", 0),
|
||||
object="file",
|
||||
)
|
||||
Reference in New Issue
Block a user