"""
Response Polling Handler for Background Responses with Cache
"""

import json
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from litellm._logging import verbose_proxy_logger
from litellm._uuid import uuid4
from litellm.caching.redis_cache import RedisCache
from litellm.types.llms.openai import ResponsesAPIResponse, ResponsesAPIStatus


class ResponsePollingHandler:
    """Handles polling-based responses with Redis cache"""

    CACHE_KEY_PREFIX = "litellm:polling:response:"
    POLLING_ID_PREFIX = "litellm_poll_"  # Clear prefix to identify polling IDs

    def __init__(self, redis_cache: Optional[RedisCache] = None, ttl: int = 3600):
        self.redis_cache = redis_cache
        self.ttl = ttl  # Time-to-live for cache entries (default: 1 hour)

    @classmethod
    def generate_polling_id(cls) -> str:
        """Generate a unique UUID for polling with clear prefix"""
        return f"{cls.POLLING_ID_PREFIX}{uuid4()}"

    @classmethod
    def is_polling_id(cls, response_id: str) -> bool:
        """Check if a response_id is a polling ID"""
        return response_id.startswith(cls.POLLING_ID_PREFIX)

    @classmethod
    def get_cache_key(cls, polling_id: str) -> str:
        """Get Redis cache key for a polling ID"""
        return f"{cls.CACHE_KEY_PREFIX}{polling_id}"

    async def create_initial_state(
        self,
        polling_id: str,
        request_data: Dict[str, Any],
    ) -> ResponsesAPIResponse:
        """
        Create initial state in Redis for a polling request

        Uses OpenAI ResponsesAPIResponse object:
        https://platform.openai.com/docs/api-reference/responses/object

        Args:
            polling_id: Unique identifier for this polling request
            request_data: Original request data

        Returns:
            ResponsesAPIResponse object following OpenAI spec
        """
        created_timestamp = int(datetime.now(timezone.utc).timestamp())

        # Create OpenAI-compliant response object
        response = ResponsesAPIResponse(
            id=polling_id,
            object="response",
            status="queued",  # OpenAI native status
            created_at=created_timestamp,
            output=[],
            metadata=request_data.get("metadata", {}),
            usage=None,
        )

        cache_key = self.get_cache_key(polling_id)

        if self.redis_cache:
            # Store ResponsesAPIResponse directly in Redis
            await self.redis_cache.async_set_cache(
                key=cache_key,
                value=response.model_dump_json(),  # Pydantic v2 method
                ttl=self.ttl,
            )
            verbose_proxy_logger.debug(
                f"Created initial polling state for {polling_id} with TTL={self.ttl}s"
            )

        return response

    async def update_state(
        self,
        polling_id: str,
        status: Optional[ResponsesAPIStatus] = None,
        usage: Optional[Dict] = None,
        error: Optional[Dict] = None,
        incomplete_details: Optional[Dict] = None,
        reasoning: Optional[Dict] = None,
        tool_choice: Optional[Any] = None,
        tools: Optional[list] = None,
        output: Optional[list] = None,
        # Additional ResponsesAPIResponse fields
        model: Optional[str] = None,
        instructions: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        max_output_tokens: Optional[int] = None,
        previous_response_id: Optional[str] = None,
        text: Optional[Dict] = None,
        truncation: Optional[str] = None,
        parallel_tool_calls: Optional[bool] = None,
        user: Optional[str] = None,
        store: Optional[bool] = None,
    ) -> None:
        """
        Update the polling state in Redis

        Uses OpenAI Response object format with native status types:
        https://platform.openai.com/docs/api-reference/responses/object

        Args:
            polling_id: Unique identifier for this polling request
            status: OpenAI ResponsesAPIStatus value
            usage: Usage information
            error: Error dict (automatically sets status to "failed")
            incomplete_details: Details for incomplete responses
            reasoning: Reasoning configuration from response.completed
            tool_choice: Tool choice configuration from response.completed
            tools: Tools list from response.completed
            output: Full output list to replace current output
            model: Model identifier
            instructions: System instructions
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            max_output_tokens: Maximum output tokens
            previous_response_id: ID of previous response in conversation
            text: Text configuration
            truncation: Truncation setting
            parallel_tool_calls: Whether parallel tool calls are enabled
            user: User identifier
            store: Whether to store the response
        """
        if not self.redis_cache:
            return

        cache_key = self.get_cache_key(polling_id)

        # Get current state
        cached_state = await self.redis_cache.async_get_cache(cache_key)
        if not cached_state:
            verbose_proxy_logger.warning(
                f"No cached state found for polling_id: {polling_id}"
            )
            return

        # Parse existing ResponsesAPIResponse from cache
        state = json.loads(cached_state)

        # Update status (using OpenAI native status values)
        if status:
            state["status"] = status

        # Replace full output list if provided
        if output is not None:
            state["output"] = output

        # Update usage
        if usage:
            state["usage"] = usage

        # Handle error (sets status to OpenAI's "failed")
        if error:
            state["status"] = "failed"
            state["error"] = error  # Use OpenAI's 'error' field

        # Handle incomplete details
        if incomplete_details:
            state["incomplete_details"] = incomplete_details

        # Update reasoning, tool_choice, tools from response.completed
        if reasoning is not None:
            state["reasoning"] = reasoning
        if tool_choice is not None:
            state["tool_choice"] = tool_choice
        if tools is not None:
            state["tools"] = tools

        # Update additional ResponsesAPIResponse fields
        if model is not None:
            state["model"] = model
        if instructions is not None:
            state["instructions"] = instructions
        if temperature is not None:
            state["temperature"] = temperature
        if top_p is not None:
            state["top_p"] = top_p
        if max_output_tokens is not None:
            state["max_output_tokens"] = max_output_tokens
        if previous_response_id is not None:
            state["previous_response_id"] = previous_response_id
        if text is not None:
            state["text"] = text
        if truncation is not None:
            state["truncation"] = truncation
        if parallel_tool_calls is not None:
            state["parallel_tool_calls"] = parallel_tool_calls
        if user is not None:
            state["user"] = user
        if store is not None:
            state["store"] = store

        # Update cache with configured TTL
        await self.redis_cache.async_set_cache(
            key=cache_key,
            value=json.dumps(state),
            ttl=self.ttl,
        )

        output_count = len(state.get("output", []))
        verbose_proxy_logger.debug(
            f"Updated polling state for {polling_id}: status={state['status']}, output_items={output_count}"
        )

    async def get_state(self, polling_id: str) -> Optional[Dict[str, Any]]:
        """Get current polling state from Redis"""
        if not self.redis_cache:
            return None

        cache_key = self.get_cache_key(polling_id)
        cached_state = await self.redis_cache.async_get_cache(cache_key)

        if cached_state:
            return json.loads(cached_state)

        return None

    async def cancel_polling(self, polling_id: str) -> bool:
        """
        Cancel a polling request

        Follows the OpenAI Response object format for the "cancelled" status
        """
        await self.update_state(
            polling_id=polling_id,
            status="cancelled",
        )
        return True

    async def delete_polling(self, polling_id: str) -> bool:
        """Delete a polling request from cache"""
        if not self.redis_cache:
            return False

        cache_key = self.get_cache_key(polling_id)
        # Use RedisCache's async_delete_cache method, which handles both Redis and RedisCluster
        await self.redis_cache.async_delete_cache(cache_key)
        return True
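

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not wired into the proxy): one way a caller could
# drive the polling lifecycle above. The RedisCache instance and the request
# payload below are placeholder assumptions, not values produced by this module.
# ---------------------------------------------------------------------------
async def _example_polling_lifecycle(redis_cache: RedisCache) -> None:
    handler = ResponsePollingHandler(redis_cache=redis_cache, ttl=600)

    # 1. Mint a polling ID and seed the initial "queued" state in Redis.
    polling_id = handler.generate_polling_id()
    await handler.create_initial_state(
        polling_id=polling_id,
        request_data={"metadata": {"trace_id": "example"}},
    )

    # 2. When the background call finishes, persist the final output and usage.
    await handler.update_state(
        polling_id=polling_id,
        status="completed",
        output=[{"type": "message", "content": "done"}],
        usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
    )

    # 3. Clients poll get_state() until the status is terminal.
    state = await handler.get_state(polling_id)
    if state and state["status"] == "completed":
        # 4. Optionally drop the cache entry once the result has been consumed.
        await handler.delete_polling(polling_id)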


def should_use_polling_for_request(
    background_mode: bool,
    polling_via_cache_enabled,  # Can be False, "all", or List[str]
    redis_cache,  # RedisCache or None
    model: str,
    llm_router,  # Router instance or None
    native_background_mode: Optional[
        List[str]
    ] = None,  # List of models that should use native background mode
) -> bool:
    """
    Determine if polling via cache should be used for a request.

    Args:
        background_mode: Whether background=true was set in the request
        polling_via_cache_enabled: Config value - False, "all", or list of providers
        redis_cache: Redis cache instance (required for polling)
        model: Model name from the request (e.g., "gpt-5" or "openai/gpt-4o")
        llm_router: LiteLLM router instance for looking up model deployments
        native_background_mode: List of model names that should use native provider
            background mode instead of polling via cache

    Returns:
        True if polling should be used, False otherwise
    """
    # All conditions must be met
    if not (background_mode and polling_via_cache_enabled and redis_cache):
        return False

    # Check if model is in native_background_mode list - these use native provider background mode
    if native_background_mode and model in native_background_mode:
        verbose_proxy_logger.debug(
            f"Model {model} is in native_background_mode list, skipping polling via cache"
        )
        return False

    # "all" enables polling for all providers
    if polling_via_cache_enabled == "all":
        return True

    # Check if provider is in the enabled list
    if isinstance(polling_via_cache_enabled, list):
        # First, try to get provider from model string format "provider/model"
        if "/" in model:
            provider = model.split("/")[0]
            if provider in polling_via_cache_enabled:
                return True
        # Otherwise, check ALL deployments for this model_name in router
        elif llm_router is not None:
            try:
                # Get all deployment indices for this model name
                indices = llm_router.model_name_to_deployment_indices.get(model, [])
                for idx in indices:
                    deployment_dict = llm_router.model_list[idx]
                    litellm_params = deployment_dict.get("litellm_params", {})

                    # Check custom_llm_provider first
                    dep_provider = litellm_params.get("custom_llm_provider")

                    # Then try to extract from model (e.g., "openai/gpt-5")
                    if not dep_provider:
                        dep_model = litellm_params.get("model", "")
                        if "/" in dep_model:
                            dep_provider = dep_model.split("/")[0]

                    # If ANY deployment's provider matches, enable polling
                    if dep_provider and dep_provider in polling_via_cache_enabled:
                        verbose_proxy_logger.debug(
                            f"Polling enabled for model={model}, provider={dep_provider}"
                        )
                        return True
            except Exception as e:
                verbose_proxy_logger.debug(
                    f"Could not resolve provider for model {model}: {e}"
                )

    return False
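

# Minimal, illustrative check of the gating logic above (hypothetical config
# values). With an explicit "provider/model" string no router lookup is needed,
# so a plain placeholder object stands in for the Redis cache here.
if __name__ == "__main__":
    _placeholder_cache = object()

    # Enabled: background request, provider "openai" is in the allow-list.
    assert should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=_placeholder_cache,
        model="openai/gpt-4o",
        llm_router=None,
    )

    # Disabled: background=False short-circuits everything else.
    assert not should_use_polling_for_request(
        background_mode=False,
        polling_via_cache_enabled="all",
        redis_cache=_placeholder_cache,
        model="openai/gpt-4o",
        llm_router=None,
    )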