import json
from functools import lru_cache
from typing import Optional

from litellm.constants import _REALTIME_BODY_CACHE_SIZE


@lru_cache(maxsize=_REALTIME_BODY_CACHE_SIZE)
def _realtime_request_body(model: Optional[str]) -> bytes:
    """
    Generate the realtime websocket request body.

    Cached with LRU semantics (bounded by ``_REALTIME_BODY_CACHE_SIZE``) to
    avoid repeated serialization work while keeping memory usage bounded.

    Args:
        model: Model name to place in the body. ``None`` and the empty
            string both serialize as ``""``.

    Returns:
        The UTF-8 encoded JSON object ``{"model": "<model>"}``.
    """
    # Serialize with json.dumps rather than string interpolation so that
    # quotes, backslashes and control characters in the model name are escaped
    # and the body is always valid JSON. The default separators reproduce the
    # `{"model": "..."}` layout exactly; ensure_ascii=False keeps non-ASCII
    # names as raw UTF-8 instead of \uXXXX escapes.
    return json.dumps({"model": model or ""}, ensure_ascii=False).encode()