lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/duckduckgo/search/transformation.py

"""
Calls DuckDuckGo's Instant Answer API to search the web.

DuckDuckGo API Reference: https://duckduckgo.com/api
"""
from typing import Dict, List, Literal, Optional, TypedDict, Union
from urllib.parse import urlencode

import httpx

from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.search.transformation import (
    BaseSearchConfig,
    SearchResponse,
    SearchResult,
)
from litellm.secret_managers.main import get_secret_str


class _DuckDuckGoSearchRequestRequired(TypedDict):
    """Required fields for DuckDuckGo Search API request.

    Declared as its own (total) TypedDict so that ``q`` remains mandatory
    when ``DuckDuckGoSearchRequest`` subclasses it with ``total=False``.
    """

    q: str  # Required - search query


class DuckDuckGoSearchRequest(_DuckDuckGoSearchRequestRequired, total=False):
    """
    DuckDuckGo Instant Answer API request format.
    Based on: https://duckduckgo.com/api

    Inherits the required ``q`` field from
    ``_DuckDuckGoSearchRequestRequired``; every field declared in this class
    is optional because of ``total=False``.
    """

    format: str  # Optional - output format ('json', 'xml'), default 'json'
    pretty: int  # Optional - pretty print (0 or 1), default 1
    no_redirect: int  # Optional - skip HTTP redirects (0 or 1), default 0
    no_html: int  # Optional - remove HTML from text (0 or 1), default 0
    skip_disambig: int  # Optional - skip disambiguation results (0 or 1), default 0


class DuckDuckGoSearchConfig(BaseSearchConfig):
    """
    Search configuration for DuckDuckGo's Instant Answer API.

    Request flow implemented by this class:
    1. ``transform_search_request`` packs all query-string parameters under
       the ``_duckduckgo_params`` key of the request body.
    2. ``get_complete_url`` url-encodes those parameters onto the API base.
    3. ``transform_search_response`` maps the JSON answer onto
       ``SearchResponse`` / ``SearchResult``.
    """

    DUCKDUCKGO_API_BASE = "https://api.duckduckgo.com"

    # Instant Answer flags copied verbatim from optional_params to the query string.
    _PASSTHROUGH_PARAMS = ("pretty", "no_redirect", "no_html", "skip_disambig")

    # A title synthesized from topic text is cut to this many characters.
    _MAX_TITLE_CHARS = 50

    @staticmethod
    def ui_friendly_name() -> str:
        """Return the provider name shown in user interfaces."""
        return "DuckDuckGo"

    def get_http_method(self) -> Literal["GET", "POST"]:
        """
        Get HTTP method for search requests.
        DuckDuckGo Instant Answer API uses GET requests.

        Returns:
            HTTP method 'GET'
        """
        return "GET"

    def validate_environment(
        self,
        headers: Dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Validate environment and return headers.
        DuckDuckGo Instant Answer API does not require authentication, so
        ``api_key`` and ``api_base`` are accepted but unused.

        Args:
            headers: Header dict; it is updated in place.
            api_key: Unused.
            api_base: Unused.

        Returns:
            The same ``headers`` dict with ``Content-Type`` set.
        """
        headers["Content-Type"] = "application/json"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        optional_params: dict,
        data: Optional[Union[Dict, List[Dict]]] = None,
        **kwargs,
    ) -> str:
        """
        Get complete URL for Search endpoint.
        DuckDuckGo uses query parameters, so the URL carries the whole request.

        Args:
            api_base: Explicit base URL. Falls back to the
                ``DUCKDUCKGO_API_BASE`` secret, then to the class default.
            optional_params: Unused here; the parameters are read from ``data``.
            data: Output of ``transform_search_request``. When it is a dict
                holding ``_duckduckgo_params``, those are url-encoded onto
                the base URL.

        Returns:
            ``<api_base>/?<query string>`` when parameters are available,
            otherwise the resolved ``api_base`` unchanged.
        """
        api_base = (
            api_base
            or get_secret_str("DUCKDUCKGO_API_BASE")
            or self.DUCKDUCKGO_API_BASE
        )

        if isinstance(data, dict) and "_duckduckgo_params" in data:
            query_string = urlencode(data["_duckduckgo_params"], doseq=True)
            # Strip trailing slashes so a base such as "https://host/" does
            # not yield "https://host//?q=...".
            return f"{api_base.rstrip('/')}/?{query_string}"

        return api_base

    def transform_search_request(
        self,
        query: Union[str, List[str]],
        optional_params: dict,
        **kwargs,
    ) -> Dict:
        """
        Transform Search request to DuckDuckGo API format.

        Args:
            query: Search query (string or list of strings). DuckDuckGo only
                supports single string queries, so a list is joined with spaces.
            optional_params: Optional parameters for the request
                - max_results: Maximum number of search results (DuckDuckGo API
                  doesn't directly support this, used for filtering)
                - pretty: Pretty print (0 or 1)
                - no_redirect: Skip HTTP redirects (0 or 1)
                - no_html: Remove HTML from text (0 or 1)
                - skip_disambig: Skip disambiguation results (0 or 1)
                Entries whose value is ``None`` are omitted instead of being
                url-encoded as the literal string "None".

        Returns:
            ``{"_duckduckgo_params": {...}}`` where the inner dict holds the
            query-string parameters consumed by ``get_complete_url``.
        """
        if isinstance(query, list):
            query = " ".join(query)

        # The response is always parsed as JSON, so the format is fixed.
        params: Dict = {"q": query, "format": "json"}

        # max_results travels in the query string as "_max_results" because
        # transform_search_response only sees the httpx response and recovers
        # the limit from the request URL.
        max_results = optional_params.get("max_results")
        if max_results is not None:
            params["_max_results"] = max_results

        for name in self._PASSTHROUGH_PARAMS:
            value = optional_params.get(name)
            if value is not None:
                params[name] = value

        return {"_duckduckgo_params": params}

    @staticmethod
    def _extract_max_results(raw_response: httpx.Response) -> Optional[int]:
        """Return the ``_max_results`` limit encoded in the request URL, if any."""
        try:
            request = raw_response.request
        except RuntimeError:
            # httpx raises (rather than returning None) when the response has
            # no request attached, so a truthiness check alone is not enough.
            return None
        if not request:
            return None

        query_params = request.url.params
        if "_max_results" not in query_params:
            return None
        try:
            return int(query_params["_max_results"])
        except (ValueError, TypeError):
            # A non-numeric limit is treated as "no limit".
            return None

    @staticmethod
    def _collect_result_topics(related_topics: List) -> List[Dict]:
        """Flatten ``RelatedTopics`` one level into entries having ``FirstURL`` and ``Text``."""
        collected: List[Dict] = []
        for topic in related_topics:
            if not isinstance(topic, dict):
                continue
            if "FirstURL" in topic and "Text" in topic:
                collected.append(topic)
            elif "Topics" in topic:
                # Grouped topics nest their entries under "Topics".
                for nested in topic.get("Topics") or []:
                    if (
                        isinstance(nested, dict)
                        and "FirstURL" in nested
                        and "Text" in nested
                    ):
                        collected.append(nested)
        return collected

    @classmethod
    def _topic_to_result(cls, topic: Dict) -> SearchResult:
        """Build a ``SearchResult`` from one topic entry, deriving the title from its text."""
        text = topic.get("Text") or ""
        url = topic.get("FirstURL") or ""

        # "Title - description" splits at the first " - ". Without a
        # separator the (possibly truncated) text doubles as the title.
        title, separator, remainder = text.partition(" - ")
        if separator:
            snippet = remainder
        else:
            snippet = text
            if len(text) > cls._MAX_TITLE_CHARS:
                title = text[: cls._MAX_TITLE_CHARS] + "..."

        return SearchResult(
            title=title,
            url=url,
            snippet=snippet,
            date=None,
            last_updated=None,
        )

    def transform_search_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> SearchResponse:
        """
        Transform DuckDuckGo API response to LiteLLM unified SearchResponse format.

        DuckDuckGo → LiteLLM mappings:
        - Heading / AbstractURL / AbstractText → first SearchResult (only when
          both AbstractURL and AbstractText are non-empty)
        - RelatedTopics[].Text → SearchResult.title + snippet
        - RelatedTopics[].FirstURL → SearchResult.url
        - RelatedTopics[].Topics[] → same mapping, flattened one level
        - No date/last_updated fields in DuckDuckGo response (set to None)

        Args:
            raw_response: Raw httpx response from DuckDuckGo API
            logging_obj: Logging object for tracking (unused here)

        Returns:
            SearchResponse with standardized format, holding at most
            ``max_results`` entries when that limit was part of the request.
        """
        response_json = raw_response.json()
        max_results = self._extract_max_results(raw_response)

        results: List[SearchResult] = []

        def has_room() -> bool:
            return max_results is None or len(results) < max_results

        # Result order: Abstract first, then RelatedTopics in response order.
        if (
            has_room()
            and response_json.get("AbstractURL")
            and response_json.get("AbstractText")
        ):
            results.append(
                SearchResult(
                    title=response_json.get("Heading") or "",
                    url=response_json.get("AbstractURL", ""),
                    snippet=response_json.get("AbstractText", ""),
                    date=None,
                    last_updated=None,
                )
            )

        related_topics = response_json.get("RelatedTopics") or []
        for topic in self._collect_result_topics(related_topics):
            if not has_room():
                break
            results.append(self._topic_to_result(topic))

        return SearchResponse(
            results=results,
            object="search",
        )