Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/searxng/search/transformation.py
2026-03-26 20:06:14 +08:00

226 lines
8.1 KiB
Python

"""
Calls SearXNG's /search endpoint to search the web.
SearXNG API Reference: https://docs.searxng.org/dev/search_api.html
"""
from typing import Dict, List, Optional, TypedDict, Union
import httpx
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.search.transformation import (
BaseSearchConfig,
SearchResponse,
SearchResult,
)
from litellm.secret_managers.main import get_secret_str
class _SearXNGSearchRequestRequired(TypedDict):
    """
    Mandatory part of a SearXNG Search API request.

    Keys:
        q: The search query.
    """

    q: str


class SearXNGSearchRequest(_SearXNGSearchRequestRequired, total=False):
    """
    Full SearXNG Search API request: the required ``q`` plus optional keys.

    Based on: https://docs.searxng.org/dev/search_api.html

    Optional keys:
        categories: Comma-separated list of categories.
        engines: Comma-separated list of engines.
        language: Language code.
        pageno: Page number (default 1).
        time_range: Time range filter (day, month, year).
        format: Output format (json, csv, rss) - should be 'json'.
    """

    categories: str
    engines: str
    language: str
    pageno: int
    time_range: str
    format: str
class SearXNGSearchConfig(BaseSearchConfig):
    """
    Search configuration for SearXNG.

    Translates a unified search request into a GET call against SearXNG's
    ``/search`` endpoint (all parameters travel in the query string) and maps
    the JSON response back into the unified ``SearchResponse`` format.
    """

    @staticmethod
    def ui_friendly_name() -> str:
        """Return the display name of this provider."""
        return "SearXNG"

    def get_http_method(self) -> str:
        """
        Return the HTTP method used for search calls.

        SearXNG supports both GET and POST, but we use GET for simplicity.
        """
        return "GET"

    def validate_environment(
        self,
        headers: Dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Add the request headers for SearXNG and return them.

        SearXNG is open-source and doesn't require an API key by default, but
        some instances may require authentication via headers, so an optional
        key is supported.

        Args:
            headers: Header dict to populate. It is updated in place and also
                returned.
            api_key: Optional key; falls back to the ``SEARXNG_API_KEY`` secret.
            api_base: Unused here; accepted for interface compatibility.

        Returns:
            The same ``headers`` dict, with ``Content-Type`` always set and
            ``Authorization`` set only when a key is available.
        """
        api_key = api_key or get_secret_str("SEARXNG_API_KEY")
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        headers["Content-Type"] = "application/json"
        return headers

    @staticmethod
    def _normalize_query_params(params: Dict) -> Dict:
        """
        Prepare request parameters for URL encoding.

        ``None`` values are dropped and list/tuple values are joined with
        commas (the form SearXNG documents for multi-valued parameters such as
        ``categories`` and ``engines``). Without this, ``urlencode`` would send
        the Python repr of those values.
        """
        normalized: Dict = {}
        for key, value in params.items():
            if value is None:
                continue
            if isinstance(value, (list, tuple)):
                value = ",".join(str(item) for item in value)
            normalized[key] = value
        return normalized

    def get_complete_url(
        self,
        api_base: Optional[str],
        optional_params: dict,
        data: Optional[Union[Dict, List[Dict]]] = None,
        **kwargs,
    ) -> str:
        """
        Build the full ``/search`` URL, including the query string.

        SearXNG is called with GET, so the parameters produced by
        ``transform_search_request`` (stored under ``data["_searxng_params"]``)
        are encoded into the URL here.

        Args:
            api_base: Base URL of the SearXNG instance; falls back to the
                ``SEARXNG_API_BASE`` secret. May or may not already end in
                ``/search``, with or without a trailing slash.
            optional_params: Unused here; accepted for interface compatibility.
            data: Transformed request body. Only a dict containing the
                ``_searxng_params`` key contributes a query string.

        Returns:
            The endpoint URL, followed by ``?<query>`` when there are
            parameters to send.

        Raises:
            ValueError: If no API base is passed or configured.
        """
        from urllib.parse import urlencode

        api_base = api_base or get_secret_str("SEARXNG_API_BASE")
        if not api_base:
            raise ValueError(
                "SEARXNG_API_BASE is not set. Please set the `SEARXNG_API_BASE` environment variable "
                "or pass `api_base` parameter. Example: os.environ['SEARXNG_API_BASE'] = 'https://your-searxng-instance.com'"
            )

        # Strip trailing slashes before the suffix check so that a base of
        # ".../search/" is recognised as complete instead of becoming
        # ".../search/search".
        api_base = api_base.rstrip("/")
        if not api_base.endswith("/search"):
            api_base = f"{api_base}/search"

        if isinstance(data, dict) and "_searxng_params" in data:
            query_string = urlencode(
                self._normalize_query_params(data["_searxng_params"])
            )
            if query_string:
                return f"{api_base}?{query_string}"
        return api_base

    def transform_search_request(
        self,
        query: Union[str, List[str]],
        optional_params: dict,
        **kwargs,
    ) -> Dict:
        """
        Transform a unified search request into SearXNG query parameters.

        Handling of the Perplexity unified spec parameters:
        - query → q (a list of queries is joined with spaces, since SearXNG
          only accepts a single query string)
        - country → language (approximate mapping; unknown codes are passed
          through lower-cased)
        - max_results, search_domain_filter, max_tokens_per_page → not
          forwarded; SearXNG returns its default page of results

        Every other key in ``optional_params`` is treated as SearXNG-specific
        and passed through as-is, unless it would overwrite a key set here
        (``q``, ``format``, or a ``language`` derived from ``country``).

        Args:
            query: Search query (string or list of strings).
            optional_params: Optional parameters for the request.

        Returns:
            ``{"_searxng_params": {...}}``; the wrapper key is what
            ``get_complete_url`` reads to build the GET query string.
        """
        if isinstance(query, list):
            query = " ".join(query)

        request_data: SearXNGSearchRequest = {
            "q": query,
            "format": "json",  # Always request JSON format
        }

        # Approximate country → language mapping. "gb" is accepted alongside
        # "uk" because both are in common use for the United Kingdom.
        country_to_language = {
            "us": "en",
            "uk": "en",
            "gb": "en",
            "de": "de",
            "fr": "fr",
            "es": "es",
            "jp": "ja",
        }
        country = optional_params.get("country")
        # A missing, None or non-string country is ignored rather than crashing.
        if isinstance(country, str) and country.strip():
            country_code = country.strip().lower()
            request_data["language"] = country_to_language.get(
                country_code, country_code
            )

        # Convert to a plain dict before adding keys outside the TypedDict.
        result_data = dict(request_data)

        # Looked up once: the set of unified params does not change per key.
        unified_params = self.get_supported_perplexity_optional_params()
        for param, value in optional_params.items():
            if param not in unified_params and param not in result_data:
                result_data[param] = value

        return {"_searxng_params": result_data}

    def transform_search_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> SearchResponse:
        """
        Transform a SearXNG API response into the unified SearchResponse format.

        SearXNG → LiteLLM mappings:
        - results[].title → SearchResult.title
        - results[].url → SearchResult.url
        - results[].content → SearchResult.snippet
        - results[].publishedDate OR results[].pubdate → SearchResult.date
        - SearchResult.last_updated is always None (not read from the response)

        Missing or null ``title``/``url``/``content`` values become empty
        strings, and a missing or null ``results`` list yields no results.

        Args:
            raw_response: Raw httpx response from the SearXNG API.
            logging_obj: Logging object for tracking (unused here).

        Returns:
            SearchResponse with one SearchResult per entry in ``results``.
        """
        response_json = raw_response.json()

        # `or` rather than a .get() default: a key that is present with a JSON
        # null would otherwise come through as None.
        raw_results = response_json.get("results") or []
        results = [
            SearchResult(
                title=item.get("title") or "",
                url=item.get("url") or "",
                snippet=item.get("content") or "",
                date=item.get("publishedDate") or item.get("pubdate"),
                last_updated=None,
            )
            for item in raw_results
        ]
        return SearchResponse(
            results=results,
            object="search",
        )