226 lines
8.1 KiB
Python
226 lines
8.1 KiB
Python
"""
|
|
Calls SearXNG's /search endpoint to search the web.
|
|
|
|
SearXNG API Reference: https://docs.searxng.org/dev/search_api.html
|
|
"""
|
|
from typing import Dict, List, Optional, TypedDict, Union
|
|
|
|
import httpx
|
|
|
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
|
from litellm.llms.base_llm.search.transformation import (
|
|
BaseSearchConfig,
|
|
SearchResponse,
|
|
SearchResult,
|
|
)
|
|
from litellm.secret_managers.main import get_secret_str
|
|
|
|
|
|
class _SearXNGSearchRequestRequired(TypedDict):
    """Mandatory keys of a SearXNG Search API request.

    Kept as a separate base so the optional keys can be declared on a
    ``total=False`` subclass.
    """

    # The search query string; the only required key.
    q: str
|
|
|
|
|
|
class SearXNGSearchRequest(_SearXNGSearchRequestRequired, total=False):
    """
    Typed shape of a SearXNG Search API request.

    Inherits the required ``q`` key; every key declared here is optional
    (``total=False``).

    Based on: https://docs.searxng.org/dev/search_api.html
    """

    # Comma-separated list of categories.
    categories: str
    # Comma-separated list of engines.
    engines: str
    # Language code.
    language: str
    # Page number (default 1).
    pageno: int
    # Time range filter (day, month, year).
    time_range: str
    # Output format (json, csv, rss) - should be 'json'.
    format: str
|
|
|
|
|
|
class SearXNGSearchConfig(BaseSearchConfig):
    """Search configuration for a SearXNG instance's ``/search`` endpoint.

    Requests are sent as GET, so the transformed request is wrapped in a
    private ``_searxng_params`` envelope by ``transform_search_request`` and
    serialized into the URL query string by ``get_complete_url``.
    """

    @staticmethod
    def ui_friendly_name() -> str:
        """Return the display name of this search provider."""
        return "SearXNG"

    def get_http_method(self):
        """
        SearXNG supports both GET and POST, but we'll use GET for simplicity.
        """
        return "GET"

    def validate_environment(
        self,
        headers: Dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Populate and return the request headers.

        SearXNG is open-source and doesn't require an API key by default,
        but some instances may require authentication via headers. When a
        key is available (argument or ``SEARXNG_API_KEY`` secret) it is sent
        as a Bearer token.

        Args:
            headers: Header dict to update; it is mutated in place.
            api_key: Optional API key; falls back to ``SEARXNG_API_KEY``.
            api_base: Unused here; accepted for interface compatibility.

        Returns:
            The same ``headers`` dict with ``Content-Type`` (and optionally
            ``Authorization``) set.
        """
        api_key = api_key or get_secret_str("SEARXNG_API_KEY")
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        headers["Content-Type"] = "application/json"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        optional_params: dict,
        data: Optional[Union[Dict, List[Dict]]] = None,
        **kwargs,
    ) -> str:
        """
        Build the complete Search endpoint URL, including query parameters.

        SearXNG is called via GET, so the query string is built here from
        the ``_searxng_params`` envelope found in the transformed request
        body (``data``).

        Args:
            api_base: Base URL of the SearXNG instance; falls back to the
                ``SEARXNG_API_BASE`` secret.
            optional_params: Unused here; accepted for interface compatibility.
            data: Transformed request body. Only a dict containing
                ``_searxng_params`` contributes a query string.

        Returns:
            The ``/search`` URL, with an encoded query string when params
            are available.

        Raises:
            ValueError: If no API base is passed or configured.
        """
        from urllib.parse import urlencode

        api_base = api_base or get_secret_str("SEARXNG_API_BASE")
        if not api_base:
            raise ValueError(
                "SEARXNG_API_BASE is not set. Please set the `SEARXNG_API_BASE` environment variable "
                "or pass `api_base` parameter. Example: os.environ['SEARXNG_API_BASE'] = 'https://your-searxng-instance.com'"
            )

        # Strip trailing slashes before testing the suffix so that a base of
        # ".../search/" is recognized and not turned into ".../search/search".
        endpoint = api_base.rstrip("/")
        if not endpoint.endswith("/search"):
            endpoint = f"{endpoint}/search"

        # Build query parameters from the transformed request body.
        if isinstance(data, dict) and "_searxng_params" in data:
            query_string = urlencode(data["_searxng_params"])
            return f"{endpoint}?{query_string}"

        return endpoint

    @staticmethod
    def _language_for_country(country: str) -> str:
        """Approximate a SearXNG language code from a country code.

        Unknown countries are passed through unchanged (lower-cased).
        """
        normalized = country.strip().lower()
        known_languages = {
            "us": "en",
            "uk": "en",
            "gb": "en",  # ISO 3166-1 code for the United Kingdom
            "de": "de",
            "fr": "fr",
            "es": "es",
            "jp": "ja",
        }
        return known_languages.get(normalized, normalized)

    def transform_search_request(
        self,
        query: Union[str, List[str]],
        optional_params: dict,
        **kwargs,
    ) -> Dict:
        """
        Transform a Search request to SearXNG API format.

        Handling of Perplexity unified spec parameters:
        - query -> q
        - country -> language (approximate mapping)
        - max_results -> ignored; page 1 is requested and SearXNG returns
          its default number of results
        - search_domain_filter -> not directly supported, ignored
        - max_tokens_per_page -> not applicable, ignored

        All other SearXNG-specific parameters are passed through as-is,
        except that ``None`` values are dropped (they would otherwise be
        URL-encoded as the literal string "None").

        Args:
            query: Search query (string or list of strings). SearXNG only
                supports single string queries, so a list is joined with
                spaces.
            optional_params: Optional parameters for the request.

        Returns:
            ``{"_searxng_params": {...}}`` where the inner dict follows the
            SearXNGSearchRequest spec; ``get_complete_url`` turns it into
            the GET query string.
        """
        if isinstance(query, list):
            query = " ".join(query)

        request_data: SearXNGSearchRequest = {
            "q": query,
            "format": "json",  # Always request JSON format
        }

        # Guard against a missing/None/blank country instead of calling
        # .lower() on it unconditionally.
        country = optional_params.get("country")
        if isinstance(country, str) and country.strip():
            request_data["language"] = self._language_for_country(country)

        # Convert to a plain dict before dynamic key assignments.
        result_data: Dict = dict(request_data)

        # Evaluate the unified-spec parameter names once, outside the loop.
        unified_params = set(self.get_supported_perplexity_optional_params())
        for param, value in optional_params.items():
            if value is None or param in unified_params or param in result_data:
                continue
            result_data[param] = value

        return {"_searxng_params": result_data}

    def transform_search_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> SearchResponse:
        """
        Transform a SearXNG API response to the unified SearchResponse format.

        SearXNG -> LiteLLM mappings:
        - results[].title -> SearchResult.title
        - results[].url -> SearchResult.url
        - results[].content -> SearchResult.snippet
        - results[].publishedDate OR results[].pubdate -> SearchResult.date
        - SearchResult.last_updated is always None (not read from the response)

        Args:
            raw_response: Raw httpx response from the SearXNG API.
            logging_obj: Logging object for tracking (unused here).

        Returns:
            SearchResponse with standardized format.
        """
        response_json = raw_response.json()

        # `or` fallbacks also cover keys that are present but JSON null,
        # which `.get(key, default)` alone would let through as None.
        results = [
            SearchResult(
                title=item.get("title") or "",
                url=item.get("url") or "",
                snippet=item.get("content") or "",
                date=item.get("publishedDate") or item.get("pubdate"),
                last_updated=None,
            )
            for item in (response_json.get("results") or [])
        ]

        return SearchResponse(
            results=results,
            object="search",
        )
|