Files
sub2api-cn-relay-manager/internal/app/route_health_api.go
2026-05-29 13:37:43 +08:00

296 lines
9.9 KiB
Go

package app
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"net/http"
	"strings"

	"sub2api-cn-relay-manager/internal/routing"
	"sub2api-cn-relay-manager/internal/store/sqlite"
)
// Runtime status values reported in RouteHealthInfo.RuntimeStatus and accepted
// by the "status" query filter of the route-health endpoint.
const (
	routeRuntimeStatusHealthy  = "healthy"
	routeRuntimeStatusCooldown = "cooldown"
	routeRuntimeStatusFailing  = "failing"
	routeRuntimeStatusDisabled = "disabled"
)

// defaultRouteHealthFailoverLimit is the Limit passed to each recent
// failover-event query issued while building one route's health record.
const defaultRouteHealthFailoverLimit = 20
// ListRouteHealthRequest carries the optional filters of a route-health
// listing. An empty field means "do not filter on this".
type ListRouteHealthRequest struct {
	// LogicalGroupID limits the listing to routes of one logical group.
	LogicalGroupID string
	// RouteID limits the listing to a single route.
	RouteID string
	// Status keeps only routes whose derived runtime status matches,
	// compared case-insensitively.
	Status string
}
// RouteHealthInfo is the JSON record returned for one route by the
// route-health listing. It combines the route's stored configuration with
// its runtime failure/cooldown state and recent routing history.
type RouteHealthInfo struct {
	// Backend is copied from the sticky-store runtime's backend field.
	Backend string `json:"backend"`

	// Identity and configuration taken from the route row and its owning
	// logical group.
	RouteID                 string `json:"route_id"`
	RouteName               string `json:"route_name,omitempty"`
	LogicalGroupID          string `json:"logical_group_id"`
	LogicalGroupDisplayName string `json:"logical_group_display_name,omitempty"`
	LogicalGroupStatus      string `json:"logical_group_status,omitempty"`
	ConfiguredStatus        string `json:"configured_status,omitempty"`
	ShadowHostID            string `json:"shadow_host_id"`
	ShadowGroupID           string `json:"shadow_group_id"`
	Priority                int    `json:"priority"`
	Weight                  int    `json:"weight,omitempty"`

	// Runtime state. RuntimeStatus is one of "healthy", "cooldown",
	// "failing" or "disabled".
	RuntimeStatus  string `json:"runtime_status"`
	FailureCount   int    `json:"failure_count"`
	CooldownUntil  string `json:"cooldown_until,omitempty"`
	CooldownReason string `json:"cooldown_reason,omitempty"`
	LastErrorClass string `json:"last_error_class,omitempty"`

	// Details of the most recent decision-log row that selected this route;
	// left empty when no such row exists.
	LastSelectedAt     string `json:"last_selected_at,omitempty"`
	LastUpstreamStatus int    `json:"last_upstream_status,omitempty"`
	LastRequestID      string `json:"last_request_id,omitempty"`
	LastPublicModel    string `json:"last_public_model,omitempty"`

	// RecentFailoverCount is the number of recent failover events fetched
	// with this route as either the source or the target.
	RecentFailoverCount int `json:"recent_failover_count"`

	// Remaining fields copied from the route row.
	UpstreamBaseURLHint string `json:"upstream_base_url_hint,omitempty"`
	UpdatedAt           string `json:"updated_at,omitempty"`
}
// handleListRouteHealth serves the route-health listing endpoint.
//
// It decodes the query-string filters from r, runs fn with the request
// context, and writes the result with status 200 as a JSON object whose
// "route_health" key holds the returned items.
//
// Failures are written through writeHTTPError:
//   - a nil fn is reported as 500 "server_misconfigured";
//   - a rejected query is reported with the *httpError returned by
//     decodeListRouteHealthRequest;
//   - an error from fn is reported with whatever classifyError maps it to.
func handleListRouteHealth(w http.ResponseWriter, r *http.Request, fn func(context.Context, ListRouteHealthRequest) ([]RouteHealthInfo, error)) {
	if fn == nil {
		writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "list-route-health action is not configured"})
		return
	}
	req, err := decodeListRouteHealthRequest(r)
	if err != nil {
		writeHTTPError(w, err)
		return
	}
	items, actionErr := fn(r.Context(), req)
	if actionErr != nil {
		writeHTTPError(w, classifyError(actionErr))
		return
	}
	// encoding/json renders a nil slice as null. Normalise it so that
	// "route_health" is always an array, whichever action was injected.
	// NOTE(review): assumes writeJSON encodes with encoding/json — confirm.
	if items == nil {
		items = []RouteHealthInfo{}
	}
	writeJSON(w, http.StatusOK, map[string]any{"route_health": items})
}
// buildListRouteHealthAction returns the action that produces the
// route-health listing.
//
// Each call of the returned function opens the SQLite store at sqliteDSN,
// closes it again before returning, resolves the routes in scope for req, and
// builds one RouteHealthInfo per route. When req.Status is non-empty, only
// routes whose runtime status equals it (case-insensitively) are kept. The
// first error encountered aborts the listing and is returned as-is; a route
// whose logical group is missing from the resolved scope is also an error.
// On success the result is never nil.
func buildListRouteHealthAction(sqliteDSN string, stickyRuntime stickyStoreRuntime) func(context.Context, ListRouteHealthRequest) ([]RouteHealthInfo, error) {
	list := func(ctx context.Context, req ListRouteHealthRequest) ([]RouteHealthInfo, error) {
		db, openErr := sqlite.Open(ctx, sqliteDSN)
		if openErr != nil {
			return nil, openErr
		}
		defer db.Close()

		groupsByID, scopedRoutes, scopeErr := loadRouteHealthScope(ctx, db, req)
		if scopeErr != nil {
			return nil, scopeErr
		}

		wantStatus := req.Status
		result := make([]RouteHealthInfo, 0, len(scopedRoutes))
		for i := range scopedRoutes {
			current := scopedRoutes[i]
			owner, found := groupsByID[current.LogicalGroupID]
			if !found {
				return nil, fmt.Errorf("logical group %q not found for route %q", current.LogicalGroupID, current.RouteID)
			}
			info, buildErr := buildRouteHealthInfo(ctx, db, stickyRuntime, owner, current)
			if buildErr != nil {
				return nil, buildErr
			}
			// The status filter can only run after the record is built,
			// because the runtime status is derived during the build.
			if wantStatus == "" || strings.EqualFold(info.RuntimeStatus, wantStatus) {
				result = append(result, info)
			}
		}
		return result, nil
	}
	return list
}
// decodeListRouteHealthRequest reads the "logical_group_id", "route_id" and
// "status" query parameters of r into a ListRouteHealthRequest.
//
// Every value is trimmed of surrounding whitespace and the status is
// additionally lower-cased. A non-empty status that
// isSupportedRouteHealthStatus rejects produces a 400 "bad_request"
// *httpError and a zero request.
func decodeListRouteHealthRequest(r *http.Request) (ListRouteHealthRequest, *httpError) {
	query := r.URL.Query()
	rawStatus := strings.TrimSpace(query.Get("status"))

	if rawStatus == "" || isSupportedRouteHealthStatus(rawStatus) {
		decoded := ListRouteHealthRequest{
			LogicalGroupID: strings.TrimSpace(query.Get("logical_group_id")),
			RouteID:        strings.TrimSpace(query.Get("route_id")),
			Status:         strings.ToLower(rawStatus),
		}
		return decoded, nil
	}

	rejection := &httpError{
		StatusCode: http.StatusBadRequest,
		Code:       "bad_request",
		Message:    fmt.Sprintf("unsupported route health status %q", rawStatus),
	}
	return ListRouteHealthRequest{}, rejection
}
// loadRouteHealthScope resolves which routes a route-health request covers,
// together with the logical groups that own them.
//
// The filters in req are applied in this order:
//   - RouteID set: at most that one route. A route the store reports as
//     sql.ErrNoRows, or one owned by a logical group other than a non-empty
//     req.LogicalGroupID, yields an empty scope and no error.
//   - only LogicalGroupID set: that group and every route listed for it.
//     Any error from the group lookup is returned unchanged.
//   - neither set: every logical group and every route listed for each.
//
// The returned map is keyed by logical group ID. Store errors other than the
// not-found case above are returned as-is with nil map and slice.
func loadRouteHealthScope(ctx context.Context, store *sqlite.DB, req ListRouteHealthRequest) (map[string]sqlite.LogicalGroup, []sqlite.LogicalGroupRoute, error) {
	if req.RouteID != "" {
		route, err := store.LogicalGroupRoutes().GetByRouteID(ctx, req.RouteID)
		if err != nil {
			// errors.Is also matches a wrapped sql.ErrNoRows, so an unknown
			// route stays an empty listing even if the store adds context
			// to the error.
			if errors.Is(err, sql.ErrNoRows) {
				return map[string]sqlite.LogicalGroup{}, nil, nil
			}
			return nil, nil, err
		}
		// Both filters were given but disagree: nothing matches.
		if req.LogicalGroupID != "" && route.LogicalGroupID != req.LogicalGroupID {
			return map[string]sqlite.LogicalGroup{}, nil, nil
		}
		group, err := store.LogicalGroups().GetByLogicalGroupID(ctx, route.LogicalGroupID)
		if err != nil {
			return nil, nil, err
		}
		return map[string]sqlite.LogicalGroup{group.LogicalGroupID: group}, []sqlite.LogicalGroupRoute{route}, nil
	}

	if req.LogicalGroupID != "" {
		group, err := store.LogicalGroups().GetByLogicalGroupID(ctx, req.LogicalGroupID)
		if err != nil {
			return nil, nil, err
		}
		routes, err := store.LogicalGroupRoutes().ListByLogicalGroupID(ctx, req.LogicalGroupID)
		if err != nil {
			return nil, nil, err
		}
		return map[string]sqlite.LogicalGroup{group.LogicalGroupID: group}, routes, nil
	}

	groupRows, err := store.LogicalGroups().List(ctx)
	if err != nil {
		return nil, nil, err
	}
	groupMap := make(map[string]sqlite.LogicalGroup, len(groupRows))
	routes := make([]sqlite.LogicalGroupRoute, 0)
	for _, group := range groupRows {
		groupMap[group.LogicalGroupID] = group
		groupRoutes, err := store.LogicalGroupRoutes().ListByLogicalGroupID(ctx, group.LogicalGroupID)
		if err != nil {
			return nil, nil, err
		}
		routes = append(routes, groupRoutes...)
	}
	return groupMap, routes, nil
}
// buildRouteHealthInfo assembles the health record of one route.
//
// It reads the route's failure and cooldown state from the sticky store, the
// most recent decision-log row that selected the route, and the recent
// failover events that have the route as source or as target (each query
// limited to defaultRouteHealthFailoverLimit rows). The first error from any
// of these lookups is returned together with a zero RouteHealthInfo.
func buildRouteHealthInfo(ctx context.Context, store *sqlite.DB, stickyRuntime stickyStoreRuntime, group sqlite.LogicalGroup, route sqlite.LogicalGroupRoute) (RouteHealthInfo, error) {
	var none RouteHealthInfo
	routeID := route.RouteID

	failure, failureKnown, err := stickyRuntime.store.GetRouteFailure(ctx, routeID)
	if err != nil {
		return none, err
	}

	cooldown, cooling, err := stickyRuntime.store.GetCooldown(ctx, routeID)
	if err != nil {
		return none, err
	}
	// Honor a manual cooldown marker stored on the route row, so the health
	// page does not miss a configured disable window that the sticky store
	// knows nothing about.
	if !(cooling || routeExitsCooldown(route.CooldownUntil)) {
		cooling = true
		cooldown = routing.RouteCooldownState{
			RouteID: routeID,
			Reason:  "configured_cooldown",
			Until:   route.CooldownUntil,
		}
	}

	decisions, err := store.RouteDecisionLogs().ListRecent(ctx, sqlite.RouteDecisionLogFilter{
		SelectedRouteID: routeID,
		Limit:           1,
	})
	if err != nil {
		return none, err
	}

	outgoing, err := store.RouteFailoverEvents().ListRecent(ctx, sqlite.RouteFailoverEventFilter{
		FromRouteID: routeID,
		Limit:       defaultRouteHealthFailoverLimit,
	})
	if err != nil {
		return none, err
	}

	incoming, err := store.RouteFailoverEvents().ListRecent(ctx, sqlite.RouteFailoverEventFilter{
		ToRouteID: routeID,
		Limit:     defaultRouteHealthFailoverLimit,
	})
	if err != nil {
		return none, err
	}

	failures := failureCountFromState(failure, failureKnown)
	info := RouteHealthInfo{
		Backend:                 stickyRuntime.backend,
		RouteID:                 routeID,
		RouteName:               route.Name,
		LogicalGroupID:          route.LogicalGroupID,
		LogicalGroupDisplayName: group.DisplayName,
		LogicalGroupStatus:      group.Status,
		ConfiguredStatus:        route.Status,
		ShadowHostID:            route.ShadowHostID,
		ShadowGroupID:           route.ShadowGroupID,
		Priority:                route.Priority,
		Weight:                  route.Weight,
		RuntimeStatus:           deriveRouteRuntimeStatus(group.Status, route.Status, cooling, failures),
		FailureCount:            failures,
		CooldownUntil:           cooldownUntilFromState(cooldown, cooling),
		CooldownReason:          cooldownReasonFromState(cooldown, cooling),
		LastErrorClass:          lastErrorClassFromStates(failure, failureKnown, decisions),
		RecentFailoverCount:     len(outgoing) + len(incoming),
		UpstreamBaseURLHint:     route.UpstreamBaseURLHint,
		UpdatedAt:               route.UpdatedAt,
	}

	// Without a decision-log row the "last selected" fields stay empty.
	if len(decisions) == 0 {
		return info, nil
	}
	latest := decisions[0]
	info.LastSelectedAt = latest.CreatedAt
	info.LastUpstreamStatus = latest.UpstreamStatus
	info.LastRequestID = latest.RequestID
	info.LastPublicModel = latest.PublicModel
	return info, nil
}
// deriveRouteRuntimeStatus maps a route's configuration and runtime state to
// one runtime status value. The first matching rule wins:
//   - the group or the route is not active: "disabled";
//   - a cooldown is in effect: "cooldown";
//   - at least one failure is recorded: "failing";
//   - otherwise: "healthy".
func deriveRouteRuntimeStatus(groupStatus, routeStatus string, hasCooldown bool, failureCount int) string {
	enabled := isActiveStatus(groupStatus) && isActiveStatus(routeStatus)
	switch {
	case !enabled:
		return routeRuntimeStatusDisabled
	case hasCooldown:
		return routeRuntimeStatusCooldown
	case failureCount > 0:
		return routeRuntimeStatusFailing
	default:
		return routeRuntimeStatusHealthy
	}
}
// failureCountFromState returns state.FailureCount when ok reports that a
// failure state exists, and 0 otherwise.
func failureCountFromState(state routing.RouteFailureState, ok bool) int {
	if ok {
		return state.FailureCount
	}
	return 0
}
// cooldownUntilFromState returns the whitespace-trimmed state.Until when ok
// reports that a cooldown exists, and the empty string otherwise.
func cooldownUntilFromState(state routing.RouteCooldownState, ok bool) string {
	until := ""
	if ok {
		until = strings.TrimSpace(state.Until)
	}
	return until
}
// cooldownReasonFromState returns the whitespace-trimmed state.Reason when ok
// reports that a cooldown exists, and the empty string otherwise.
func cooldownReasonFromState(state routing.RouteCooldownState, ok bool) string {
	reason := ""
	if ok {
		reason = strings.TrimSpace(state.Reason)
	}
	return reason
}
// lastErrorClassFromStates picks the error class to report for a route.
//
// A non-blank LastErrorClass on an existing failure state takes precedence.
// Otherwise the ErrorClass of the first entry in decisions is used, or the
// empty string when decisions is empty. The result is whitespace-trimmed.
func lastErrorClassFromStates(state routing.RouteFailureState, hasFailure bool, decisions []sqlite.RouteDecisionLog) string {
	if hasFailure {
		if class := strings.TrimSpace(state.LastErrorClass); class != "" {
			return class
		}
	}
	if len(decisions) > 0 {
		return strings.TrimSpace(decisions[0].ErrorClass)
	}
	return ""
}
// isSupportedRouteHealthStatus reports whether status, after trimming
// surrounding whitespace and lower-casing, is one of the runtime status
// values: healthy, cooldown, failing or disabled.
func isSupportedRouteHealthStatus(status string) bool {
	normalized := strings.ToLower(strings.TrimSpace(status))
	supported := [...]string{
		routeRuntimeStatusHealthy,
		routeRuntimeStatusCooldown,
		routeRuntimeStatusFailing,
		routeRuntimeStatusDisabled,
	}
	for _, candidate := range supported {
		if normalized == candidate {
			return true
		}
	}
	return false
}