296 lines
9.9 KiB
Go
296 lines
9.9 KiB
Go
package app
|
|
|
|
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"net/http"
	"strings"

	"sub2api-cn-relay-manager/internal/routing"
	"sub2api-cn-relay-manager/internal/store/sqlite"
)
|
|
|
|
// Runtime status labels a route can be reported with. They are the values
// accepted by the "status" query filter and produced by
// deriveRouteRuntimeStatus.
const (
	routeRuntimeStatusHealthy  = "healthy"
	routeRuntimeStatusCooldown = "cooldown"
	routeRuntimeStatusFailing  = "failing"
	routeRuntimeStatusDisabled = "disabled"
)

// defaultRouteHealthFailoverLimit is the Limit passed to each of the two
// recent-failover queries (events from the route, events to the route) when
// a route's health entry is built.
const defaultRouteHealthFailoverLimit = 20
|
|
|
|
// ListRouteHealthRequest carries the optional filters of a route-health
// listing. An empty field means "do not filter on this".
type ListRouteHealthRequest struct {
	// LogicalGroupID restricts the listing to routes of one logical group.
	LogicalGroupID string
	// RouteID restricts the listing to a single route.
	RouteID string
	// Status keeps only routes whose derived runtime status matches it
	// (the match is case-insensitive).
	Status string
}
|
|
|
|
// RouteHealthInfo is one entry of the route-health listing: a route's
// configuration combined with its runtime failure/cooldown state and its most
// recent routing activity. It is serialized to JSON using the field tags.
type RouteHealthInfo struct {
	// Backend is copied from the sticky-store runtime's backend field.
	Backend   string `json:"backend"`
	RouteID   string `json:"route_id"`
	RouteName string `json:"route_name,omitempty"`

	// Logical group that owns the route.
	LogicalGroupID          string `json:"logical_group_id"`
	LogicalGroupDisplayName string `json:"logical_group_display_name,omitempty"`
	LogicalGroupStatus      string `json:"logical_group_status,omitempty"`

	// Values copied from the stored route row.
	ConfiguredStatus string `json:"configured_status,omitempty"`
	ShadowHostID     string `json:"shadow_host_id"`
	ShadowGroupID    string `json:"shadow_group_id"`
	Priority         int    `json:"priority"`
	Weight           int    `json:"weight,omitempty"`

	// Runtime state. RuntimeStatus is one of the routeRuntimeStatus*
	// constants; the cooldown fields are empty when no cooldown applies.
	RuntimeStatus  string `json:"runtime_status"`
	FailureCount   int    `json:"failure_count"`
	CooldownUntil  string `json:"cooldown_until,omitempty"`
	CooldownReason string `json:"cooldown_reason,omitempty"`
	LastErrorClass string `json:"last_error_class,omitempty"`

	// Taken from the most recent decision-log row that selected this route;
	// left at their zero values when there is no such row.
	LastSelectedAt     string `json:"last_selected_at,omitempty"`
	LastUpstreamStatus int    `json:"last_upstream_status,omitempty"`
	LastRequestID      string `json:"last_request_id,omitempty"`
	LastPublicModel    string `json:"last_public_model,omitempty"`

	// RecentFailoverCount is the number of recent failover events away from
	// the route plus the number of recent failover events onto it.
	RecentFailoverCount int `json:"recent_failover_count"`

	UpstreamBaseURLHint string `json:"upstream_base_url_hint,omitempty"`
	UpdatedAt           string `json:"updated_at,omitempty"`
}
|
|
|
|
func handleListRouteHealth(w http.ResponseWriter, r *http.Request, fn func(context.Context, ListRouteHealthRequest) ([]RouteHealthInfo, error)) {
|
|
if fn == nil {
|
|
writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "list-route-health action is not configured"})
|
|
return
|
|
}
|
|
req, err := decodeListRouteHealthRequest(r)
|
|
if err != nil {
|
|
writeHTTPError(w, err)
|
|
return
|
|
}
|
|
items, actionErr := fn(r.Context(), req)
|
|
if actionErr != nil {
|
|
writeHTTPError(w, classifyError(actionErr))
|
|
return
|
|
}
|
|
writeJSON(w, http.StatusOK, map[string]any{"route_health": items})
|
|
}
|
|
|
|
func buildListRouteHealthAction(sqliteDSN string, stickyRuntime stickyStoreRuntime) func(context.Context, ListRouteHealthRequest) ([]RouteHealthInfo, error) {
|
|
return func(ctx context.Context, req ListRouteHealthRequest) ([]RouteHealthInfo, error) {
|
|
store, err := sqlite.Open(ctx, sqliteDSN)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer store.Close()
|
|
|
|
groups, routes, err := loadRouteHealthScope(ctx, store, req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
items := make([]RouteHealthInfo, 0, len(routes))
|
|
for _, route := range routes {
|
|
group, ok := groups[route.LogicalGroupID]
|
|
if !ok {
|
|
return nil, fmt.Errorf("logical group %q not found for route %q", route.LogicalGroupID, route.RouteID)
|
|
}
|
|
item, err := buildRouteHealthInfo(ctx, store, stickyRuntime, group, route)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if req.Status != "" && !strings.EqualFold(item.RuntimeStatus, req.Status) {
|
|
continue
|
|
}
|
|
items = append(items, item)
|
|
}
|
|
return items, nil
|
|
}
|
|
}
|
|
|
|
func decodeListRouteHealthRequest(r *http.Request) (ListRouteHealthRequest, *httpError) {
|
|
status := strings.TrimSpace(r.URL.Query().Get("status"))
|
|
if status != "" && !isSupportedRouteHealthStatus(status) {
|
|
return ListRouteHealthRequest{}, &httpError{
|
|
StatusCode: http.StatusBadRequest,
|
|
Code: "bad_request",
|
|
Message: fmt.Sprintf("unsupported route health status %q", status),
|
|
}
|
|
}
|
|
return ListRouteHealthRequest{
|
|
LogicalGroupID: strings.TrimSpace(r.URL.Query().Get("logical_group_id")),
|
|
RouteID: strings.TrimSpace(r.URL.Query().Get("route_id")),
|
|
Status: strings.ToLower(status),
|
|
}, nil
|
|
}
|
|
|
|
func loadRouteHealthScope(ctx context.Context, store *sqlite.DB, req ListRouteHealthRequest) (map[string]sqlite.LogicalGroup, []sqlite.LogicalGroupRoute, error) {
|
|
if req.RouteID != "" {
|
|
route, err := store.LogicalGroupRoutes().GetByRouteID(ctx, req.RouteID)
|
|
if err != nil {
|
|
if err == sql.ErrNoRows {
|
|
return map[string]sqlite.LogicalGroup{}, nil, nil
|
|
}
|
|
return nil, nil, err
|
|
}
|
|
if req.LogicalGroupID != "" && route.LogicalGroupID != req.LogicalGroupID {
|
|
return map[string]sqlite.LogicalGroup{}, nil, nil
|
|
}
|
|
group, err := store.LogicalGroups().GetByLogicalGroupID(ctx, route.LogicalGroupID)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
return map[string]sqlite.LogicalGroup{group.LogicalGroupID: group}, []sqlite.LogicalGroupRoute{route}, nil
|
|
}
|
|
|
|
if req.LogicalGroupID != "" {
|
|
group, err := store.LogicalGroups().GetByLogicalGroupID(ctx, req.LogicalGroupID)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
routes, err := store.LogicalGroupRoutes().ListByLogicalGroupID(ctx, req.LogicalGroupID)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
return map[string]sqlite.LogicalGroup{group.LogicalGroupID: group}, routes, nil
|
|
}
|
|
|
|
groupRows, err := store.LogicalGroups().List(ctx)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
groupMap := make(map[string]sqlite.LogicalGroup, len(groupRows))
|
|
routes := make([]sqlite.LogicalGroupRoute, 0)
|
|
for _, group := range groupRows {
|
|
groupMap[group.LogicalGroupID] = group
|
|
groupRoutes, err := store.LogicalGroupRoutes().ListByLogicalGroupID(ctx, group.LogicalGroupID)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
routes = append(routes, groupRoutes...)
|
|
}
|
|
return groupMap, routes, nil
|
|
}
|
|
|
|
func buildRouteHealthInfo(ctx context.Context, store *sqlite.DB, stickyRuntime stickyStoreRuntime, group sqlite.LogicalGroup, route sqlite.LogicalGroupRoute) (RouteHealthInfo, error) {
|
|
failureState, hasFailure, err := stickyRuntime.store.GetRouteFailure(ctx, route.RouteID)
|
|
if err != nil {
|
|
return RouteHealthInfo{}, err
|
|
}
|
|
cooldownState, hasCooldown, err := stickyRuntime.store.GetCooldown(ctx, route.RouteID)
|
|
if err != nil {
|
|
return RouteHealthInfo{}, err
|
|
}
|
|
|
|
// 兼容 route 表上的手工 cooldown 标记,避免健康页漏掉已配置的禁用窗口。
|
|
if !hasCooldown && !routeExitsCooldown(route.CooldownUntil) {
|
|
hasCooldown = true
|
|
cooldownState = routing.RouteCooldownState{
|
|
RouteID: route.RouteID,
|
|
Reason: "configured_cooldown",
|
|
Until: route.CooldownUntil,
|
|
}
|
|
}
|
|
|
|
decisionRows, err := store.RouteDecisionLogs().ListRecent(ctx, sqlite.RouteDecisionLogFilter{
|
|
SelectedRouteID: route.RouteID,
|
|
Limit: 1,
|
|
})
|
|
if err != nil {
|
|
return RouteHealthInfo{}, err
|
|
}
|
|
|
|
failoverFrom, err := store.RouteFailoverEvents().ListRecent(ctx, sqlite.RouteFailoverEventFilter{
|
|
FromRouteID: route.RouteID,
|
|
Limit: defaultRouteHealthFailoverLimit,
|
|
})
|
|
if err != nil {
|
|
return RouteHealthInfo{}, err
|
|
}
|
|
failoverTo, err := store.RouteFailoverEvents().ListRecent(ctx, sqlite.RouteFailoverEventFilter{
|
|
ToRouteID: route.RouteID,
|
|
Limit: defaultRouteHealthFailoverLimit,
|
|
})
|
|
if err != nil {
|
|
return RouteHealthInfo{}, err
|
|
}
|
|
|
|
item := RouteHealthInfo{
|
|
Backend: stickyRuntime.backend,
|
|
RouteID: route.RouteID,
|
|
RouteName: route.Name,
|
|
LogicalGroupID: route.LogicalGroupID,
|
|
LogicalGroupDisplayName: group.DisplayName,
|
|
LogicalGroupStatus: group.Status,
|
|
ConfiguredStatus: route.Status,
|
|
ShadowHostID: route.ShadowHostID,
|
|
ShadowGroupID: route.ShadowGroupID,
|
|
Priority: route.Priority,
|
|
Weight: route.Weight,
|
|
RuntimeStatus: deriveRouteRuntimeStatus(group.Status, route.Status, hasCooldown, failureCountFromState(failureState, hasFailure)),
|
|
FailureCount: failureCountFromState(failureState, hasFailure),
|
|
CooldownUntil: cooldownUntilFromState(cooldownState, hasCooldown),
|
|
CooldownReason: cooldownReasonFromState(cooldownState, hasCooldown),
|
|
LastErrorClass: lastErrorClassFromStates(failureState, hasFailure, decisionRows),
|
|
RecentFailoverCount: len(failoverFrom) + len(failoverTo),
|
|
UpstreamBaseURLHint: route.UpstreamBaseURLHint,
|
|
UpdatedAt: route.UpdatedAt,
|
|
}
|
|
|
|
if len(decisionRows) > 0 {
|
|
item.LastSelectedAt = decisionRows[0].CreatedAt
|
|
item.LastUpstreamStatus = decisionRows[0].UpstreamStatus
|
|
item.LastRequestID = decisionRows[0].RequestID
|
|
item.LastPublicModel = decisionRows[0].PublicModel
|
|
}
|
|
return item, nil
|
|
}
|
|
|
|
func deriveRouteRuntimeStatus(groupStatus, routeStatus string, hasCooldown bool, failureCount int) string {
|
|
if !isActiveStatus(groupStatus) || !isActiveStatus(routeStatus) {
|
|
return routeRuntimeStatusDisabled
|
|
}
|
|
if hasCooldown {
|
|
return routeRuntimeStatusCooldown
|
|
}
|
|
if failureCount > 0 {
|
|
return routeRuntimeStatusFailing
|
|
}
|
|
return routeRuntimeStatusHealthy
|
|
}
|
|
|
|
func failureCountFromState(state routing.RouteFailureState, ok bool) int {
|
|
if !ok {
|
|
return 0
|
|
}
|
|
return state.FailureCount
|
|
}
|
|
|
|
func cooldownUntilFromState(state routing.RouteCooldownState, ok bool) string {
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(state.Until)
|
|
}
|
|
|
|
func cooldownReasonFromState(state routing.RouteCooldownState, ok bool) string {
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(state.Reason)
|
|
}
|
|
|
|
func lastErrorClassFromStates(state routing.RouteFailureState, hasFailure bool, decisions []sqlite.RouteDecisionLog) string {
|
|
if hasFailure && strings.TrimSpace(state.LastErrorClass) != "" {
|
|
return strings.TrimSpace(state.LastErrorClass)
|
|
}
|
|
if len(decisions) == 0 {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(decisions[0].ErrorClass)
|
|
}
|
|
|
|
func isSupportedRouteHealthStatus(status string) bool {
|
|
switch strings.ToLower(strings.TrimSpace(status)) {
|
|
case routeRuntimeStatusHealthy, routeRuntimeStatusCooldown, routeRuntimeStatusFailing, routeRuntimeStatusDisabled:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|