package monitoring

import (
	"context"
	"database/sql"
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
	"gorm.io/gorm"
)

// HealthStatus is the health state reported for the service as a whole or for
// a single dependency check.
type HealthStatus string

// Health states used in Status and CheckResult.
const (
	HealthStatusUP       HealthStatus = "UP"
	HealthStatusDOWN     HealthStatus = "DOWN"
	HealthStatusDEGRADED HealthStatus = "DEGRADED"
	HealthStatusUNKNOWN  HealthStatus = "UNKNOWN"
)

// HealthCheck is the health checker (enhanced version with optional Redis
// checking). Create it with NewHealthCheck and optionally attach a Redis
// checker with WithRedis.
type HealthCheck struct {
	db          *gorm.DB
	redisClient RedisChecker
	startTime   time.Time
}

// RedisChecker is the Redis health-check interface. Declaring it here avoids a
// direct dependency on a Redis client package.
type RedisChecker interface {
	Ping(ctx context.Context) error
}

// Status is the aggregated health-check result that the handlers serialize as
// JSON.
type Status struct {
	Status    HealthStatus           `json:"status"`
	Checks    map[string]CheckResult `json:"checks"`
	Uptime    string                 `json:"uptime,omitempty"`
	Timestamp string                 `json:"timestamp"`
}

// CheckResult is the outcome of a single dependency check.
//
// Latency holds the string produced by formatLatency (for example "< 1ms" or
// "12ms"), not a bare number, even though its JSON key is "latency_ms".
type CheckResult struct {
	Status  HealthStatus `json:"status"`
	Error   string       `json:"error,omitempty"`
	Latency string       `json:"latency_ms,omitempty"`
}

// NewHealthCheck creates a health checker for db and records the current time
// as the start time used to compute uptime.
func NewHealthCheck(db *gorm.DB) *HealthCheck {
	return &HealthCheck{
		db:        db,
		startTime: time.Now(),
	}
}

// WithRedis attaches an optional Redis checker and returns h so that calls can
// be chained.
func (h *HealthCheck) WithRedis(r RedisChecker) *HealthCheck {
	h.redisClient = r
	return h
}

// Check runs the full health check and returns the aggregated status.
//
// The database is a hard dependency: when its check is DOWN the overall status
// is DOWN. Redis is a soft dependency and is only checked when a checker was
// attached: when it is DOWN and the status is still UP, the overall status
// becomes DEGRADED.
//
// Check is safe to call on a nil receiver; the database is then reported as
// not configured, matching the nil guard in checkDatabase.
func (h *HealthCheck) Check() *Status {
	status := &Status{
		Status:    HealthStatusUP,
		Checks:    make(map[string]CheckResult),
		Timestamp: time.Now().UTC().Format(time.RFC3339),
	}

	// Uptime is omitted when no start time was recorded, e.g. for a
	// HealthCheck that was not built with NewHealthCheck.
	if h != nil && !h.startTime.IsZero() {
		status.Uptime = time.Since(h.startTime).Round(time.Second).String()
	}

	// Database: hard dependency, DOWN makes the whole service DOWN.
	dbResult := h.checkDatabase()
	status.Checks["database"] = dbResult
	if dbResult.Status == HealthStatusDOWN {
		status.Status = HealthStatusDOWN
	}

	// Redis: soft dependency, skipped entirely when no checker is attached.
	if h == nil || h.redisClient == nil {
		return status
	}
	redisResult := h.checkRedis()
	status.Checks["redis"] = redisResult
	if redisResult.Status == HealthStatusDOWN && status.Status == HealthStatusUP {
		// Redis being DOWN only degrades the service; it never overrides a
		// DOWN caused by the database.
		status.Status = HealthStatusDEGRADED
	}

	return status
}

// LivenessCheck reports liveness only: it returns UP with an empty set of
// checks and does not touch any dependency.
func (h *HealthCheck) LivenessCheck() *Status {
	return &Status{
		Status:    HealthStatusUP,
		Checks:    map[string]CheckResult{},
		Timestamp: time.Now().UTC().Format(time.RFC3339),
	}
}

// checkDatabase pings the database with a 3-second timeout and reports the
// outcome. A nil receiver or nil db is reported as DOWN with the error
// "database not configured".
func (h *HealthCheck) checkDatabase() CheckResult {
	if h == nil || h.db == nil {
		return CheckResult{
			Status: HealthStatusDOWN,
			Error:  "database not configured",
		}
	}

	start := time.Now()

	sqlDB, err := h.db.DB()
	if err != nil {
		return CheckResult{
			Status: HealthStatusDOWN,
			Error:  err.Error(),
		}
	}

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	if err := sqlDB.PingContext(ctx); err != nil {
		return CheckResult{
			Status:  HealthStatusDOWN,
			Error:   err.Error(),
			Latency: formatLatency(time.Since(start)),
		}
	}

	// Also refresh the connection-pool metrics. This runs in its own goroutine
	// so it does not add to the latency measured for the check.
	go h.updateDBConnectionMetrics(sqlDB)

	return CheckResult{
		Status:  HealthStatusUP,
		Latency: formatLatency(time.Since(start)),
	}
}

// checkRedis pings Redis with a 2-second timeout and reports the outcome. It
// returns UNKNOWN when no Redis checker is attached (or the receiver is nil).
func (h *HealthCheck) checkRedis() CheckResult {
	if h == nil || h.redisClient == nil {
		return CheckResult{Status: HealthStatusUNKNOWN}
	}

	start := time.Now()

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	if err := h.redisClient.Ping(ctx); err != nil {
		return CheckResult{
			Status:  HealthStatusDOWN,
			Error:   err.Error(),
			Latency: formatLatency(time.Since(start)),
		}
	}

	return CheckResult{
		Status:  HealthStatusUP,
		Latency: formatLatency(time.Since(start)),
	}
}

// updateDBConnectionMetrics publishes the pool's in-use and max-open
// connection counts to the global SLO metrics.
func (h *HealthCheck) updateDBConnectionMetrics(sqlDB *sql.DB) {
	stats := sqlDB.Stats()
	sloMetrics := GetGlobalSLOMetrics()
	sloMetrics.SetDBConnections(
		float64(stats.InUse),
		float64(stats.MaxOpenConnections),
	)
}

// ReadinessHandler is the readiness-probe handler; it runs Check against all
// dependencies and writes the result as JSON.
//
// It responds 503 Service Unavailable when the overall status is DOWN and
// 200 OK otherwise. DEGRADED still returns 200 and is only visible in the
// response body.
func (h *HealthCheck) ReadinessHandler(c *gin.Context) {
	status := h.Check()

	httpStatus := http.StatusOK
	if status.Status == HealthStatusDOWN {
		httpStatus = http.StatusServiceUnavailable
	}

	c.JSON(httpStatus, status)
}

// LivenessHandler is the liveness-probe handler; it only signals that the
// process is alive and checks no dependencies.
//
// It responds 204 No Content: no body is needed to signal liveness, which
// keeps Kubernetes probe overhead low.
func (h *HealthCheck) LivenessHandler(c *gin.Context) {
	c.AbortWithStatus(http.StatusNoContent)
}

// Handler keeps the legacy /health endpoint working by delegating to
// ReadinessHandler.
func (h *HealthCheck) Handler(c *gin.Context) {
	h.ReadinessHandler(c)
}

// formatLatency renders d for display: "< 1ms" for anything under one
// millisecond, otherwise d rounded to the nearest millisecond.
func formatLatency(d time.Duration) string {
	if d < time.Millisecond {
		return "< 1ms"
	}
	return d.Round(time.Millisecond).String()
}