feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
package monitoring
|
|
|
|
|
|
|
|
|
|
|
|
import (
	"context"
	"database/sql"
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
	"gorm.io/gorm"
)
|
|
|
|
|
|
|
|
|
|
|
|
// HealthStatus is the string label used to report the health of the whole
// service and of each individual dependency check.
type HealthStatus string
|
|
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
|
HealthStatusUP HealthStatus = "UP"
|
|
|
|
|
|
HealthStatusDOWN HealthStatus = "DOWN"
|
2026-04-07 12:08:16 +08:00
|
|
|
|
HealthStatusDEGRADED HealthStatus = "DEGRADED"
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
HealthStatusUNKNOWN HealthStatus = "UNKNOWN"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// HealthCheck 健康检查器(增强版,支持 Redis 检查)
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
type HealthCheck struct {
|
2026-04-07 12:08:16 +08:00
|
|
|
|
db *gorm.DB
|
|
|
|
|
|
redisClient RedisChecker
|
|
|
|
|
|
startTime time.Time
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// RedisChecker is the minimal interface the health check needs from a Redis
// client. Declaring it here keeps this package from importing a Redis library
// directly.
type RedisChecker interface {
	// Ping reports whether Redis is reachable; a non-nil error marks the
	// Redis check as DOWN.
	Ping(ctx context.Context) error
}
|
|
|
|
|
|
|
|
|
|
|
|
// Status 健康状态
|
|
|
|
|
|
type Status struct {
|
2026-04-07 12:08:16 +08:00
|
|
|
|
Status HealthStatus `json:"status"`
|
|
|
|
|
|
Checks map[string]CheckResult `json:"checks"`
|
|
|
|
|
|
Uptime string `json:"uptime,omitempty"`
|
|
|
|
|
|
Timestamp string `json:"timestamp"`
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// CheckResult 检查结果
|
|
|
|
|
|
type CheckResult struct {
|
2026-04-07 12:08:16 +08:00
|
|
|
|
Status HealthStatus `json:"status"`
|
|
|
|
|
|
Error string `json:"error,omitempty"`
|
|
|
|
|
|
Latency string `json:"latency_ms,omitempty"`
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// NewHealthCheck 创建健康检查器
|
|
|
|
|
|
func NewHealthCheck(db *gorm.DB) *HealthCheck {
|
|
|
|
|
|
return &HealthCheck{
|
|
|
|
|
|
db: db,
|
|
|
|
|
|
startTime: time.Now(),
|
|
|
|
|
|
}
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// WithRedis 注入 Redis 检查器(可选)
|
|
|
|
|
|
func (h *HealthCheck) WithRedis(r RedisChecker) *HealthCheck {
|
|
|
|
|
|
h.redisClient = r
|
|
|
|
|
|
return h
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check 执行完整健康检查
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
func (h *HealthCheck) Check() *Status {
|
|
|
|
|
|
status := &Status{
|
2026-04-07 12:08:16 +08:00
|
|
|
|
Status: HealthStatusUP,
|
|
|
|
|
|
Checks: make(map[string]CheckResult),
|
|
|
|
|
|
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
if h.startTime != (time.Time{}) {
|
|
|
|
|
|
status.Uptime = time.Since(h.startTime).Round(time.Second).String()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 检查数据库(强依赖:DOWN 则服务 DOWN)
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
dbResult := h.checkDatabase()
|
|
|
|
|
|
status.Checks["database"] = dbResult
|
2026-04-07 12:08:16 +08:00
|
|
|
|
if dbResult.Status == HealthStatusDOWN {
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
status.Status = HealthStatusDOWN
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// 检查 Redis(弱依赖:DOWN 则服务 DEGRADED,不影响主功能)
|
|
|
|
|
|
if h.redisClient != nil {
|
|
|
|
|
|
redisResult := h.checkRedis()
|
|
|
|
|
|
status.Checks["redis"] = redisResult
|
|
|
|
|
|
if redisResult.Status == HealthStatusDOWN && status.Status == HealthStatusUP {
|
|
|
|
|
|
status.Status = HealthStatusDEGRADED
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
return status
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// LivenessCheck 存活检查(只检查进程是否运行,不检查依赖)
|
|
|
|
|
|
func (h *HealthCheck) LivenessCheck() *Status {
|
|
|
|
|
|
return &Status{
|
|
|
|
|
|
Status: HealthStatusUP,
|
|
|
|
|
|
Checks: map[string]CheckResult{},
|
|
|
|
|
|
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// checkDatabase 检查数据库连接
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
func (h *HealthCheck) checkDatabase() CheckResult {
|
|
|
|
|
|
if h == nil || h.db == nil {
|
|
|
|
|
|
return CheckResult{
|
|
|
|
|
|
Status: HealthStatusDOWN,
|
|
|
|
|
|
Error: "database not configured",
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
start := time.Now()
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
sqlDB, err := h.db.DB()
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return CheckResult{
|
|
|
|
|
|
Status: HealthStatusDOWN,
|
|
|
|
|
|
Error: err.Error(),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
|
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
|
|
if err := sqlDB.PingContext(ctx); err != nil {
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
return CheckResult{
|
2026-04-07 12:08:16 +08:00
|
|
|
|
Status: HealthStatusDOWN,
|
|
|
|
|
|
Error: err.Error(),
|
|
|
|
|
|
Latency: formatLatency(time.Since(start)),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 同时更新连接池指标
|
|
|
|
|
|
go h.updateDBConnectionMetrics(sqlDB)
|
|
|
|
|
|
|
|
|
|
|
|
return CheckResult{
|
|
|
|
|
|
Status: HealthStatusUP,
|
|
|
|
|
|
Latency: formatLatency(time.Since(start)),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// checkRedis 检查 Redis 连接
|
|
|
|
|
|
func (h *HealthCheck) checkRedis() CheckResult {
|
|
|
|
|
|
if h.redisClient == nil {
|
|
|
|
|
|
return CheckResult{Status: HealthStatusUNKNOWN}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
start := time.Now()
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
|
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
|
|
if err := h.redisClient.Ping(ctx); err != nil {
|
|
|
|
|
|
return CheckResult{
|
|
|
|
|
|
Status: HealthStatusDOWN,
|
|
|
|
|
|
Error: err.Error(),
|
|
|
|
|
|
Latency: formatLatency(time.Since(start)),
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
return CheckResult{
|
|
|
|
|
|
Status: HealthStatusUP,
|
|
|
|
|
|
Latency: formatLatency(time.Since(start)),
|
|
|
|
|
|
}
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// updateDBConnectionMetrics 更新数据库连接池 Prometheus 指标
|
|
|
|
|
|
func (h *HealthCheck) updateDBConnectionMetrics(sqlDB *sql.DB) {
|
|
|
|
|
|
stats := sqlDB.Stats()
|
|
|
|
|
|
sloMetrics := GetGlobalSLOMetrics()
|
|
|
|
|
|
sloMetrics.SetDBConnections(
|
|
|
|
|
|
float64(stats.InUse),
|
|
|
|
|
|
float64(stats.MaxOpenConnections),
|
|
|
|
|
|
)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ReadinessHandler 就绪检查 Handler(检查所有依赖)
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
func (h *HealthCheck) ReadinessHandler(c *gin.Context) {
|
|
|
|
|
|
status := h.Check()
|
|
|
|
|
|
|
|
|
|
|
|
httpStatus := http.StatusOK
|
2026-04-07 12:08:16 +08:00
|
|
|
|
if status.Status == HealthStatusDOWN {
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
httpStatus = http.StatusServiceUnavailable
|
2026-04-07 12:08:16 +08:00
|
|
|
|
} else if status.Status == HealthStatusDEGRADED {
|
|
|
|
|
|
// DEGRADED 仍返回 200,但在响应体中标注
|
|
|
|
|
|
httpStatus = http.StatusOK
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
c.JSON(httpStatus, status)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// LivenessHandler 存活检查 Handler(只检查进程存活,不检查依赖)
|
|
|
|
|
|
// 返回 204 No Content:进程存活,不需要响应体(节省 k8s probe 开销)
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
func (h *HealthCheck) LivenessHandler(c *gin.Context) {
|
2026-04-07 12:08:16 +08:00
|
|
|
|
c.AbortWithStatus(http.StatusNoContent)
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-07 12:08:16 +08:00
|
|
|
|
// Handler 兼容旧 /health 端点
|
feat: backend core - auth, user, role, permission, device, webhook, monitoring, cache, repository, service, middleware, API handlers
2026-04-02 11:19:50 +08:00
|
|
|
|
func (h *HealthCheck) Handler(c *gin.Context) {
|
|
|
|
|
|
h.ReadinessHandler(c)
|
|
|
|
|
|
}
|
2026-04-07 12:08:16 +08:00
|
|
|
|
|
|
|
|
|
|
// formatLatency renders a check duration for the health payload.
//
// Durations shorter than one millisecond are reported as the fixed string
// "< 1ms". Anything longer is rounded to the nearest millisecond and formatted
// with time.Duration.String, e.g. "12ms" or "1.234s".
func formatLatency(d time.Duration) string {
	if d < time.Millisecond {
		return "< 1ms"
	}
	return d.Round(time.Millisecond).String()
}
|