Files
user-system/internal/monitoring/health.go
long-agent 582ad7a069 test: add comprehensive test coverage and improve code quality
- Add new test files for auth, service, and handler modules
- Improve test organization and coverage
- Refactor code for better maintainability
- Add captcha, settings, stats, and theme handler tests
- Add auth module tests (CAS, OAuth, password, SSO, state)
- Add service layer tests for auth, export, permissions, roles
- All Go tests pass (exit code 0)
- All frontend tests pass (325 tests in 59 files)
2026-04-17 20:43:50 +08:00

208 lines
5.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package monitoring
import (
"context"
"database/sql"
"net/http"
"time"
"github.com/gin-gonic/gin"
"gorm.io/gorm"
)
// HealthStatus is the string label used to report the health of the whole
// service or of a single dependency check.
type HealthStatus string
// Health status values. Within this file, UP and DOWN are produced by the
// individual dependency checks, DEGRADED is assigned by Check when only the
// Redis check is DOWN, and UNKNOWN is returned by checkRedis when no Redis
// checker is configured.
const (
HealthStatusUP HealthStatus = "UP"
HealthStatusDOWN HealthStatus = "DOWN"
HealthStatusDEGRADED HealthStatus = "DEGRADED"
HealthStatusUNKNOWN HealthStatus = "UNKNOWN"
)
// HealthCheck runs dependency health checks: a database check and, when a
// checker has been injected via WithRedis, a Redis check.
type HealthCheck struct {
// db is the GORM handle whose underlying *sql.DB is pinged by checkDatabase.
db *gorm.DB
// redisClient is optional; when it is nil, Check skips the Redis check.
redisClient RedisChecker
// startTime is set by NewHealthCheck and used by Check to report uptime.
startTime time.Time
}
// RedisChecker is the minimal interface needed to health-check Redis. It is
// declared here so that this package does not depend directly on a Redis
// client package.
type RedisChecker interface {
// Ping is called by checkRedis with a context carrying a 2-second timeout;
// a non-nil error marks the Redis check as DOWN.
Ping(ctx context.Context) error
}
// Status is the JSON payload describing the outcome of a health check.
type Status struct {
// Status is the aggregated health of the service.
Status HealthStatus `json:"status"`
// Checks holds the per-dependency results; Check stores them under the
// keys "database" and "redis".
Checks map[string]CheckResult `json:"checks"`
// Uptime is the time elapsed since the checker's start time, rounded to
// the second; it is omitted from the JSON when empty.
Uptime string `json:"uptime,omitempty"`
// Timestamp is the UTC time at which the status was built, in RFC 3339 format.
Timestamp string `json:"timestamp"`
}
// CheckResult is the outcome of a single dependency check.
type CheckResult struct {
// Status is the health of the checked dependency.
Status HealthStatus `json:"status"`
// Error is the error text of a failed check; omitted from the JSON when empty.
Error string `json:"error,omitempty"`
// Latency is a formatted duration string produced by formatLatency (for
// example "< 1ms"). NOTE(review): it is serialized under the key
// "latency_ms" although the value is a string, not a number of milliseconds.
Latency string `json:"latency_ms,omitempty"`
}
// NewHealthCheck builds a HealthCheck bound to the given database handle and
// records the current time as the start time used for uptime reporting. No
// Redis checker is configured; use WithRedis to add one.
func NewHealthCheck(db *gorm.DB) *HealthCheck {
	hc := new(HealthCheck)
	hc.db = db
	hc.startTime = time.Now()
	return hc
}
// WithRedis injects an optional Redis checker. It stores r on the receiver and
// returns the same receiver so that the call can be chained.
func (h *HealthCheck) WithRedis(r RedisChecker) *HealthCheck {
h.redisClient = r
return h
}
// Check runs the full health check and returns the aggregated status.
//
// The database is a hard dependency: when its check is DOWN, the overall
// status is DOWN. Redis is a soft dependency and is only checked when a
// checker has been configured: when it is DOWN while the overall status is
// still UP, the overall status becomes DEGRADED.
//
// Check is safe to call on a nil receiver; the database is then reported as
// not configured and the overall status is DOWN.
func (h *HealthCheck) Check() *Status {
	status := &Status{
		Status:    HealthStatusUP,
		Checks:    make(map[string]CheckResult),
		Timestamp: time.Now().UTC().Format(time.RFC3339),
	}

	// IsZero is the reliable zero-value test for time.Time; comparing structs
	// with != also compares the location pointer and monotonic reading.
	if h != nil && !h.startTime.IsZero() {
		status.Uptime = time.Since(h.startTime).Round(time.Second).String()
	}

	// Database: hard dependency. checkDatabase handles a nil receiver itself.
	dbResult := h.checkDatabase()
	status.Checks["database"] = dbResult
	if dbResult.Status == HealthStatusDOWN {
		status.Status = HealthStatusDOWN
	}

	// Redis: soft dependency, skipped entirely when no checker is configured.
	if h == nil || h.redisClient == nil {
		return status
	}
	redisResult := h.checkRedis()
	status.Checks["redis"] = redisResult
	if redisResult.Status == HealthStatusDOWN && status.Status == HealthStatusUP {
		status.Status = HealthStatusDEGRADED
	}
	return status
}
// LivenessCheck reports that the process is running. It checks no
// dependencies: the returned status is always UP, with an empty check map and
// the current UTC time in RFC 3339 format.
func (h *HealthCheck) LivenessCheck() *Status {
	now := time.Now().UTC()
	alive := new(Status)
	alive.Status = HealthStatusUP
	alive.Checks = make(map[string]CheckResult)
	alive.Timestamp = now.Format(time.RFC3339)
	return alive
}
// checkDatabase pings the database and reports the result.
//
// A nil receiver or a nil db handle yields DOWN with the error
// "database not configured". Failing to obtain the underlying *sql.DB, or a
// failed ping (bounded by a 3-second timeout), yields DOWN with the error
// text. On success the connection pool metrics are refreshed in a separate
// goroutine and UP is returned together with the measured latency.
func (h *HealthCheck) checkDatabase() CheckResult {
	if h == nil || h.db == nil {
		return CheckResult{Status: HealthStatusDOWN, Error: "database not configured"}
	}

	begin := time.Now()

	conn, err := h.db.DB()
	if err != nil {
		return CheckResult{Status: HealthStatusDOWN, Error: err.Error()}
	}

	pingCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	pingErr := conn.PingContext(pingCtx)
	if pingErr != nil {
		return CheckResult{
			Status:  HealthStatusDOWN,
			Error:   pingErr.Error(),
			Latency: formatLatency(time.Since(begin)),
		}
	}

	// Refresh the pool metrics off the request path.
	go h.updateDBConnectionMetrics(conn)

	return CheckResult{Status: HealthStatusUP, Latency: formatLatency(time.Since(begin))}
}
// checkRedis pings Redis through the configured checker and reports the result.
//
// It returns UNKNOWN when no checker is configured. Otherwise the ping is
// bounded by a 2-second timeout: an error yields DOWN with the error text,
// success yields UP. Both outcomes include the measured latency.
func (h *HealthCheck) checkRedis() CheckResult {
	client := h.redisClient
	if client == nil {
		return CheckResult{Status: HealthStatusUNKNOWN}
	}

	begin := time.Now()
	pingCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	result := CheckResult{Status: HealthStatusUP}
	if err := client.Ping(pingCtx); err != nil {
		result.Status = HealthStatusDOWN
		result.Error = err.Error()
	}
	result.Latency = formatLatency(time.Since(begin))
	return result
}
// updateDBConnectionMetrics reads the connection pool statistics of sqlDB and
// passes the in-use and max-open connection counts to the global SLO metrics
// via SetDBConnections.
func (h *HealthCheck) updateDBConnectionMetrics(sqlDB *sql.DB) {
	poolStats := sqlDB.Stats()
	inUse := float64(poolStats.InUse)
	maxOpen := float64(poolStats.MaxOpenConnections)
	GetGlobalSLOMetrics().SetDBConnections(inUse, maxOpen)
}
// ReadinessHandler is the readiness probe handler. It runs the full health
// check and writes the result as JSON.
//
// The HTTP status is 503 Service Unavailable when the overall status is DOWN
// and 200 OK otherwise. A DEGRADED service still answers 200; the degradation
// is only visible in the response body.
func (h *HealthCheck) ReadinessHandler(c *gin.Context) {
	report := h.Check()

	code := http.StatusOK
	if report.Status == HealthStatusDOWN {
		code = http.StatusServiceUnavailable
	}

	c.JSON(code, report)
}
// LivenessHandler is the liveness probe handler. It checks no dependencies and
// aborts the Gin context with 204 No Content. No response body is written,
// which the original author notes keeps Kubernetes probe overhead low.
func (h *HealthCheck) LivenessHandler(c *gin.Context) {
c.AbortWithStatus(http.StatusNoContent)
}
// Handler keeps the legacy /health endpoint working by delegating to
// ReadinessHandler.
func (h *HealthCheck) Handler(c *gin.Context) {
h.ReadinessHandler(c)
}
// formatLatency renders a latency for the health report. Durations below one
// millisecond (including zero and negative values) are shown as "< 1ms";
// anything else is rounded to the nearest millisecond and formatted with
// time.Duration's String method (for example "2ms" or "1.235s").
func formatLatency(d time.Duration) string {
	switch {
	case d < time.Millisecond:
		return "< 1ms"
	default:
		rounded := d.Round(time.Millisecond)
		return rounded.String()
	}
}