refactor: 整理项目根目录结构
整理内容:
- 删除 60+ 临时测试输出文件 (*.txt)
- 移动二进制文件到 bin/ 目录
- 移动 Shell 脚本到 scripts/ 目录
- scripts/dev/: check_gitea.sh, check_sub2api.sh, run_tests.sh
- scripts/deploy/: deploy_*.sh, simple_deploy.sh
- scripts/ops/: fix_nginx.sh, fix_ssl.sh, install_docker.sh
- scripts/test/: test_*.sh, test_*.bat
- 移动批处理文件到 scripts/
- 移动 Python 脚本到 tools/
- 清理临时日志文件
保留根目录必要文件:
- go.mod, go.sum, go.work
- Makefile, docker-compose.yml
- .env.example, .gitignore
- README.md, AGENTS.md, DEPLOY_GUIDE.md
验证: go build ./... && go test ./... 通过
2026-04-07 18:10:36 +08:00
|
|
|
|
package monitoring
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
// SLOMetrics 服务级别目标(SLO)相关指标
|
|
|
|
|
|
// 这些指标是 SLO 测量的基础,用于计算错误预算燃烧率
|
|
|
|
|
|
type SLOMetrics struct {
|
|
|
|
|
|
// 缓存命中统计(alerts.yml 引用但原来未定义)
|
test: add comprehensive test coverage and improve code quality
- Add new test files for auth, service, and handler modules
- Improve test organization and coverage
- Refactor code for better maintainability
- Add captcha, settings, stats, and theme handler tests
- Add auth module tests (CAS, OAuth, password, SSO, state)
- Add service layer tests for auth, export, permissions, roles
- All Go tests pass (exit code 0)
- All frontend tests pass (325 tests in 59 files)
2026-04-17 20:43:50 +08:00
|
|
|
|
CacheHitsTotal *prometheus.CounterVec
|
refactor: 整理项目根目录结构
整理内容:
- 删除 60+ 临时测试输出文件 (*.txt)
- 移动二进制文件到 bin/ 目录
- 移动 Shell 脚本到 scripts/ 目录
- scripts/dev/: check_gitea.sh, check_sub2api.sh, run_tests.sh
- scripts/deploy/: deploy_*.sh, simple_deploy.sh
- scripts/ops/: fix_nginx.sh, fix_ssl.sh, install_docker.sh
- scripts/test/: test_*.sh, test_*.bat
- 移动批处理文件到 scripts/
- 移动 Python 脚本到 tools/
- 清理临时日志文件
保留根目录必要文件:
- go.mod, go.sum, go.work
- Makefile, docker-compose.yml
- .env.example, .gitignore
- README.md, AGENTS.md, DEPLOY_GUIDE.md
验证: go build ./... && go test ./... 通过
2026-04-07 18:10:36 +08:00
|
|
|
|
CacheOperationsTotal *prometheus.CounterVec
|
|
|
|
|
|
|
|
|
|
|
|
// 数据库连接池状态(alerts.yml 引用但原来未定义)
|
|
|
|
|
|
DBConnectionsActive prometheus.Gauge
|
|
|
|
|
|
DBConnectionsMax prometheus.Gauge
|
|
|
|
|
|
|
|
|
|
|
|
// Token 操作
|
|
|
|
|
|
TokenRefreshTotal *prometheus.CounterVec
|
|
|
|
|
|
|
|
|
|
|
|
// 账号安全事件
|
test: add comprehensive test coverage and improve code quality
- Add new test files for auth, service, and handler modules
- Improve test organization and coverage
- Refactor code for better maintainability
- Add captcha, settings, stats, and theme handler tests
- Add auth module tests (CAS, OAuth, password, SSO, state)
- Add service layer tests for auth, export, permissions, roles
- All Go tests pass (exit code 0)
- All frontend tests pass (325 tests in 59 files)
2026-04-17 20:43:50 +08:00
|
|
|
|
AccountLockTotal prometheus.Counter
|
|
|
|
|
|
AnomalyDetectedTotal *prometheus.CounterVec
|
refactor: 整理项目根目录结构
整理内容:
- 删除 60+ 临时测试输出文件 (*.txt)
- 移动二进制文件到 bin/ 目录
- 移动 Shell 脚本到 scripts/ 目录
- scripts/dev/: check_gitea.sh, check_sub2api.sh, run_tests.sh
- scripts/deploy/: deploy_*.sh, simple_deploy.sh
- scripts/ops/: fix_nginx.sh, fix_ssl.sh, install_docker.sh
- scripts/test/: test_*.sh, test_*.bat
- 移动批处理文件到 scripts/
- 移动 Python 脚本到 tools/
- 清理临时日志文件
保留根目录必要文件:
- go.mod, go.sum, go.work
- Makefile, docker-compose.yml
- .env.example, .gitignore
- README.md, AGENTS.md, DEPLOY_GUIDE.md
验证: go build ./... && go test ./... 通过
2026-04-07 18:10:36 +08:00
|
|
|
|
|
|
|
|
|
|
// 错误预算燃烧率(可选,用于自定义仪表盘)
|
|
|
|
|
|
ErrorBudgetBurnRate *prometheus.GaugeVec
|
|
|
|
|
|
|
|
|
|
|
|
registry *prometheus.Registry
|
|
|
|
|
|
once sync.Once
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
var (
|
|
|
|
|
|
globalSLOMetrics *SLOMetrics
|
|
|
|
|
|
globalSLOMetricsOnce sync.Once
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
// NewSLOMetrics 创建 SLO 指标实例(使用独立 registry 避免测试冲突)
|
|
|
|
|
|
func NewSLOMetrics() *SLOMetrics {
|
|
|
|
|
|
reg := prometheus.NewRegistry()
|
|
|
|
|
|
m := &SLOMetrics{registry: reg}
|
|
|
|
|
|
|
|
|
|
|
|
m.CacheHitsTotal = prometheus.NewCounterVec(
|
|
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
|
|
Name: "cache_hits_total",
|
|
|
|
|
|
Help: "Total number of cache hits",
|
|
|
|
|
|
},
|
|
|
|
|
|
[]string{"level", "operation"}, // level: l1/l2, operation: get/set
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.CacheOperationsTotal = prometheus.NewCounterVec(
|
|
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
|
|
Name: "cache_operations_total",
|
|
|
|
|
|
Help: "Total number of cache operations",
|
|
|
|
|
|
},
|
|
|
|
|
|
[]string{"level", "operation"},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.DBConnectionsActive = prometheus.NewGauge(
|
|
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
|
|
Name: "db_connections_active",
|
|
|
|
|
|
Help: "Number of active database connections",
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.DBConnectionsMax = prometheus.NewGauge(
|
|
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
|
|
Name: "db_connections_max",
|
|
|
|
|
|
Help: "Maximum number of database connections configured",
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.TokenRefreshTotal = prometheus.NewCounterVec(
|
|
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
|
|
Name: "token_refresh_total",
|
|
|
|
|
|
Help: "Total number of token refresh attempts",
|
|
|
|
|
|
},
|
|
|
|
|
|
[]string{"status"}, // success/failure/rate_limited
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.AccountLockTotal = prometheus.NewCounter(
|
|
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
|
|
Name: "account_lock_total",
|
|
|
|
|
|
Help: "Total number of account lockout events due to failed login attempts",
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.AnomalyDetectedTotal = prometheus.NewCounterVec(
|
|
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
|
|
Name: "anomaly_detected_total",
|
|
|
|
|
|
Help: "Total number of anomaly login detections",
|
|
|
|
|
|
},
|
|
|
|
|
|
[]string{"type"}, // geo_anomaly/device_anomaly/brute_force/suspicious_ip
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
m.ErrorBudgetBurnRate = prometheus.NewGaugeVec(
|
|
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
|
|
Name: "error_budget_burn_rate",
|
|
|
|
|
|
Help: "Current error budget burn rate multiplier (1.0 = nominal consumption)",
|
|
|
|
|
|
},
|
|
|
|
|
|
[]string{"slo"}, // api-availability/api-latency/login-success-rate
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
reg.MustRegister(
|
|
|
|
|
|
m.CacheHitsTotal,
|
|
|
|
|
|
m.CacheOperationsTotal,
|
|
|
|
|
|
m.DBConnectionsActive,
|
|
|
|
|
|
m.DBConnectionsMax,
|
|
|
|
|
|
m.TokenRefreshTotal,
|
|
|
|
|
|
m.AccountLockTotal,
|
|
|
|
|
|
m.AnomalyDetectedTotal,
|
|
|
|
|
|
m.ErrorBudgetBurnRate,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return m
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// GetGlobalSLOMetrics 获取全局 SLO 指标单例(生产使用)
|
|
|
|
|
|
func GetGlobalSLOMetrics() *SLOMetrics {
|
|
|
|
|
|
globalSLOMetricsOnce.Do(func() {
|
|
|
|
|
|
m := NewSLOMetrics()
|
|
|
|
|
|
// 注册到默认 registry 以便 /metrics 端点暴露
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.CacheHitsTotal) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.CacheOperationsTotal) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.DBConnectionsActive) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.DBConnectionsMax) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.TokenRefreshTotal) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.AccountLockTotal) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.AnomalyDetectedTotal) //nolint:errcheck
|
|
|
|
|
|
prometheus.DefaultRegisterer.Register(m.ErrorBudgetBurnRate) //nolint:errcheck
|
|
|
|
|
|
globalSLOMetrics = m
|
|
|
|
|
|
})
|
|
|
|
|
|
return globalSLOMetrics
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// GetRegistry 获取私有 registry(测试使用)
|
|
|
|
|
|
func (m *SLOMetrics) GetRegistry() *prometheus.Registry {
|
|
|
|
|
|
return m.registry
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RecordCacheHit 记录缓存命中
|
|
|
|
|
|
func (m *SLOMetrics) RecordCacheHit(level, operation string) {
|
|
|
|
|
|
m.CacheHitsTotal.WithLabelValues(level, operation).Inc()
|
|
|
|
|
|
m.CacheOperationsTotal.WithLabelValues(level, operation).Inc()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RecordCacheMiss 记录缓存未命中
|
|
|
|
|
|
func (m *SLOMetrics) RecordCacheMiss(level, operation string) {
|
|
|
|
|
|
m.CacheOperationsTotal.WithLabelValues(level, operation).Inc()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RecordTokenRefresh 记录 Token 刷新操作
|
|
|
|
|
|
func (m *SLOMetrics) RecordTokenRefresh(status string) {
|
|
|
|
|
|
m.TokenRefreshTotal.WithLabelValues(status).Inc()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RecordAccountLock 记录账号锁定事件
|
|
|
|
|
|
func (m *SLOMetrics) RecordAccountLock() {
|
|
|
|
|
|
m.AccountLockTotal.Inc()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RecordAnomaly 记录异常检测事件
|
|
|
|
|
|
func (m *SLOMetrics) RecordAnomaly(anomalyType string) {
|
|
|
|
|
|
m.AnomalyDetectedTotal.WithLabelValues(anomalyType).Inc()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// SetDBConnections 更新数据库连接池状态
|
|
|
|
|
|
func (m *SLOMetrics) SetDBConnections(active, max float64) {
|
|
|
|
|
|
m.DBConnectionsActive.Set(active)
|
|
|
|
|
|
m.DBConnectionsMax.Set(max)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// SetErrorBudgetBurnRate 设置错误预算燃烧率
|
|
|
|
|
|
func (m *SLOMetrics) SetErrorBudgetBurnRate(slo string, burnRate float64) {
|
|
|
|
|
|
m.ErrorBudgetBurnRate.WithLabelValues(slo).Set(burnRate)
|
|
|
|
|
|
}
|