- Remove old review reports (keep latest only) - Move docs/ to deploy/docs-backup/ - Move performance-testing/ to deploy/ - Clean up test output files - Organize root directory
205 lines
5.7 KiB
Go
205 lines
5.7 KiB
Go
package admin
|
|
|
|
import (
|
|
"crypto/subtle"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
|
|
"github.com/Wei-Shaw/sub2api/internal/service"
|
|
"github.com/gin-gonic/gin"
|
|
)
|
|
|
|
// ============================================================
// Alertmanager Webhook Handler
//
// Bridges Prometheus Alertmanager alerts into the existing ops_alert_events
// table, reusing the existing infrastructure such as email notifications and
// silence checks.
//
// Configure a webhook receiver in Alertmanager that points at this endpoint:
//
//   receivers:
//     - name: 'ops-bridge'
//       webhook_configs:
//         - url: 'http://localhost:8080/admin/ops/prometheus-alerts'
//           send_resolved: true
//           http_config:
//             bearer_token: '<INTERNAL_TOKEN>'
//
// ============================================================
|
|
|
|
// alertmanagerPayload is the JSON body sent by Alertmanager.
|
|
type alertmanagerPayload struct {
|
|
Receiver string `json:"receiver"`
|
|
Status string `json:"status"` // "firing" | "resolved"
|
|
Alerts []alertmanagerAlert `json:"alerts"`
|
|
|
|
GroupLabels map[string]string `json:"groupLabels"`
|
|
CommonLabels map[string]string `json:"commonLabels"`
|
|
CommonAnnotations map[string]string `json:"commonAnnotations"`
|
|
|
|
ExternalURL string `json:"externalURL"`
|
|
Version string `json:"version"`
|
|
}
|
|
|
|
// alertmanagerAlert is a single entry of the "alerts" array in an Alertmanager
// webhook body. convertAlertToEvent reads every field except GeneratorURL.
type alertmanagerAlert struct {
	// Status is the per-alert state: "firing" or "resolved".
	Status string `json:"status"`

	// Labels carries the alert's labels; "alertname" and "severity" are
	// given special treatment by convertAlertToEvent.
	Labels map[string]string `json:"labels"`

	// Annotations carries the alert's annotations; "summary", "description",
	// "message" and "runbook_url" are the keys consulted in this file.
	Annotations map[string]string `json:"annotations"`

	// StartsAt is decoded from "startsAt" and used as the event's fire time.
	StartsAt time.Time `json:"startsAt"`

	// EndsAt is decoded from "endsAt" and used as the resolve time of
	// resolved alerts.
	EndsAt time.Time `json:"endsAt"`

	// GeneratorURL is decoded from the "generatorURL" key.
	GeneratorURL string `json:"generatorURL"`

	// Fingerprint is decoded from "fingerprint" and stored with the event.
	Fingerprint string `json:"fingerprint"`
}
|
|
|
|
// PromAlertsBridgeHandler receives Alertmanager webhook and writes to ops_alert_events.
|
|
//
|
|
// POST /admin/ops/prometheus-alerts
|
|
//
|
|
// Security: This endpoint is NOT protected by JWT (it's called by Alertmanager).
|
|
// It validates the Authorization header against INTERNAL_WEBHOOK_TOKEN env var
|
|
// using constant-time comparison to prevent timing attacks.
|
|
func (h *OpsHandler) PromAlertsBridgeHandler(c *gin.Context) {
|
|
// Validate bearer token against INTERNAL_WEBHOOK_TOKEN env var
|
|
expectedToken := os.Getenv("INTERNAL_WEBHOOK_TOKEN")
|
|
if expectedToken == "" {
|
|
slog.Error("INTERNAL_WEBHOOK_TOKEN not configured, rejecting prometheus webhook")
|
|
response.Error(c, http.StatusServiceUnavailable, "webhook token not configured")
|
|
return
|
|
}
|
|
|
|
authHeader := c.GetHeader("Authorization")
|
|
const prefix = "Bearer "
|
|
if !strings.HasPrefix(authHeader, prefix) {
|
|
response.Error(c, http.StatusUnauthorized, "missing or invalid authorization header")
|
|
return
|
|
}
|
|
token := strings.TrimPrefix(authHeader, prefix)
|
|
|
|
// Constant-time comparison to prevent timing attacks
|
|
if subtle.ConstantTimeCompare([]byte(token), []byte(expectedToken)) != 1 {
|
|
slog.Warn("invalid webhook token received", "remote_ip", c.ClientIP())
|
|
response.Error(c, http.StatusUnauthorized, "invalid token")
|
|
return
|
|
}
|
|
|
|
var payload alertmanagerPayload
|
|
if err := c.ShouldBindJSON(&payload); err != nil {
|
|
response.Error(c, http.StatusBadRequest, "invalid payload: "+err.Error())
|
|
return
|
|
}
|
|
|
|
if len(payload.Alerts) == 0 {
|
|
c.Status(http.StatusNoContent)
|
|
return
|
|
}
|
|
|
|
var successCount, failCount int
|
|
|
|
for _, alert := range payload.Alerts {
|
|
event := convertAlertToEvent(alert)
|
|
if err := h.opsService.CreateExternalAlertEvent(c.Request.Context(), event); err != nil {
|
|
slog.Warn("failed to persist prometheus alert event",
|
|
"fingerprint", alert.Fingerprint,
|
|
"alertname", alert.Labels["alertname"],
|
|
"err", err,
|
|
)
|
|
failCount++
|
|
continue
|
|
}
|
|
successCount++
|
|
}
|
|
|
|
slog.Info("prometheus alerts bridged",
|
|
"total", len(payload.Alerts),
|
|
"success", successCount,
|
|
"failed", failCount,
|
|
)
|
|
|
|
c.JSON(http.StatusOK, gin.H{
|
|
"total": len(payload.Alerts),
|
|
"success": successCount,
|
|
"failed": failCount,
|
|
})
|
|
}
|
|
|
|
// convertAlertToEvent maps an Alertmanager alert to an OpsAlertEvent.
|
|
// We use a synthetic rule_id=0 to distinguish Prometheus-sourced events
|
|
// from native evaluator-sourced events.
|
|
func convertAlertToEvent(a alertmanagerAlert) *service.OpsAlertEvent {
|
|
// Map Alertmanager severity to internal severity levels
|
|
severity := mapSeverity(a.Labels["severity"])
|
|
|
|
// Map status
|
|
status := service.OpsAlertStatusFiring
|
|
if a.Status == "resolved" {
|
|
status = service.OpsAlertStatusResolved
|
|
}
|
|
|
|
title := a.Annotations["summary"]
|
|
if title == "" {
|
|
title = a.Labels["alertname"]
|
|
}
|
|
|
|
description := a.Annotations["description"]
|
|
if description == "" {
|
|
description = a.Annotations["message"]
|
|
}
|
|
|
|
firedAt := a.StartsAt
|
|
if firedAt.IsZero() {
|
|
firedAt = time.Now()
|
|
}
|
|
|
|
var resolvedAt *time.Time
|
|
if a.Status == "resolved" && !a.EndsAt.IsZero() {
|
|
t := a.EndsAt
|
|
resolvedAt = &t
|
|
}
|
|
|
|
// Collect useful label dimensions to store in Dimensions
|
|
dimensions := make(map[string]any)
|
|
for k, v := range a.Labels {
|
|
if k != "alertname" && k != "severity" {
|
|
dimensions[k] = v
|
|
}
|
|
}
|
|
// Add Prometheus-specific metadata
|
|
dimensions["source"] = "prometheus"
|
|
dimensions["fingerprint"] = a.Fingerprint
|
|
if runbook := a.Annotations["runbook_url"]; runbook != "" {
|
|
dimensions["runbook_url"] = runbook
|
|
}
|
|
|
|
return &service.OpsAlertEvent{
|
|
// RuleID = 0 indicates an externally-sourced alert (Prometheus)
|
|
RuleID: 0,
|
|
Severity: severity,
|
|
Status: status,
|
|
Title: title,
|
|
Description: description,
|
|
Dimensions: dimensions,
|
|
FiredAt: firedAt,
|
|
ResolvedAt: resolvedAt,
|
|
}
|
|
}
|
|
|
|
// mapSeverity translates a Prometheus "severity" label value into the internal
// priority level. Matching is case-insensitive:
//
//	critical        -> P0
//	high            -> P1
//	warning, medium -> P2
//	anything else   -> P3 (including the empty string)
func mapSeverity(s string) string {
	level := "P3"
	switch normalized := strings.ToLower(s); normalized {
	case "critical":
		level = "P0"
	case "high":
		level = "P1"
	case "warning", "medium":
		level = "P2"
	}
	return level
}
|