P3-C: 三服务可观测面统一 - metrics端点统一/健康检查别名/traceID透传
Gateway:
- remote_runtime.go: P3-C-08 从请求上下文透传 X-Request-Id 到 platform-token-runtime

Supply-api:
- 新建 internal/metrics/metrics.go: HTTP请求计数/latency/token发布/worker queue指标 (Prometheus-text)
- 新建 internal/metrics/metrics_test.go: 6个测试覆盖
- bootstrap.go: 注册 /metrics (P3-C-01/04)、/health、/healthz 别名 (P3-C-05)

Platform-token-runtime:
- bootstrap.go: 添加 /health 和 /livez 别名 (P3-C-05)

三服务 /metrics 统一为 text/plain; version=0.0.4
三服务 /health 端点统一别名
Gateway → platform-token-runtime 透传 trace ID
This commit is contained in:
@@ -239,7 +239,12 @@ func (r *RemoteTokenRuntime) Verify(ctx context.Context, rawToken string) (Verif
|
||||
return VerifiedToken{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
// P3-C-08: 从请求上下文透传 trace ID,避免生成新的 ID 截断链路
|
||||
if reqID, ok := RequestIDFromContext(ctx); ok && reqID != "" {
|
||||
req.Header.Set("X-Request-Id", reqID)
|
||||
} else {
|
||||
req.Header.Set("X-Request-Id", fmt.Sprintf("gateway-introspect-%d", r.now().UnixNano()))
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
resp, err := r.httpClient.Do(req)
|
||||
|
||||
@@ -99,6 +99,17 @@ func BuildServer(cfg Config) (*http.Server, error) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"status":"UP"}`))
|
||||
})
|
||||
// P3-C-05: /health 和 /livez 别名(统一路径,对齐 gateway/supply-api)
|
||||
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"status":"UP"}`))
|
||||
})
|
||||
mux.HandleFunc("/livez", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"status":"UP"}`))
|
||||
})
|
||||
// P3-B: /metrics 端点(Prometheus-text 格式)
|
||||
mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain; version=0.0.4")
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
|
||||
"lijiaoqiao/supply-api/internal/config"
|
||||
"lijiaoqiao/supply-api/internal/httpapi"
|
||||
"lijiaoqiao/supply-api/internal/metrics"
|
||||
"lijiaoqiao/supply-api/internal/middleware"
|
||||
"lijiaoqiao/supply-api/internal/pkg/logging"
|
||||
)
|
||||
@@ -156,6 +157,14 @@ func buildRouteMux(opts buildRouteMuxOptions) *http.ServeMux {
|
||||
mux := http.NewServeMux()
|
||||
healthHandler := httpapi.NewHealthHandlerWithDefaults(opts.DBHealthCheck, opts.RedisHealthCheck)
|
||||
healthHandler.RegisterRoutes(mux)
|
||||
// P3-C: /metrics 端点(Prometheus-text 格式)
|
||||
mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain; version=0.0.4")
|
||||
_, _ = w.Write([]byte(metrics.Export()))
|
||||
})
|
||||
// P3-C-05: /health 别名(统一路径,对齐 gateway/platform-token-runtime)
|
||||
mux.HandleFunc("/health", healthHandler.ServeHealth)
|
||||
mux.HandleFunc("/healthz", healthHandler.ServeHealth)
|
||||
opts.SupplyAPI.Register(mux)
|
||||
opts.AlertAPI.Register(mux)
|
||||
if opts.IAMHandler != nil {
|
||||
|
||||
104
supply-api/internal/metrics/metrics.go
Normal file
104
supply-api/internal/metrics/metrics.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SupplyAPIMetrics is the in-process metrics collector for supply-api.
//
// P3-C: part of the unified observability surface; the exported metric style
// is meant to line up with gateway and platform-token-runtime.
//
// Every counter and gauge is an atomic.Int64, so the recorder functions below
// can be called from concurrent goroutines without additional locking.
type SupplyAPIMetrics struct {
	// HTTP request counters.
	httpRequests      atomic.Int64
	httpRequestsOK    atomic.Int64
	httpRequestsError atomic.Int64

	// HTTP latency accumulator (nanoseconds) and number of recorded samples.
	httpLatencySum   atomic.Int64
	httpLatencyCount atomic.Int64

	// Token publish counters.
	tokenPublishes   atomic.Int64
	tokenPublishFail atomic.Int64

	// Worker queue gauges.
	queueSize   atomic.Int64
	workersBusy atomic.Int64

	// startAt is the reference instant for the uptime gauge.
	startAt time.Time
}

// global is the process-wide collector used by all package-level functions.
// It is initialized in its declaration (rather than in init) so that it is
// never nil, even while other package-level variables are being initialized.
var global = &SupplyAPIMetrics{startAt: time.Now()}

// IncHTTPRequest records one received HTTP request.
func IncHTTPRequest() { global.httpRequests.Add(1) }

// IncHTTPOK records one successful HTTP request.
func IncHTTPOK() { global.httpRequestsOK.Add(1) }

// IncHTTPError records one failed HTTP request.
func IncHTTPError() { global.httpRequestsError.Add(1) }

// IncLatency records one latency sample, given in nanoseconds.
func IncLatency(ns int64) {
	global.httpLatencySum.Add(ns)
	global.httpLatencyCount.Add(1)
}

// IncTokenPublish records one token publish operation.
func IncTokenPublish() { global.tokenPublishes.Add(1) }

// IncTokenPublishFail records one failed token publish operation.
//
// It increments both the total publish counter and the failure counter, so
// calling IncTokenPublish as well for the same failed operation would count
// that operation twice in the total.
func IncTokenPublishFail() {
	global.tokenPublishes.Add(1)
	global.tokenPublishFail.Add(1)
}

// SetQueueSize sets the current worker queue size gauge.
func SetQueueSize(n int64) { global.queueSize.Store(n) }

// SetWorkersBusy sets the gauge for the number of busy workers.
func SetWorkersBusy(n int64) { global.workersBusy.Store(n) }

// appendMetricHeader appends the "# HELP" and "# TYPE" lines for a metric,
// followed by the metric name and a single space, leaving dst positioned for
// the sample value.
func appendMetricHeader(dst []byte, name, help, kind string) []byte {
	dst = append(dst, "# HELP "...)
	dst = append(dst, name...)
	dst = append(dst, ' ')
	dst = append(dst, help...)
	dst = append(dst, "\n# TYPE "...)
	dst = append(dst, name...)
	dst = append(dst, ' ')
	dst = append(dst, kind...)
	dst = append(dst, '\n')
	dst = append(dst, name...)
	return append(dst, ' ')
}

// appendIntMetric appends one complete integer-valued metric (HELP, TYPE and
// sample lines) to dst.
func appendIntMetric(dst []byte, name, help, kind string, v int64) []byte {
	dst = appendMetricHeader(dst, name, help, kind)
	dst = strconv.AppendInt(dst, v, 10)
	return append(dst, '\n')
}

// appendFloatMetric appends one complete float-valued metric (HELP, TYPE and
// sample lines) to dst, formatting the value with three decimal places.
func appendFloatMetric(dst []byte, name, help, kind string, v float64) []byte {
	dst = appendMetricHeader(dst, name, help, kind)
	dst = strconv.AppendFloat(dst, v, 'f', 3, 64)
	return append(dst, '\n')
}

// Export returns a snapshot of all metrics in Prometheus text format.
//
// Each metric is emitted as a HELP line, a TYPE line and one sample line, and
// the output ends with a newline. The average latency is derived from two
// separate atomic loads (sample count, then sum), so under concurrent
// IncLatency calls it can be marginally skewed; it is reported as 0 when no
// sample has been recorded.
func Export() string {
	m := global

	avgNs := float64(0)
	if samples := m.httpLatencyCount.Load(); samples > 0 {
		avgNs = float64(m.httpLatencySum.Load()) / float64(samples)
	}

	buf := make([]byte, 0, 2048)
	buf = appendFloatMetric(buf, "supply_api_uptime_seconds",
		"Time since service start", "gauge", time.Since(m.startAt).Seconds())
	buf = appendIntMetric(buf, "supply_api_http_requests_total",
		"Total HTTP requests received", "counter", m.httpRequests.Load())
	buf = appendIntMetric(buf, "supply_api_http_requests_ok_total",
		"Successful HTTP requests (2xx/3xx)", "counter", m.httpRequestsOK.Load())
	buf = appendIntMetric(buf, "supply_api_http_requests_error_total",
		"Failed HTTP requests (4xx/5xx)", "counter", m.httpRequestsError.Load())
	// Latency is accumulated in nanoseconds and exported in milliseconds.
	buf = appendFloatMetric(buf, "supply_api_http_latency_ms_avg",
		"Average HTTP request latency in milliseconds", "gauge", avgNs/1e6)
	buf = appendIntMetric(buf, "supply_api_token_publishes_total",
		"Total token publish operations", "counter", m.tokenPublishes.Load())
	buf = appendIntMetric(buf, "supply_api_token_publish_fail_total",
		"Token publish failures", "counter", m.tokenPublishFail.Load())
	buf = appendIntMetric(buf, "supply_api_queue_size",
		"Current worker queue size", "gauge", m.queueSize.Load())
	buf = appendIntMetric(buf, "supply_api_workers_busy",
		"Number of busy workers", "gauge", m.workersBusy.Load())
	return string(buf)
}
|
||||
65
supply-api/internal/metrics/metrics_test.go
Normal file
65
supply-api/internal/metrics/metrics_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExport_ContainsUptime(t *testing.T) {
|
||||
output := Export()
|
||||
if !strings.Contains(output, "supply_api_uptime_seconds") {
|
||||
t.Fatal("missing uptime metric")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_ContainsHTTPMetrics(t *testing.T) {
|
||||
output := Export()
|
||||
for _, m := range []string{
|
||||
"supply_api_http_requests_total",
|
||||
"supply_api_http_requests_ok_total",
|
||||
"supply_api_http_requests_error_total",
|
||||
"supply_api_http_latency_ms_avg",
|
||||
} {
|
||||
if !strings.Contains(output, m) {
|
||||
t.Errorf("missing metric: %s", m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_ContainsTokenPublishMetrics(t *testing.T) {
|
||||
output := Export()
|
||||
for _, m := range []string{
|
||||
"supply_api_token_publishes_total",
|
||||
"supply_api_token_publish_fail_total",
|
||||
} {
|
||||
if !strings.Contains(output, m) {
|
||||
t.Errorf("missing metric: %s", m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_PrometheusFormat(t *testing.T) {
|
||||
output := Export()
|
||||
if !strings.Contains(output, "# HELP supply_api_uptime_seconds") {
|
||||
t.Error("missing HELP line")
|
||||
}
|
||||
if !strings.Contains(output, "# TYPE supply_api_uptime_seconds gauge") {
|
||||
t.Error("missing TYPE line")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIncTokenPublish_IncrementsCounter(t *testing.T) {
|
||||
before := global.tokenPublishes.Load()
|
||||
IncTokenPublish()
|
||||
after := global.tokenPublishes.Load()
|
||||
if after != before+1 {
|
||||
t.Errorf("expected %d, got %d", before+1, after)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetQueueSize_SetsValue(t *testing.T) {
|
||||
SetQueueSize(42)
|
||||
if got := global.queueSize.Load(); got != 42 {
|
||||
t.Errorf("expected 42, got %d", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user