From e249a9160b929585d1ee1d0c9c0907220b2c30b1 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 21 Apr 2026 18:40:43 +0800
Subject: [PATCH] =?UTF-8?q?P3-C:=20=E4=B8=89=E6=9C=8D=E5=8A=A1=E5=8F=AF?=
 =?UTF-8?q?=E8=A7=82=E6=B5=8B=E9=9D=A2=E7=BB=9F=E4=B8=80=20-=20metrics?=
 =?UTF-8?q?=E7=AB=AF=E7=82=B9=E7=BB=9F=E4=B8=80/=E5=81=A5=E5=BA=B7?=
 =?UTF-8?q?=E6=A3=80=E6=9F=A5=E5=88=AB=E5=90=8D/traceID=E9=80=8F=E4=BC=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gateway:
- remote_runtime.go: P3-C-08 从请求上下文透传 X-Request-Id 到 platform-token-runtime

Supply-api:
- 新建 internal/metrics/metrics.go: HTTP请求计数/latency/token发布/worker queue指标 (Prometheus-text)
- 新建 internal/metrics/metrics_test.go: 6个测试覆盖
- bootstrap.go: 注册 /metrics (P3-C-01/04)、/health、/healthz 别名 (P3-C-05)

Platform-token-runtime:
- bootstrap.go: 添加 /health 和 /livez 别名 (P3-C-05)

三服务 /metrics 统一为 text/plain; version=0.0.4
三服务 /health 端点统一别名
Gateway → platform-token-runtime 透传 trace ID
---
 gateway/internal/middleware/remote_runtime.go |   7 +-
 .../internal/app/bootstrap.go                 |  11 ++
 supply-api/internal/app/bootstrap.go          |   9 ++
 supply-api/internal/metrics/metrics.go        | 104 ++++++++++++++++++
 supply-api/internal/metrics/metrics_test.go   |  65 +++++++++++
 5 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 supply-api/internal/metrics/metrics.go
 create mode 100644 supply-api/internal/metrics/metrics_test.go

diff --git a/gateway/internal/middleware/remote_runtime.go b/gateway/internal/middleware/remote_runtime.go
index ed7c08cb..3ab69525 100644
--- a/gateway/internal/middleware/remote_runtime.go
+++ b/gateway/internal/middleware/remote_runtime.go
@@ -239,7 +239,12 @@ func (r *RemoteTokenRuntime) Verify(ctx context.Context, rawToken string) (Verif
 		return VerifiedToken{}, err
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("X-Request-Id", fmt.Sprintf("gateway-introspect-%d", r.now().UnixNano()))
+	// P3-C-08: forward the request ID found in ctx so the trace chain is not cut by a new ID; a generated ID is used only when ctx carries none.
+	if reqID, ok := RequestIDFromContext(ctx); ok && reqID != "" {
+		req.Header.Set("X-Request-Id", reqID)
+	} else {
+		req.Header.Set("X-Request-Id", fmt.Sprintf("gateway-introspect-%d", r.now().UnixNano()))
+	}
 
 	start := time.Now()
 	resp, err := r.httpClient.Do(req)
diff --git a/platform-token-runtime/internal/app/bootstrap.go b/platform-token-runtime/internal/app/bootstrap.go
index b24561bb..767fed18 100644
--- a/platform-token-runtime/internal/app/bootstrap.go
+++ b/platform-token-runtime/internal/app/bootstrap.go
@@ -99,6 +99,17 @@ func BuildServer(cfg Config) (*http.Server, error) {
 		w.WriteHeader(http.StatusOK)
 		_, _ = w.Write([]byte(`{"status":"UP"}`))
 	})
+	// P3-C-05: /health and /livez aliases (unified paths, aligned with gateway/supply-api); each replies 200 with the JSON body {"status":"UP"}.
+	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"status":"UP"}`))
+	})
+	mux.HandleFunc("/livez", func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"status":"UP"}`))
+	})
 	// P3-B: /metrics 端点(Prometheus-text 格式)
 	mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "text/plain; version=0.0.4")
diff --git a/supply-api/internal/app/bootstrap.go b/supply-api/internal/app/bootstrap.go
index 2c6948e8..d70a0b3d 100644
--- a/supply-api/internal/app/bootstrap.go
+++ b/supply-api/internal/app/bootstrap.go
@@ -9,6 +9,7 @@ import (
 
 	"lijiaoqiao/supply-api/internal/config"
 	"lijiaoqiao/supply-api/internal/httpapi"
+	"lijiaoqiao/supply-api/internal/metrics"
 	"lijiaoqiao/supply-api/internal/middleware"
 	"lijiaoqiao/supply-api/internal/pkg/logging"
 )
@@ -156,6 +157,14 @@ func buildRouteMux(opts buildRouteMuxOptions) *http.ServeMux {
 	mux := http.NewServeMux()
 	healthHandler := httpapi.NewHealthHandlerWithDefaults(opts.DBHealthCheck, opts.RedisHealthCheck)
 	healthHandler.RegisterRoutes(mux)
+	// P3-C: /metrics endpoint; writes the metrics.Export() snapshot as Prometheus text (text/plain; version=0.0.4).
+	mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain; version=0.0.4")
+		_, _ = w.Write([]byte(metrics.Export()))
+	})
+	// P3-C-05: /health and /healthz aliases (unified paths, aligned with gateway/platform-token-runtime). NOTE(review): ServeMux panics on a duplicate pattern - confirm RegisterRoutes above does not already register either path.
+	mux.HandleFunc("/health", healthHandler.ServeHealth)
+	mux.HandleFunc("/healthz", healthHandler.ServeHealth)
 	opts.SupplyAPI.Register(mux)
 	opts.AlertAPI.Register(mux)
 	if opts.IAMHandler != nil {
diff --git a/supply-api/internal/metrics/metrics.go b/supply-api/internal/metrics/metrics.go
new file mode 100644
index 00000000..a412efe1
--- /dev/null
+++ b/supply-api/internal/metrics/metrics.go
@@ -0,0 +1,104 @@
+package metrics
+
+import (
+	"strconv"
+	"sync/atomic"
+	"time"
+)
+
+// SupplyAPIMetrics holds the supply-api metrics as atomic counters and gauges plus the process start time.
+// P3-C: unified observability surface, aligned with the gateway/platform-token-runtime metrics style.
+type SupplyAPIMetrics struct {
+	// HTTP request counters: all requests, successful ones, failed ones.
+	httpRequests      atomic.Int64
+	httpRequestsOK    atomic.Int64
+	httpRequestsError atomic.Int64
+
+	// HTTP latency in nanoseconds: running sum and number of samples.
+	httpLatencySum   atomic.Int64
+	httpLatencyCount atomic.Int64
+
+	// Token publish counters: total operations and failures.
+	tokenPublishes   atomic.Int64
+	tokenPublishFail atomic.Int64
+
+	// Worker queue gauges: last value stored by the Set* helpers.
+	queueSize   atomic.Int64
+	workersBusy atomic.Int64
+
+	startAt time.Time
+}
+
+var global *SupplyAPIMetrics
+
+func init() {
+	global = &SupplyAPIMetrics{startAt: time.Now()}
+}
+
+// IncHTTPRequest records one HTTP request.
+func IncHTTPRequest() { global.httpRequests.Add(1) }
+
+// IncHTTPOK records one successful request.
+func IncHTTPOK() { global.httpRequestsOK.Add(1) }
+
+// IncHTTPError records one failed request.
+func IncHTTPError() { global.httpRequestsError.Add(1) }
+
+// IncLatency adds ns (nanoseconds) to the latency sum and counts one latency sample.
+func IncLatency(ns int64) {
+	global.httpLatencySum.Add(ns)
+	global.httpLatencyCount.Add(1)
+}
+
+// IncTokenPublish records one token publish by incrementing the total publish counter.
+func IncTokenPublish() { global.tokenPublishes.Add(1) }
+
+// IncTokenPublishFail records one failed token publish; it increments the total publish counter as well as the failure counter. NOTE(review): presumably callers must not also call IncTokenPublish for the same attempt - confirm at call sites.
+func IncTokenPublishFail() { global.tokenPublishes.Add(1); global.tokenPublishFail.Add(1) }
+
+// SetQueueSize stores n as the current queue size.
+func SetQueueSize(n int64) { global.queueSize.Store(n) }
+
+// SetWorkersBusy stores n as the number of busy workers.
+func SetWorkersBusy(n int64) { global.workersBusy.Store(n) }
+
+// Export returns a snapshot of every metric in Prometheus text format; latency is the mean over all samples in milliseconds (0 when there are none).
+func Export() string {
+	m := global
+	uptime := time.Since(m.startAt).Seconds()
+
+	latencyAvg := float64(0)
+	if count := m.httpLatencyCount.Load(); count > 0 {
+		latencyAvg = float64(m.httpLatencySum.Load()) / float64(count)
+	}
+	latencyMs := latencyAvg / 1e6
+
+	return `# HELP supply_api_uptime_seconds Time since service start
+# TYPE supply_api_uptime_seconds gauge
+supply_api_uptime_seconds ` + strconv.FormatFloat(uptime, 'f', 3, 64) + `
+# HELP supply_api_http_requests_total Total HTTP requests received
+# TYPE supply_api_http_requests_total counter
+supply_api_http_requests_total ` + strconv.FormatInt(m.httpRequests.Load(), 10) + `
+# HELP supply_api_http_requests_ok_total Successful HTTP requests (2xx/3xx)
+# TYPE supply_api_http_requests_ok_total counter
+supply_api_http_requests_ok_total ` + strconv.FormatInt(m.httpRequestsOK.Load(), 10) + `
+# HELP supply_api_http_requests_error_total Failed HTTP requests (4xx/5xx)
+# TYPE supply_api_http_requests_error_total counter
+supply_api_http_requests_error_total ` + strconv.FormatInt(m.httpRequestsError.Load(), 10) + `
+# HELP supply_api_http_latency_ms_avg Average HTTP request latency in milliseconds
+# TYPE supply_api_http_latency_ms_avg gauge
+supply_api_http_latency_ms_avg ` + strconv.FormatFloat(latencyMs, 'f', 3, 64) + `
+# HELP supply_api_token_publishes_total Total token publish operations
+# TYPE supply_api_token_publishes_total counter
+supply_api_token_publishes_total ` + strconv.FormatInt(m.tokenPublishes.Load(), 10) + `
+# HELP supply_api_token_publish_fail_total Token publish failures
+# TYPE supply_api_token_publish_fail_total counter
+supply_api_token_publish_fail_total ` + strconv.FormatInt(m.tokenPublishFail.Load(), 10) + `
+# HELP supply_api_queue_size Current worker queue size
+# TYPE supply_api_queue_size gauge
+supply_api_queue_size ` + strconv.FormatInt(m.queueSize.Load(), 10) + `
+# HELP supply_api_workers_busy Number of busy workers
+# TYPE supply_api_workers_busy gauge
+supply_api_workers_busy ` + strconv.FormatInt(m.workersBusy.Load(), 10) + `
+`
+}
diff --git a/supply-api/internal/metrics/metrics_test.go b/supply-api/internal/metrics/metrics_test.go
new file mode 100644
index 00000000..fcabf1c2
--- /dev/null
+++ b/supply-api/internal/metrics/metrics_test.go
@@ -0,0 +1,65 @@
+package metrics
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestExport_ContainsUptime(t *testing.T) {
+	output := Export()
+	if !strings.Contains(output, "supply_api_uptime_seconds") {
+		t.Fatal("missing uptime metric")
+	}
+}
+
+func TestExport_ContainsHTTPMetrics(t *testing.T) {
+	output := Export()
+	for _, m := range []string{
+		"supply_api_http_requests_total",
+		"supply_api_http_requests_ok_total",
+		"supply_api_http_requests_error_total",
+		"supply_api_http_latency_ms_avg",
+	} {
+		if !strings.Contains(output, m) {
+			t.Errorf("missing metric: %s", m)
+		}
+	}
+}
+
+func TestExport_ContainsTokenPublishMetrics(t *testing.T) {
+	output := Export()
+	for _, m := range []string{
+		"supply_api_token_publishes_total",
+		"supply_api_token_publish_fail_total",
+	} {
+		if !strings.Contains(output, m) {
+			t.Errorf("missing metric: %s", m)
+		}
+	}
+}
+
+func TestExport_PrometheusFormat(t *testing.T) {
+	output := Export()
+	if !strings.Contains(output, "# HELP supply_api_uptime_seconds") {
+		t.Error("missing HELP line")
+	}
+	if !strings.Contains(output, "# TYPE supply_api_uptime_seconds gauge") {
+		t.Error("missing TYPE line")
+	}
+}
+
+func TestIncTokenPublish_IncrementsCounter(t *testing.T) {
+	before := global.tokenPublishes.Load()
+	IncTokenPublish()
+	after := global.tokenPublishes.Load()
+	if after != before+1 {
+		t.Errorf("expected %d, got %d", before+1, after)
+	}
+}
+
+func TestSetQueueSize_SetsValue(t *testing.T) {
+	SetQueueSize(42)
+	if got := global.queueSize.Load(); got != 42 {
+		t.Errorf("expected 42, got %d", got)
+	}
+}