From e249a9160b929585d1ee1d0c9c0907220b2c30b1 Mon Sep 17 00:00:00 2001
From: Your Name <your.email@example.com>
Date: Tue, 21 Apr 2026 18:40:43 +0800
Subject: [PATCH] =?UTF-8?q?P3-C:=20=E4=B8=89=E6=9C=8D=E5=8A=A1=E5=8F=AF?=
 =?UTF-8?q?=E8=A7=82=E6=B5=8B=E9=9D=A2=E7=BB=9F=E4=B8=80=20-=20metrics?=
 =?UTF-8?q?=E7=AB=AF=E7=82=B9=E7=BB=9F=E4=B8=80/=E5=81=A5=E5=BA=B7?=
 =?UTF-8?q?=E6=A3=80=E6=9F=A5=E5=88=AB=E5=90=8D/traceID=E9=80=8F=E4=BC=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gateway:
- remote_runtime.go: P3-C-08 从请求上下文透传 X-Request-Id 到 platform-token-runtime

Supply-api:
- 新建 internal/metrics/metrics.go: HTTP请求计数/latency/token发布/worker queue指标 (Prometheus-text)
- 新建 internal/metrics/metrics_test.go: 6个测试覆盖
- bootstrap.go: 注册 /metrics (P3-C-01/04)、/health、/healthz 别名 (P3-C-05)

Platform-token-runtime:
- bootstrap.go: 添加 /health 和 /livez 别名 (P3-C-05)

三服务 /metrics 统一为 text/plain; version=0.0.4
三服务 /health 端点统一别名
Gateway → platform-token-runtime 透传 trace ID
---
 gateway/internal/middleware/remote_runtime.go |   7 +-
 .../internal/app/bootstrap.go                 |  11 ++
 supply-api/internal/app/bootstrap.go          |   9 ++
 supply-api/internal/metrics/metrics.go        | 104 ++++++++++++++++++
 supply-api/internal/metrics/metrics_test.go   |  65 +++++++++++
 5 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 supply-api/internal/metrics/metrics.go
 create mode 100644 supply-api/internal/metrics/metrics_test.go

diff --git a/gateway/internal/middleware/remote_runtime.go b/gateway/internal/middleware/remote_runtime.go
index ed7c08cb..3ab69525 100644
--- a/gateway/internal/middleware/remote_runtime.go
+++ b/gateway/internal/middleware/remote_runtime.go
@@ -239,7 +239,12 @@ func (r *RemoteTokenRuntime) Verify(ctx context.Context, rawToken string) (Verif
 		return VerifiedToken{}, err
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("X-Request-Id", fmt.Sprintf("gateway-introspect-%d", r.now().UnixNano()))
+	// P3-C-08: 从请求上下文透传 trace ID，避免生成新的 ID 截断链路
+	if reqID, ok := RequestIDFromContext(ctx); ok && reqID != "" {
+		req.Header.Set("X-Request-Id", reqID)
+	} else {
+		req.Header.Set("X-Request-Id", fmt.Sprintf("gateway-introspect-%d", r.now().UnixNano()))
+	}
 
 	start := time.Now()
 	resp, err := r.httpClient.Do(req)
diff --git a/platform-token-runtime/internal/app/bootstrap.go b/platform-token-runtime/internal/app/bootstrap.go
index b24561bb..767fed18 100644
--- a/platform-token-runtime/internal/app/bootstrap.go
+++ b/platform-token-runtime/internal/app/bootstrap.go
@@ -99,6 +99,17 @@ func BuildServer(cfg Config) (*http.Server, error) {
 		w.WriteHeader(http.StatusOK)
 		_, _ = w.Write([]byte(`{"status":"UP"}`))
 	})
+	// P3-C-05: /health 和 /livez 别名（统一路径，对齐 gateway/supply-api）
+	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"status":"UP"}`))
+	})
+	mux.HandleFunc("/livez", func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"status":"UP"}`))
+	})
 	// P3-B: /metrics 端点（Prometheus-text 格式）
 	mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "text/plain; version=0.0.4")
diff --git a/supply-api/internal/app/bootstrap.go b/supply-api/internal/app/bootstrap.go
index 2c6948e8..d70a0b3d 100644
--- a/supply-api/internal/app/bootstrap.go
+++ b/supply-api/internal/app/bootstrap.go
@@ -9,6 +9,7 @@ import (
 
 	"lijiaoqiao/supply-api/internal/config"
 	"lijiaoqiao/supply-api/internal/httpapi"
+	"lijiaoqiao/supply-api/internal/metrics"
 	"lijiaoqiao/supply-api/internal/middleware"
 	"lijiaoqiao/supply-api/internal/pkg/logging"
 )
@@ -156,6 +157,14 @@ func buildRouteMux(opts buildRouteMuxOptions) *http.ServeMux {
 	mux := http.NewServeMux()
 	healthHandler := httpapi.NewHealthHandlerWithDefaults(opts.DBHealthCheck, opts.RedisHealthCheck)
 	healthHandler.RegisterRoutes(mux)
+	// P3-C: /metrics 端点（Prometheus-text 格式）
+	mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain; version=0.0.4")
+		_, _ = w.Write([]byte(metrics.Export()))
+	})
+	// P3-C-05: /health and /healthz aliases (unified paths, aligned with gateway/platform-token-runtime)
+	mux.HandleFunc("/health", healthHandler.ServeHealth)
+	mux.HandleFunc("/healthz", healthHandler.ServeHealth)
 	opts.SupplyAPI.Register(mux)
 	opts.AlertAPI.Register(mux)
 	if opts.IAMHandler != nil {
diff --git a/supply-api/internal/metrics/metrics.go b/supply-api/internal/metrics/metrics.go
new file mode 100644
index 00000000..a412efe1
--- /dev/null
+++ b/supply-api/internal/metrics/metrics.go
@@ -0,0 +1,104 @@
+package metrics
+
+import (
+	"strconv"
+	"sync/atomic"
+	"time"
+)
+
+// SupplyAPIMetrics is the in-process metrics collector for supply-api.
+// P3-C: unified observability surface, aligned with the gateway/platform-token-runtime metrics style.
+type SupplyAPIMetrics struct {
+	// HTTP request counters.
+	httpRequests      atomic.Int64
+	httpRequestsOK    atomic.Int64
+	httpRequestsError atomic.Int64
+
+	// HTTP latency: running sum (nanoseconds) and number of recorded samples.
+	httpLatencySum   atomic.Int64
+	httpLatencyCount atomic.Int64
+
+	// Token publish counters.
+	tokenPublishes   atomic.Int64
+	tokenPublishFail atomic.Int64
+
+	// Worker queue gauges.
+	queueSize   atomic.Int64
+	workersBusy atomic.Int64
+
+	startAt time.Time // set in init; Export derives the uptime gauge from it
+}
+
+var global *SupplyAPIMetrics // package-wide collector used by every exported helper in this file
+
+func init() { // allocate the collector and stamp the start time when the package is loaded
+	global = &SupplyAPIMetrics{startAt: time.Now()}
+}
+
+// IncHTTPRequest adds one to the total HTTP request counter.
+func IncHTTPRequest() { global.httpRequests.Add(1) }
+
+// IncHTTPOK adds one to the successful HTTP request counter.
+func IncHTTPOK() { global.httpRequestsOK.Add(1) }
+
+// IncHTTPError adds one to the failed HTTP request counter.
+func IncHTTPError() { global.httpRequestsError.Add(1) }
+
+// IncLatency records one latency sample: ns (nanoseconds) is added to the running sum and the sample count grows by one.
+func IncLatency(ns int64) {
+	global.httpLatencySum.Add(ns)
+	global.httpLatencyCount.Add(1)
+}
+
+// IncTokenPublish adds one to the total token publish counter.
+func IncTokenPublish() { global.tokenPublishes.Add(1) }
+
+// IncTokenPublishFail records a failed token publish: it bumps BOTH the total and the failure counter. NOTE(review): callers presumably must not also call IncTokenPublish for the same attempt — confirm.
+func IncTokenPublishFail() { global.tokenPublishes.Add(1); global.tokenPublishFail.Add(1) }
+
+// SetQueueSize stores n as the current worker queue size.
+func SetQueueSize(n int64) { global.queueSize.Store(n) }
+
+// SetWorkersBusy stores n as the number of busy workers.
+func SetWorkersBusy(n int64) { global.workersBusy.Store(n) }
+
+// Export returns a snapshot of all collected metrics rendered in the Prometheus text format.
+func Export() string {
+	m := global
+	uptime := time.Since(m.startAt).Seconds()
+
+	latencyAvg := float64(0)
+	if count := m.httpLatencyCount.Load(); count > 0 { // avoid dividing by zero before any sample is recorded
+		latencyAvg = float64(m.httpLatencySum.Load()) / float64(count)
+	}
+	latencyMs := latencyAvg / 1e6 // nanoseconds -> milliseconds
+
+	return `# HELP supply_api_uptime_seconds Time since service start
+# TYPE supply_api_uptime_seconds gauge
+supply_api_uptime_seconds ` + strconv.FormatFloat(uptime, 'f', 3, 64) + `
+# HELP supply_api_http_requests_total Total HTTP requests received
+# TYPE supply_api_http_requests_total counter
+supply_api_http_requests_total ` + strconv.FormatInt(m.httpRequests.Load(), 10) + `
+# HELP supply_api_http_requests_ok_total Successful HTTP requests (2xx/3xx)
+# TYPE supply_api_http_requests_ok_total counter
+supply_api_http_requests_ok_total ` + strconv.FormatInt(m.httpRequestsOK.Load(), 10) + `
+# HELP supply_api_http_requests_error_total Failed HTTP requests (4xx/5xx)
+# TYPE supply_api_http_requests_error_total counter
+supply_api_http_requests_error_total ` + strconv.FormatInt(m.httpRequestsError.Load(), 10) + `
+# HELP supply_api_http_latency_ms_avg Average HTTP request latency in milliseconds
+# TYPE supply_api_http_latency_ms_avg gauge
+supply_api_http_latency_ms_avg ` + strconv.FormatFloat(latencyMs, 'f', 3, 64) + `
+# HELP supply_api_token_publishes_total Total token publish operations
+# TYPE supply_api_token_publishes_total counter
+supply_api_token_publishes_total ` + strconv.FormatInt(m.tokenPublishes.Load(), 10) + `
+# HELP supply_api_token_publish_fail_total Token publish failures
+# TYPE supply_api_token_publish_fail_total counter
+supply_api_token_publish_fail_total ` + strconv.FormatInt(m.tokenPublishFail.Load(), 10) + `
+# HELP supply_api_queue_size Current worker queue size
+# TYPE supply_api_queue_size gauge
+supply_api_queue_size ` + strconv.FormatInt(m.queueSize.Load(), 10) + `
+# HELP supply_api_workers_busy Number of busy workers
+# TYPE supply_api_workers_busy gauge
+supply_api_workers_busy ` + strconv.FormatInt(m.workersBusy.Load(), 10) + `
+`
+}
diff --git a/supply-api/internal/metrics/metrics_test.go b/supply-api/internal/metrics/metrics_test.go
new file mode 100644
index 00000000..fcabf1c2
--- /dev/null
+++ b/supply-api/internal/metrics/metrics_test.go
@@ -0,0 +1,65 @@
+package metrics
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestExport_ContainsUptime(t *testing.T) { // Export output must mention the uptime metric name
+	output := Export()
+	if !strings.Contains(output, "supply_api_uptime_seconds") {
+		t.Fatal("missing uptime metric")
+	}
+}
+
+func TestExport_ContainsHTTPMetrics(t *testing.T) { // Export output must mention every HTTP metric name
+	output := Export()
+	for _, m := range []string{
+		"supply_api_http_requests_total",
+		"supply_api_http_requests_ok_total",
+		"supply_api_http_requests_error_total",
+		"supply_api_http_latency_ms_avg",
+	} {
+		if !strings.Contains(output, m) {
+			t.Errorf("missing metric: %s", m)
+		}
+	}
+}
+
+func TestExport_ContainsTokenPublishMetrics(t *testing.T) { // Export output must mention both token publish metric names
+	output := Export()
+	for _, m := range []string{
+		"supply_api_token_publishes_total",
+		"supply_api_token_publish_fail_total",
+	} {
+		if !strings.Contains(output, m) {
+			t.Errorf("missing metric: %s", m)
+		}
+	}
+}
+
+func TestExport_PrometheusFormat(t *testing.T) { // spot-checks the HELP and TYPE lines of the uptime metric only
+	output := Export()
+	if !strings.Contains(output, "# HELP supply_api_uptime_seconds") {
+		t.Error("missing HELP line")
+	}
+	if !strings.Contains(output, "# TYPE supply_api_uptime_seconds gauge") {
+		t.Error("missing TYPE line")
+	}
+}
+
+func TestIncTokenPublish_IncrementsCounter(t *testing.T) { // compares against the prior value because the counter lives in shared package state
+	before := global.tokenPublishes.Load()
+	IncTokenPublish()
+	after := global.tokenPublishes.Load()
+	if after != before+1 {
+		t.Errorf("expected %d, got %d", before+1, after)
+	}
+}
+
+func TestSetQueueSize_SetsValue(t *testing.T) { // SetQueueSize must store the value; the shared gauge is left at 42 afterwards
+	SetQueueSize(42)
+	if got := global.queueSize.Load(); got != 42 {
+		t.Errorf("expected 42, got %d", got)
+	}
+}