chore: prepare repository for publishing

This commit is contained in:
phamnazage-jpg
2026-05-13 14:42:45 +08:00
parent 55e506b2b5
commit 77e6610fd2
118 changed files with 27373 additions and 1009 deletions

24
scripts/apply_migration.sh Executable file
View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# apply_migration.sh - apply every *.sql file in db/migrations, in byte-wise
# lexical file-name order, stopping at the first failing statement.
#
# DATABASE_URL is resolved with this precedence (first match wins):
#   1. a value already set in the caller's environment
#   2. .env.local
#   3. .env
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Remember an explicitly supplied URL: sourcing the env files below would
# otherwise silently replace it and run the migrations against another database.
PRESET_DATABASE_URL="${DATABASE_URL:-}"

# .env is sourced first so that .env.local, sourced second, overrides it.
for env_file in ".env" ".env.local"; do
  if [[ -f "$env_file" ]]; then
    # shellcheck disable=SC1090
    source "$env_file"
  fi
done

if [[ -n "$PRESET_DATABASE_URL" ]]; then
  DATABASE_URL="$PRESET_DATABASE_URL"
fi

if [[ -z "${DATABASE_URL:-}" ]]; then
  echo "DATABASE_URL 未设置" >&2
  exit 1
fi

# LC_ALL=C keeps the migration order independent of the caller's locale.
# IFS= preserves leading/trailing whitespace in file names.
# psql reads stdin from /dev/null so it can never consume the list of pending
# files that is piped into the loop.
find "db/migrations" -maxdepth 1 -type f -name "*.sql" | LC_ALL=C sort | while IFS= read -r migration; do
  psql "$DATABASE_URL" -v ON_ERROR_STOP=1 -f "$migration" < /dev/null
  echo "migration 已应用: $migration"
done

28
scripts/backup.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/bin/bash
# backup.sh - dump the database with pg_dump, gzip the dump, prune old dumps,
# and upload the result to OSS when the ossutil CLI is installed.
#
# Environment:
#   DATABASE_URL       connection string handed to pg_dump
#                      (default: local socket, database llm_intelligence)
#   OSS_BACKUP_TARGET  destination for ossutil cp
#                      (default: oss://your-bucket/llm-hub-backups/)
set -euo pipefail

# A dump contains the whole database; keep new files private even under /tmp.
umask 077

DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
OSS_TARGET="${OSS_BACKUP_TARGET:-oss://your-bucket/llm-hub-backups/}"
BACKUP_DIR="/tmp/llm_hub_backups"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="${BACKUP_DIR}/llm_intelligence_${DATE}.sql"
mkdir -p "$BACKUP_DIR"

# If pg_dump or gzip fails, remove the partial .sql so a truncated file is
# never mistaken for a backup. After a successful gzip the file no longer
# exists and this is a no-op.
trap 'rm -f "$BACKUP_FILE"' EXIT

# 1. Dump and compress.
echo "[$(date)] 开始备份..."
pg_dump "$DB_URL" > "$BACKUP_FILE"
gzip "$BACKUP_FILE"

# 2. Prune compressed dumps whose mtime matches find's "-mtime +7".
echo "[$(date)] 清理过期备份..."
find "$BACKUP_DIR" -maxdepth 1 -type f -name "*.sql.gz" -mtime +7 -delete

# 3. Upload when ossutil is available. A failed upload is reported but does not
#    fail the run, because the local backup is still valid.
if command -v ossutil &> /dev/null; then
  echo "[$(date)] 上传至 OSS..."
  if ! ossutil cp "${BACKUP_FILE}.gz" "$OSS_TARGET"; then
    echo "[$(date)] 警告: OSS 上传失败, 备份仅保存在本地" >&2
  fi
fi

echo "[$(date)] 备份完成: ${BACKUP_FILE}.gz"
ls -lh "${BACKUP_FILE}.gz"

30
scripts/feishu_alert.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# feishu_alert.sh - post a text alert to a Feishu bot webhook.
# Usage: ./feishu_alert.sh "alert message"
# Environment:
#   FEISHU_WEBHOOK  webhook URL (required)
# Exit status: 0 when the webhook accepted the request, 1 otherwise.
set -euo pipefail

WEBHOOK_URL="${FEISHU_WEBHOOK:-}"
if [ -z "$WEBHOOK_URL" ]; then
  echo "错误: FEISHU_WEBHOOK 环境变量未设置" >&2
  exit 1
fi

MESSAGE="${1:-LLM Hub 告警}"
DATE=$(date '+%Y-%m-%d %H:%M:%S')

# json_escape prints $1 with backslashes, double quotes, newlines, carriage
# returns and tabs escaped so it can be embedded in a JSON string literal.
# Backslashes are handled first so later escapes are not doubled.
# Other control characters are passed through unchanged.
json_escape() {
  local s="$1"
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\t'/\\t}"
  printf '%s' "$s"
}

SAFE_MESSAGE="$(json_escape "$MESSAGE")"

# Build the Feishu text-message payload. In this unquoted here-document "\\n"
# collapses to the two characters "\n", i.e. a JSON newline escape.
PAYLOAD=$(cat <<EOF
{
  "msg_type": "text",
  "content": {
    "text": "🚨 LLM Intelligence Hub 告警\\n时间: ${DATE}\\n消息: ${SAFE_MESSAGE}\\n请检查系统状态。"
  }
}
EOF
)

# --fail turns HTTP error statuses into a non-zero exit; --max-time keeps a
# hung webhook from blocking the caller indefinitely.
if ! curl -sS --fail --max-time 10 -X POST \
  -H "Content-Type: application/json" \
  -d "$PAYLOAD" \
  "$WEBHOOK_URL"; then
  echo "错误: 告警发送失败" >&2
  exit 1
fi
echo "告警已发送"

View File

@@ -0,0 +1,631 @@
//go:build llm_script
// fetch_multi_source.go - 多源 LLM 定价采集器
// 支持: OpenRouter, Moonshot, DeepSeek, OpenAI 等
package main
import (
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"sort"
"strings"
"time"
_ "github.com/lib/pq"
)
// logger is the process-wide structured logger; it is assigned in init.
var logger *slog.Logger

// init installs a JSON handler that writes records of level Info and above
// to standard error.
func init() {
	options := &slog.HandlerOptions{Level: slog.LevelInfo}
	handler := slog.NewJSONHandler(os.Stderr, options)
	logger = slog.New(handler)
}
// ============ 统一数据接口 ============
// ModelPricing is the normalized pricing record produced by every DataSource
// and consumed by the summary and database-writing code in this file.
type ModelPricing struct {
// ModelID is the source's identifier for the model; saveToDatabase matches it
// against models.external_id.
ModelID string
// ModelName is the human-readable model name.
ModelName string
// ProviderName is the model vendor; saveToDatabase looks it up by name in
// model_provider.
ProviderName string
// ProviderCountry is a country code such as "US" or "CN"; summarizePrices
// counts "CN" entries as domestic models.
ProviderCountry string
// OperatorName is the party selling access; saveToDatabase looks it up by
// name in operator.
OperatorName string
OperatorType string // official / reseller / cloud
// Region and Currency are part of the region_pricing conflict key.
Region string
Currency string
InputPrice float64 // per 1M tokens
OutputPrice float64 // per 1M tokens
// ContextLength is stored as models.context_length.
ContextLength int
// IsFree switches pricingMetadata to the "free_tier" source type.
IsFree bool
// SourceURL is stored as region_pricing.source_url.
SourceURL string
// Modality is stored as models.modality.
Modality string
// SceneTags are descriptive tags; saveToDatabase does not persist them.
SceneTags []string
}
// DataSource is the common interface implemented by every pricing collector.
type DataSource interface {
// Name returns the display name used in log messages.
Name() string
// FetchPricing returns the source's pricing records, or an error when the
// source could not be collected.
FetchPricing() ([]ModelPricing, error)
SourceType() string // official / reseller
}
// runConfig carries the run-time options of runCollector.
type runConfig struct {
// DryRun makes runCollector print the summary and return without calling the
// save function.
DryRun bool
}
// sourceDefinition describes one selectable data source.
type sourceDefinition struct {
// Key is the lower-case identifier accepted by the -sources flag.
Key string
// Name is the display name of the source.
Name string
// Factory builds a fresh DataSource instance.
Factory func() DataSource
}
// runSummary holds the aggregate figures printed by printSummary.
type runSummary struct {
// SelectedSources is the number of sources that were asked to run.
SelectedSources int
// SuccessfulSources is the number of sources whose fetch returned no error.
SuccessfulSources int
// TotalModels is the number of collected pricing records.
TotalModels int
// DomesticModels counts records whose ProviderCountry equals "CN"
// (case-insensitively).
DomesticModels int
// CurrencyCounts maps an upper-cased currency code to its record count.
CurrencyCounts map[string]int
}
// pricingMetadataFields groups the derived region_pricing columns computed by
// pricingMetadata; each field is passed to the insert as a string parameter.
type pricingMetadataFields struct {
// SourceType is the lower-cased operator type, "official" when that is empty,
// or "free_tier" for free models.
SourceType string
// FreeQuota is a free-text description; it is empty for paid models.
FreeQuota string
// FreeLimitations holds a JSON array literal ("[]" for paid models).
FreeLimitations string
// RateLimit holds a JSON object literal (always "{}" in this file).
RateLimit string
}
// ============ OpenRouter 采集器 ============
// OpenRouterSource collects model pricing from the OpenRouter models API.
type OpenRouterSource struct {
	// APIKey is the OpenRouter API key; it may be empty.
	APIKey string
}

// Name returns the display name of this source.
func (s *OpenRouterSource) Name() string {
	return "OpenRouter"
}

// SourceType reports that OpenRouter resells models of other providers.
func (s *OpenRouterSource) SourceType() string {
	return "reseller"
}
// FetchPricing downloads the OpenRouter model list and converts every entry
// into a ModelPricing record priced per one million tokens.
//
// The API key, when set, is sent as a Bearer token. An error is returned when
// the request cannot be built or sent, when the body cannot be read, when the
// server answers with a status other than 200, or when the payload is not the
// expected JSON document.
func (s *OpenRouterSource) FetchPricing() ([]ModelPricing, error) {
	const endpoint = "https://openrouter.ai/api/v1/models"
	// maxErrorBody caps how much of a failed response is echoed into the error.
	const maxErrorBody = 256

	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	if s.APIKey != "" {
		req.Header.Set("Authorization", "Bearer "+s.APIKey)
	}

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}
	// Reject error responses explicitly; otherwise an error document would be
	// reported as a parse failure or, worse, as an empty but successful fetch.
	if resp.StatusCode != http.StatusOK {
		excerpt := body
		if len(excerpt) > maxErrorBody {
			excerpt = excerpt[:maxErrorBody]
		}
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, excerpt)
	}

	var result struct {
		Data []struct {
			ID      string `json:"id"`
			Name    string `json:"name"`
			Pricing struct {
				Prompt     string `json:"prompt"`
				Completion string `json:"completion"`
			} `json:"pricing"`
			ContextLength int `json:"context_length"`
		} `json:"data"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	// priceKnown reports whether the raw price string is present and not
	// negative. parsePrice turns missing and negative prices into 0, so a
	// parsed 0 alone does not prove that a model is free.
	priceKnown := func(raw string) bool {
		raw = strings.TrimSpace(raw)
		return raw != "" && !strings.HasPrefix(raw, "-")
	}

	prices := make([]ModelPricing, 0, len(result.Data))
	for _, m := range result.Data {
		inputPrice := parsePrice(m.Pricing.Prompt)
		outputPrice := parsePrice(m.Pricing.Completion)
		isFree := inputPrice == 0 && outputPrice == 0 &&
			priceKnown(m.Pricing.Prompt) && priceKnown(m.Pricing.Completion)

		prices = append(prices, ModelPricing{
			ModelID:         m.ID,
			ModelName:       m.Name,
			ProviderName:    extractProvider(m.ID),
			ProviderCountry: "US",
			OperatorName:    "OpenRouter",
			OperatorType:    "reseller",
			Region:          "global",
			Currency:        "USD",
			// The parsed price is multiplied by one million to obtain the
			// per-1M-token price stored in ModelPricing.
			InputPrice:    inputPrice * 1000000,
			OutputPrice:   outputPrice * 1000000,
			ContextLength: m.ContextLength,
			IsFree:        isFree,
			SourceURL:     "https://openrouter.ai/docs#models",
			Modality:      "text",
		})
	}
	logger.Info("OpenRouter采集完成", "models", len(prices))
	return prices, nil
}
// parsePrice converts a decimal price string into a float64.
//
// It returns 0 when s does not start with a parsable number, when the number
// is negative, or when it is not finite: fmt's %f verb accepts the tokens
// "NaN" and "Inf", which must never be stored as a price.
func parsePrice(s string) float64 {
	var f float64
	if n, err := fmt.Sscanf(s, "%f", &f); err != nil || n != 1 {
		return 0
	}
	// f != f is true only for NaN; +Inf is the only value above the largest
	// finite float64. Negative values, including -Inf, are covered by f < 0.
	if f != f || f < 0 || f > 1.7976931348623157e308 {
		return 0
	}
	return f
}
// extractProvider returns the part of modelID before the first "/".
// IDs that contain no "/" yield "unknown".
func extractProvider(modelID string) string {
	if vendor, _, hasSlash := strings.Cut(modelID, "/"); hasSlash {
		return vendor
	}
	return "unknown"
}
// sourceDefinitions returns the registry of selectable data sources in their
// default execution order. apiKey is captured by the OpenRouter factory only.
func sourceDefinitions(apiKey string) []sourceDefinition {
	newOpenRouter := func() DataSource { return &OpenRouterSource{APIKey: apiKey} }
	newMoonshot := func() DataSource { return &MoonshotSource{} }
	newDeepSeek := func() DataSource { return &DeepSeekSource{} }
	newOpenAI := func() DataSource { return &OpenAISource{} }

	return []sourceDefinition{
		{Key: "openrouter", Name: "OpenRouter", Factory: newOpenRouter},
		{Key: "moonshot", Name: "Moonshot", Factory: newMoonshot},
		{Key: "deepseek", Name: "DeepSeek", Factory: newDeepSeek},
		{Key: "openai", Name: "OpenAI", Factory: newOpenAI},
	}
}
// parseSourceList splits a comma-separated list of source keys. Every entry is
// trimmed and lower-cased; empty entries and repeats are dropped, keeping the
// first occurrence. A blank input yields nil.
func parseSourceList(raw string) []string {
	if strings.TrimSpace(raw) == "" {
		return nil
	}

	fields := strings.Split(raw, ",")
	unique := make([]string, 0, len(fields))
	visited := make(map[string]bool, len(fields))
	for _, field := range fields {
		key := strings.ToLower(strings.TrimSpace(field))
		if key == "" || visited[key] {
			continue
		}
		visited[key] = true
		unique = append(unique, key)
	}
	return unique
}
// buildSources instantiates the data sources named in requested, in the order
// given. With an empty request every registered source is built in registry
// order. An unrecognised key aborts with an error.
func buildSources(apiKey string, requested []string) ([]DataSource, error) {
	defs := sourceDefinitions(apiKey)

	if len(requested) == 0 {
		all := make([]DataSource, 0, len(defs))
		for i := range defs {
			all = append(all, defs[i].Factory())
		}
		return all, nil
	}

	byKey := make(map[string]sourceDefinition, len(defs))
	for i := range defs {
		byKey[defs[i].Key] = defs[i]
	}

	picked := make([]DataSource, 0, len(requested))
	for _, key := range requested {
		def, known := byKey[key]
		if !known {
			return nil, fmt.Errorf("unknown source %q", key)
		}
		picked = append(picked, def.Factory())
	}
	return picked, nil
}
// listSourceKeys returns the keys of all registered sources in registry order.
func listSourceKeys(apiKey string) []string {
	defs := sourceDefinitions(apiKey)
	names := make([]string, 0, len(defs))
	for i := range defs {
		names = append(names, defs[i].Key)
	}
	return names
}
// summarizePrices aggregates the collected records into a runSummary: the
// record total, the number of records whose provider country is "CN"
// (case-insensitive) and a per-currency count keyed by upper-cased code.
func summarizePrices(selectedSources int, successfulSources int, prices []ModelPricing) runSummary {
	byCurrency := make(map[string]int)
	domestic := 0
	for i := range prices {
		record := &prices[i]
		if strings.EqualFold(record.ProviderCountry, "CN") {
			domestic++
		}
		byCurrency[strings.ToUpper(record.Currency)]++
	}

	return runSummary{
		SelectedSources:   selectedSources,
		SuccessfulSources: successfulSources,
		TotalModels:       len(prices),
		DomesticModels:    domestic,
		CurrencyCounts:    byCurrency,
	}
}
// formatCountMap renders counts as "KEY:N" pairs joined by commas, with keys
// in ascending order so the output is deterministic. An empty or nil map is
// rendered as "none".
func formatCountMap(counts map[string]int) string {
	if len(counts) == 0 {
		return "none"
	}

	names := make([]string, 0, len(counts))
	for name := range counts {
		names = append(names, name)
	}
	sort.Strings(names)

	var b strings.Builder
	for i, name := range names {
		if i > 0 {
			b.WriteByte(',')
		}
		fmt.Fprintf(&b, "%s:%d", name, counts[name])
	}
	return b.String()
}
// printSummary writes the one-line run summary to w and returns the write
// error, if any. A nil writer makes the call a no-op.
func printSummary(w io.Writer, summary runSummary) error {
	if w != nil {
		currencies := formatCountMap(summary.CurrencyCounts)
		if _, err := fmt.Fprintf(
			w,
			"sources=%d successful_sources=%d models=%d domestic_models=%d currencies=%s\n",
			summary.SelectedSources,
			summary.SuccessfulSources,
			summary.TotalModels,
			summary.DomesticModels,
			currencies,
		); err != nil {
			return err
		}
	}
	return nil
}
// pricingMetadata derives the descriptive region_pricing columns for p.
// Free models are always classified as "free_tier" and receive placeholder
// quota texts; paid models use their lower-cased operator type, defaulting to
// "official" when it is blank.
func pricingMetadata(p ModelPricing) pricingMetadataFields {
	if p.IsFree {
		return pricingMetadataFields{
			SourceType:      "free_tier",
			FreeQuota:       "See source_url for provider free-tier details",
			FreeLimitations: `["See source_url for current quota and policy"]`,
			RateLimit:       "{}",
		}
	}

	kind := strings.TrimSpace(strings.ToLower(p.OperatorType))
	if kind == "" {
		kind = "official"
	}
	return pricingMetadataFields{
		SourceType:      kind,
		FreeLimitations: "[]",
		RateLimit:       "{}",
	}
}
// ============ Moonshot 采集器 ============
// MoonshotSource supplies pricing for Moonshot (Kimi) models from a table
// embedded in the code; FetchPricing performs no network request.
type MoonshotSource struct{}
// Name returns the display name of this source.
func (s *MoonshotSource) Name() string { return "Moonshot" }
// SourceType reports that prices come from the provider itself.
func (s *MoonshotSource) SourceType() string { return "official" }
// FetchPricing returns the hard-coded Moonshot price list (CNY, per 1M
// tokens). It never returns an error. The figures must be updated by hand
// when the pages referenced in SourceURL change.
func (s *MoonshotSource) FetchPricing() ([]ModelPricing, error) {
prices := []ModelPricing{
{
ModelID: "kimi-k2.6", ModelName: "Kimi K2.6",
ProviderName: "Moonshot AI", ProviderCountry: "CN",
OperatorName: "Moonshot", OperatorType: "official",
Region: "CN", Currency: "CNY",
InputPrice: 6.50, OutputPrice: 27.00,
ContextLength: 262144, IsFree: false,
SourceURL: "https://platform.kimi.com/docs/pricing/chat-k26",
Modality: "multimodal",
SceneTags: []string{"对话", "视觉", "代码"},
},
{
ModelID: "kimi-k2-0905-preview", ModelName: "Kimi K2 0905 Preview",
ProviderName: "Moonshot AI", ProviderCountry: "CN",
OperatorName: "Moonshot", OperatorType: "official",
Region: "CN", Currency: "CNY",
InputPrice: 4.00, OutputPrice: 16.00,
ContextLength: 262144, IsFree: false,
SourceURL: "https://platform.kimi.com/docs/pricing/chat-k2",
Modality: "text",
SceneTags: []string{"代码", "对话"},
},
{
ModelID: "moonshot-v1-8k", ModelName: "Moonshot V1 8K",
ProviderName: "Moonshot AI", ProviderCountry: "CN",
OperatorName: "Moonshot", OperatorType: "official",
Region: "CN", Currency: "CNY",
InputPrice: 2.00, OutputPrice: 10.00,
ContextLength: 8192, IsFree: false,
SourceURL: "https://platform.kimi.com/docs/pricing/chat-v1",
Modality: "text",
SceneTags: []string{"对话"},
},
}
logger.Info("Moonshot采集完成", "models", len(prices))
return prices, nil
}
// ============ DeepSeek 采集器 ============
// DeepSeekSource supplies pricing for DeepSeek models from a table embedded in
// the code; FetchPricing performs no network request.
type DeepSeekSource struct{}
// Name returns the display name of this source.
func (s *DeepSeekSource) Name() string { return "DeepSeek" }
// SourceType reports that prices come from the provider itself.
func (s *DeepSeekSource) SourceType() string { return "official" }
// FetchPricing returns the hard-coded DeepSeek price list (USD, per 1M
// tokens). It never returns an error. The figures must be updated by hand
// when the page referenced in SourceURL changes.
func (s *DeepSeekSource) FetchPricing() ([]ModelPricing, error) {
prices := []ModelPricing{
{
ModelID: "deepseek-v4-flash", ModelName: "DeepSeek V4 Flash",
ProviderName: "DeepSeek", ProviderCountry: "CN",
OperatorName: "DeepSeek", OperatorType: "official",
Region: "global", Currency: "USD",
InputPrice: 0.14, OutputPrice: 0.28,
ContextLength: 1000000, IsFree: false,
SourceURL: "https://api-docs.deepseek.com/quick_start/pricing",
Modality: "text",
SceneTags: []string{"对话", "推理"},
},
{
ModelID: "deepseek-v4-pro", ModelName: "DeepSeek V4 Pro",
ProviderName: "DeepSeek", ProviderCountry: "CN",
OperatorName: "DeepSeek", OperatorType: "official",
Region: "global", Currency: "USD",
InputPrice: 0.435, OutputPrice: 0.87,
ContextLength: 1000000, IsFree: false,
SourceURL: "https://api-docs.deepseek.com/quick_start/pricing",
Modality: "code",
SceneTags: []string{"对话", "推理", "代码"},
},
}
logger.Info("DeepSeek采集完成", "models", len(prices))
return prices, nil
}
// ============ OpenAI 采集器 ============
// OpenAISource supplies pricing for OpenAI models from a table embedded in the
// code; FetchPricing performs no network request.
type OpenAISource struct{}
// Name returns the display name of this source.
func (s *OpenAISource) Name() string { return "OpenAI" }
// SourceType reports that prices come from the provider itself.
func (s *OpenAISource) SourceType() string { return "official" }
// FetchPricing returns the hard-coded OpenAI price list (USD, per 1M tokens).
// It never returns an error. The figures must be updated by hand when the
// page referenced in SourceURL changes.
func (s *OpenAISource) FetchPricing() ([]ModelPricing, error) {
prices := []ModelPricing{
{
ModelID: "gpt-5.5", ModelName: "GPT-5.5",
ProviderName: "OpenAI", ProviderCountry: "US",
OperatorName: "OpenAI", OperatorType: "official",
Region: "global", Currency: "USD",
InputPrice: 5.00, OutputPrice: 30.00,
ContextLength: 200000, IsFree: false,
SourceURL: "https://openai.com/api/pricing/",
Modality: "code",
SceneTags: []string{"代码", "推理", "对话"},
},
{
ModelID: "gpt-5.4", ModelName: "GPT-5.4",
ProviderName: "OpenAI", ProviderCountry: "US",
OperatorName: "OpenAI", OperatorType: "official",
Region: "global", Currency: "USD",
InputPrice: 2.50, OutputPrice: 15.00,
ContextLength: 200000, IsFree: false,
SourceURL: "https://openai.com/api/pricing/",
Modality: "text",
SceneTags: []string{"代码", "对话"},
},
{
ModelID: "gpt-5.4-mini", ModelName: "GPT-5.4 Mini",
ProviderName: "OpenAI", ProviderCountry: "US",
OperatorName: "OpenAI", OperatorType: "official",
Region: "global", Currency: "USD",
InputPrice: 0.75, OutputPrice: 4.50,
ContextLength: 200000, IsFree: false,
SourceURL: "https://openai.com/api/pricing/",
Modality: "text",
SceneTags: []string{"对话"},
},
}
logger.Info("OpenAI采集完成", "models", len(prices))
return prices, nil
}
// ============ 数据库写入 ============
// saveToDatabase persists every pricing record, creating the provider,
// operator and model rows it refers to when they do not exist yet.
//
// Records are processed independently: a failure is logged and the loop moves
// on to the next record. Once all records have been attempted, a non-nil error
// is returned if any of them failed, so callers can tell a partial or empty
// write from a clean one.
//
// batchID is stored on newly created model rows only.
func saveToDatabase(db *sql.DB, prices []ModelPricing, batchID string) error {
	failed := 0
	for _, p := range prices {
		// Find or create the provider.
		var providerID int64
		err := db.QueryRow(
			"SELECT id FROM model_provider WHERE name = $1",
			p.ProviderName,
		).Scan(&providerID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
				p.ProviderName, p.ProviderCountry, "",
			).Scan(&providerID)
		}
		if err != nil {
			logger.Warn("provider error", "name", p.ProviderName, "error", err)
			failed++
			continue
		}

		// Find or create the operator. The provider's country is reused here
		// because ModelPricing carries no separate operator country.
		var operatorID int64
		err = db.QueryRow(
			"SELECT id FROM operator WHERE name = $1",
			p.OperatorName,
		).Scan(&operatorID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
				p.OperatorName, p.ProviderCountry,
			).Scan(&operatorID)
		}
		if err != nil {
			logger.Warn("operator error", "name", p.OperatorName, "error", err)
			failed++
			continue
		}

		// Find or create the model, keyed by external_id.
		var modelID int64
		err = db.QueryRow(
			"SELECT id FROM models WHERE external_id = $1",
			p.ModelID,
		).Scan(&modelID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
				p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
			).Scan(&modelID)
		}
		if err != nil {
			logger.Warn("model error", "id", p.ModelID, "error", err)
			failed++
			continue
		}

		// Upsert today's price. On conflict every non-key column written by
		// the INSERT is refreshed, including source_url, so a corrected URL
		// is not left stale by a same-day rerun.
		metadata := pricingMetadata(p)
		_, err = db.Exec(
			`INSERT INTO region_pricing
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_url = EXCLUDED.source_url,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = CURRENT_TIMESTAMP`,
			modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
			metadata.SourceType, metadata.FreeQuota, metadata.FreeLimitations, metadata.RateLimit,
		)
		if err != nil {
			logger.Warn("pricing insert error", "model", p.ModelID, "error", err)
			failed++
			continue
		}
	}

	if failed > 0 {
		return fmt.Errorf("%d of %d pricing records could not be saved", failed, len(prices))
	}
	return nil
}
// savePricesToDatabase opens a PostgreSQL connection for dsn, verifies that
// the server is reachable and stores prices under a freshly generated batch
// ID of the form "phase2-YYYYMMDD-HHMMSS".
//
// sql.Open only validates its arguments, so the connection is pinged up front:
// an unreachable database is reported once instead of once per record.
func savePricesToDatabase(dsn string, prices []ModelPricing) error {
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		return err
	}
	defer db.Close()

	if err := db.Ping(); err != nil {
		return fmt.Errorf("connect to database: %w", err)
	}

	batchID := fmt.Sprintf("phase2-%s", time.Now().Format("20060102-150405"))
	return saveToDatabase(db, prices, batchID)
}
// defaultDSN returns the DATABASE_URL environment variable when it is set to
// a non-empty value, and the local unix-socket DSN otherwise.
func defaultDSN() string {
	if fromEnv, ok := os.LookupEnv("DATABASE_URL"); ok && fromEnv != "" {
		return fromEnv
	}
	return "postgres://long@/llm_intelligence?host=/var/run/postgresql"
}
// runCollector fetches pricing from every source, prints a one-line summary
// to out and, unless cfg.DryRun is set, hands the combined records to saveFn.
//
// A failing source is logged and skipped. The call fails when the summary
// cannot be written, when no source succeeded, when saveFn is nil outside of
// a dry run, or when saveFn itself returns an error.
func runCollector(cfg runConfig, sources []DataSource, saveFn func([]ModelPricing) error, out io.Writer) error {
	collected := make([]ModelPricing, 0)
	okCount := 0
	for i := range sources {
		source := sources[i]
		batch, fetchErr := source.FetchPricing()
		if fetchErr != nil {
			logger.Error("采集失败", "source", source.Name(), "error", fetchErr)
			continue
		}
		okCount++
		collected = append(collected, batch...)
	}

	// The summary is printed before any failure check so that it is visible
	// even when the run is about to be rejected.
	if err := printSummary(out, summarizePrices(len(sources), okCount, collected)); err != nil {
		return err
	}

	switch {
	case okCount == 0:
		return fmt.Errorf("no data source collected successfully")
	case cfg.DryRun:
		return nil
	case saveFn == nil:
		return fmt.Errorf("save function is required when dry-run is disabled")
	}

	if err := saveFn(collected); err != nil {
		return err
	}
	logger.Info("多源采集完成", "total_models", len(collected), "sources", okCount)
	return nil
}
// ============ 主程序 ============
// main parses the command-line flags, optionally lists the available source
// keys, and otherwise runs the collector, exiting with status 1 on an invalid
// source selection or a failed run.
func main() {
	sourcesFlag := flag.String("sources", "", "comma-separated source keys: openrouter,moonshot,deepseek,openai")
	dryRun := flag.Bool("dry-run", false, "collect and print summary without writing to database")
	listSources := flag.Bool("list-sources", false, "print available source keys and exit")
	flag.Parse()

	apiKey := os.Getenv("OPENROUTER_API_KEY")

	if *listSources {
		fmt.Println(strings.Join(listSourceKeys(apiKey), ","))
		return
	}

	sources, err := buildSources(apiKey, parseSourceList(*sourcesFlag))
	if err != nil {
		logger.Error("数据源参数非法", "error", err)
		os.Exit(1)
	}

	// The DSN is resolved lazily inside the callback, so a dry run never
	// needs database settings.
	persist := func(prices []ModelPricing) error {
		return savePricesToDatabase(defaultDSN(), prices)
	}
	if err := runCollector(runConfig{DryRun: *dryRun}, sources, persist, os.Stdout); err != nil {
		logger.Error("多源采集失败", "error", err)
		os.Exit(1)
	}
}

View File

@@ -0,0 +1,108 @@
//go:build llm_script
package main
import (
"bytes"
"testing"
)
// fakeSource is a DataSource test double that returns canned results.
type fakeSource struct {
	name   string         // value reported by Name
	prices []ModelPricing // records returned by FetchPricing
	err    error          // error returned by FetchPricing
}

// Name returns the configured display name.
func (s fakeSource) Name() string {
	return s.name
}

// FetchPricing returns the canned records and error unchanged.
func (s fakeSource) FetchPricing() ([]ModelPricing, error) {
	return s.prices, s.err
}

// SourceType always reports "official".
func (s fakeSource) SourceType() string {
	return "official"
}
// TestBuildSourcesFiltersRequestedNames checks that only the requested
// sources are built and that the request order is preserved.
func TestBuildSourcesFiltersRequestedNames(t *testing.T) {
	got, err := buildSources("", []string{"moonshot", "openai"})
	if err != nil {
		t.Fatalf("buildSources returned error: %v", err)
	}
	if n := len(got); n != 2 {
		t.Fatalf("expected 2 sources, got %d", n)
	}
	first, second := got[0].Name(), got[1].Name()
	if first != "Moonshot" || second != "OpenAI" {
		t.Fatalf("unexpected source order: %s, %s", first, second)
	}
}
// TestBuildSourcesRejectsUnknownNames checks that an unregistered key makes
// buildSources fail even when other keys in the request are valid.
func TestBuildSourcesRejectsUnknownNames(t *testing.T) {
	if _, err := buildSources("", []string{"moonshot", "unknown"}); err == nil {
		t.Fatal("expected error for unknown source")
	}
}
func TestRunCollectorDryRunSkipsDatabaseWrite(t *testing.T) {
cfg := runConfig{DryRun: true}
var out bytes.Buffer
writeCalled := false
err := runCollector(
cfg,
[]DataSource{
fakeSource{
name: "Moonshot",
prices: []ModelPricing{
{ModelID: "kimi-k2.6", ProviderCountry: "CN", Currency: "CNY"},
{ModelID: "kimi-k2-0905-preview", ProviderCountry: "CN", Currency: "CNY"},
},
},
fakeSource{
name: "OpenAI",
prices: []ModelPricing{
{ModelID: "gpt-5.5", ProviderCountry: "US", Currency: "USD"},
},
},
},
func([]ModelPricing) error {
writeCalled = true
return nil
},
&out,
)
if err != nil {
t.Fatalf("runCollector returned error: %v", err)
}
if writeCalled {
t.Fatal("expected dry-run to skip database write")
}
output := out.String()
if output == "" {
t.Fatal("expected dry-run summary output")
}
if !bytes.Contains(out.Bytes(), []byte("sources=2")) {
t.Fatalf("expected sources summary, got %q", output)
}
if !bytes.Contains(out.Bytes(), []byte("models=3")) {
t.Fatalf("expected model summary, got %q", output)
}
if !bytes.Contains(out.Bytes(), []byte("currencies=CNY:2,USD:1")) {
t.Fatalf("expected currency summary, got %q", output)
}
}
// TestPricingMetadataClassifiesSourceType checks that free models are tagged
// "free_tier" with a quota text and that paid models keep their operator type.
func TestPricingMetadataClassifiesSourceType(t *testing.T) {
	free := pricingMetadata(ModelPricing{OperatorType: "official", IsFree: true})
	if got := free.SourceType; got != "free_tier" {
		t.Fatalf("expected free_tier, got %q", got)
	}
	if free.FreeQuota == "" {
		t.Fatal("expected free tier quota description")
	}

	paid := pricingMetadata(ModelPricing{OperatorType: "reseller"})
	if got := paid.SourceType; got != "reseller" {
		t.Fatalf("expected reseller, got %q", got)
	}
}

View File

@@ -1,17 +1,26 @@
// fetch_openrouter.go - OpenRouter 模型数据采集器
// Phase 1 单数据源采集器,抓取模型基础信息与价格信息
//go:build llm_script
// fetch_openrouter.go - OpenRouter 模型数据采集器 v2.0
// Sprint 2 增强版:指数退避重试 + 批量插入 + ProviderMapper + audit_log + 价格变动检测 + slog
package main
import (
"bufio"
"context"
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"strings"
"time"
"llm-intelligence/internal/collectors"
"llm-intelligence/internal/retry"
_ "github.com/lib/pq"
)
@@ -22,23 +31,19 @@ type Config struct {
OutPath string
MaxRetries int
TimeoutSec int
// PostgreSQL 连接参数(新增)
DBConn string // e.g. "host=/var/run/postgresql dbname=llm_intelligence sslmode=disable"
}
// OpenRouter API 响应结构(仅关键字段)
type APIResponse struct {
Data []ModelInfo `json:"data"`
BatchSize int
DBConn string
}
// ModelInfo 模型信息(与 collectors 包兼容)
type ModelInfo struct {
ID string `json:"id"`
Name string `json:"name,omitempty"`
Created int64 `json:"created,omitempty"`
Description string `json:"description,omitempty"`
ContextLength int `json:"context_length,omitempty"`
Capabilities []string `json:"capabilities,omitempty"`
Pricing ModelPricing `json:"pricing,omitempty"`
ID string `json:"id"`
Name string `json:"name,omitempty"`
Created int64 `json:"created,omitempty"`
Description string `json:"description,omitempty"`
ContextLength int `json:"context_length,omitempty"`
Capabilities []string `json:"capabilities,omitempty"`
Pricing ModelPricing `json:"pricing,omitempty"`
}
type ModelPricing struct {
@@ -46,21 +51,54 @@ type ModelPricing struct {
Output float64 `json:"output,omitempty"`
}
// collectorVersion is the version string reported in the start-up log line.
var collectorVersion = "v2.0"

// logger is the process-wide structured logger; it is assigned in init.
var logger *slog.Logger

// init installs a JSON handler that writes records of level Info and above
// to standard error.
func init() {
	options := &slog.HandlerOptions{Level: slog.LevelInfo}
	logger = slog.New(slog.NewJSONHandler(os.Stderr, options))
}
func main() {
cfg := parseArgs()
start := time.Now()
logger.Info("采集器启动", "collector", "openrouter", "version", collectorVersion, "batch_size", cfg.BatchSize)
var runErr error
if err := run(cfg); err != nil {
fmt.Fprintf(os.Stderr, "采集失败: %v\n", err)
logger.Error("采集失败", "error", err, "duration", time.Since(start))
runErr = err
}
duration := time.Since(start)
// 写入采集统计
if cfg.DBConn != "" {
if err := recordCollectorStats(cfg.DBConn, runErr, duration); err != nil {
logger.Warn("采集统计写入失败", "error", err)
}
}
if runErr != nil {
os.Exit(1)
}
logger.Info("采集完成", "collector", "openrouter", "duration_ms", duration.Milliseconds())
}
func parseArgs() Config {
apiKey := flag.String("api-key", "", "OpenRouter API Key建议通过环境变量注入")
loadProjectEnv()
apiKey := flag.String("api-key", "", "OpenRouter API Key")
apiURL := flag.String("api-url", "https://openrouter.ai/api/v1/models", "API 地址")
outPath := flag.String("out", "models.json", "输出文件路径")
maxRetries := flag.Int("retry", 3, "最大重试次数")
timeoutSec := flag.Int("timeout", 30, "请求超时(秒)")
dbConn := flag.String("db", os.Getenv("DATABASE_URL"), "PostgreSQL 连接字符串(默认从 DATABASE_URL 环境变量读取)")
batchSize := flag.Int("batch", 100, "批量插入批次大小")
dbConn := flag.String("db", os.Getenv("DATABASE_URL"), "PostgreSQL 连接字符串")
flag.Parse()
return Config{
APIKey: *apiKey,
@@ -68,82 +106,131 @@ func parseArgs() Config {
OutPath: *outPath,
MaxRetries: *maxRetries,
TimeoutSec: *timeoutSec,
BatchSize: *batchSize,
DBConn: *dbConn,
}
}
// loadProjectEnv loads .env.local and then .env from the working directory.
// loadEnvFile never overwrites a variable that is already set, so values
// from .env.local win over .env.
func loadProjectEnv() {
	loadEnvFile(".env.local")
	loadEnvFile(".env")
}
// loadEnvFile reads KEY=VALUE lines from path and sets every key that is not
// already present in the process environment.
//
// Blank lines, lines starting with "#" and lines without "=" are skipped. A
// leading "export " is accepted so the file can also be sourced by a shell.
// One pair of matching surrounding quotes (single or double) is removed from
// the value; unmatched quote characters are kept, because they belong to the
// value. A missing or unreadable file is silently ignored.
func loadEnvFile(path string) {
	f, err := os.Open(path)
	if err != nil {
		return // env files are optional
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		if rest, exported := strings.CutPrefix(line, "export "); exported {
			line = strings.TrimSpace(rest)
		}

		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		if key == "" {
			continue
		}

		value = strings.TrimSpace(value)
		if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
			value = value[1 : n-1]
		}

		// Variables supplied by the caller take precedence over the file.
		if _, exists := os.LookupEnv(key); exists {
			continue
		}
		_ = os.Setenv(key, value) // best effort: an invalid key is skipped
	}
	// scanner.Err is deliberately not reported: loading is best effort and
	// every line read before a failure has already been applied.
}
func run(cfg Config) error {
models, err := fetchModels(cfg)
if err != nil {
return err
}
// 优先写入 PostgreSQL若配置了 DBConn 则入库
logger.Info("API 数据获取完成", "records", len(models))
if cfg.DBConn != "" {
if err := summarizeDB(cfg.DBConn, models); err != nil {
fmt.Fprintf(os.Stderr, "警告: PostgreSQL 写入失败: %v\n", err)
fmt.Fprintln(os.Stderr, "降级为仅写入 JSON")
if err := summarizeDB(cfg.DBConn, models, cfg.BatchSize); err != nil {
logger.Error("PostgreSQL 写入失败", "error", err)
logger.Warn("降级为仅写入 JSON")
} else {
logger.Info("PostgreSQL 写入完成", "records", len(models))
}
}
return summarize(cfg.OutPath, models)
}
// fetchModels 抓取 OpenRouter 模型列表
// fetchModels 抓取 OpenRouter 模型列表(集成指数退避重试)
func fetchModels(cfg Config) ([]ModelInfo, error) {
// 无 API Key 时返回模拟数据(写入由后续 summarize 统一处理)
if cfg.APIKey == "" {
fmt.Println("警告: 未提供 API Key使用模拟数据")
logger.Warn("未提供 API Key使用模拟数据")
return []ModelInfo{
{ID: "openai/gpt-4o", ContextLength: 128000,
Pricing: ModelPricing{Input: 2.5, Output: 10.0}},
{ID: "anthropic/claude-3.5-sonnet:free", ContextLength: 200000,
Pricing: ModelPricing{}},
{ID: "openai/gpt-4o", ContextLength: 128000, Pricing: ModelPricing{Input: 2.5, Output: 10.0}},
{ID: "anthropic/claude-3.5-sonnet:free", ContextLength: 200000, Pricing: ModelPricing{}},
}, nil
}
client := &http.Client{Timeout: time.Duration(cfg.TimeoutSec) * time.Second}
req, err := http.NewRequest("GET", cfg.APIURL, nil)
if err != nil {
return nil, fmt.Errorf("构造请求失败: %w", err)
strategy := retry.Strategy{
MaxRetries: cfg.MaxRetries,
BaseDelay: 1 * time.Second,
MaxDelay: 30 * time.Second,
Multiplier: 2.0,
Jitter: true,
Retryable: retry.IsRetryable,
}
req.Header.Set("Authorization", "Bearer "+cfg.APIKey)
req.Header.Set("Content-Type", "application/json")
var resp *http.Response
for i := 0; i <= cfg.MaxRetries; i++ {
resp, err = client.Do(req)
if err == nil {
break
var models []ModelInfo
var lastErr error
err := retry.Do(context.Background(), strategy, func() error {
client := &http.Client{Timeout: time.Duration(cfg.TimeoutSec) * time.Second}
req, err := http.NewRequest("GET", cfg.APIURL, nil)
if err != nil {
return fmt.Errorf("构造请求失败: %w", err)
}
if i < cfg.MaxRetries {
time.Sleep(time.Duration(i+1) * time.Second)
req.Header.Set("Authorization", "Bearer "+cfg.APIKey)
req.Header.Set("Content-Type", "application/json")
resp, err := client.Do(req)
if err != nil {
lastErr = err
return fmt.Errorf("请求失败: %w", err)
}
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
lastErr = fmt.Errorf("非 200 响应: %d %s", resp.StatusCode, string(body))
return lastErr
}
body, err := io.ReadAll(resp.Body)
if err != nil {
lastErr = err
return fmt.Errorf("读取响应失败: %w", err)
}
models, err = parseModels(body)
if err != nil {
lastErr = err
return fmt.Errorf("JSON 解析失败: %w", err)
}
return nil
})
if err != nil {
return nil, fmt.Errorf("请求失败: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("非 200 响应: %d %s", resp.StatusCode, string(body))
return nil, fmt.Errorf("采集失败(%d次尝试: %w", strategy.MaxRetries+1, lastErr)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("读取响应失败: %w", err)
}
// 健壮解析,兼容字段缺失和结构差异
models, err := parseModels(body)
if err != nil {
return nil, fmt.Errorf("JSON 解析失败: %w", err)
}
// TODO: 字段标准化映射OpenRouter id → 标准厂商名、模型名)
return models, nil
}
// parseModels 健壮解析模型列表,兼容字段缺失/类型不一致/嵌套结构差异
func parseModels(raw []byte) ([]ModelInfo, error) {
var wrapper struct {
Data json.RawMessage `json:"data"`
@@ -151,7 +238,6 @@ func parseModels(raw []byte) ([]ModelInfo, error) {
if err := json.Unmarshal(raw, &wrapper); err != nil {
return nil, fmt.Errorf("解析 data 字段失败: %w", err)
}
// data 为数组,每元素字段可能不同,统一用 map[string]any 兼容
var rawItems []any
if err := json.Unmarshal(wrapper.Data, &rawItems); err != nil {
return nil, fmt.Errorf("解析模型数组失败: %w", err)
@@ -161,17 +247,16 @@ func parseModels(raw []byte) ([]ModelInfo, error) {
for _, item := range rawItems {
m, ok := item.(map[string]any)
if !ok {
continue // 跳过非法条目
continue
}
model := ModelInfo{
ID: getString(m, "id"),
Name: getString(m, "name"),
}
ID: getString(m, "id"),
Name: getString(m, "name"),
}
if model.ID == "" {
continue // id 为必填
continue
}
// pricing 可能为嵌套对象(如 {openrouter: {input: 1}}),尝试多路径取值
if p, ok := m["pricing"].(map[string]any); ok {
model.Pricing.Input = getPrice(p, "input", "prompt")
model.Pricing.Output = getPrice(p, "output", "completion")
@@ -214,7 +299,6 @@ func getInt64(m map[string]any, key string) int64 {
return 0
}
// getPrice 多路径取值,兼容不同嵌套结构(如 {input:1} 或 {openrouter:{input:1}}
func getPrice(m map[string]any, keys ...string) float64 {
for _, k := range keys {
if v, ok := m[k].(float64); ok {
@@ -224,13 +308,12 @@ func getPrice(m map[string]any, keys ...string) float64 {
return 0
}
// summarize writes the collected models to the JSON file at outPath by
// delegating to writeJSON (kept for backward compatibility).
func summarize(outPath string, models []ModelInfo) error {
return writeJSON(outPath, models)
}
// summarizeDB 将采集结果写入 PostgreSQLmodels + model_prices 表
func summarizeDB(connStr string, models []ModelInfo) error {
// summarizeDB 将采集结果写入 PostgreSQL批量插入 + ProviderMapper + 价格变动检测 + audit_log
func summarizeDB(connStr string, models []ModelInfo, batchSize int) error {
db, err := sql.Open("postgres", connStr)
if err != nil {
return fmt.Errorf("连接数据库失败: %w", err)
@@ -241,66 +324,229 @@ func summarizeDB(connStr string, models []ModelInfo) error {
return fmt.Errorf("ping 数据库失败: %w", err)
}
tx, err := db.Begin()
if err != nil {
return fmt.Errorf("开启事务失败: %w", err)
}
defer tx.Rollback()
batchID := fmt.Sprintf("batch-%d", time.Now().Unix())
now := time.Now()
effectiveDate := now.Format("2006-01-02")
// 获取默认 operatorOpenRouter
var operatorID int64
err = db.QueryRow("SELECT id FROM operator WHERE name = 'OpenRouter' LIMIT 1").Scan(&operatorID)
if err != nil {
logger.Warn("未找到 OpenRouter operator使用 NULL", "error", err)
operatorID = 0
}
// 获取上次价格数据(用于变动检测)
lastPrices := make(map[int64]ModelPricing)
rows, err := db.Query(`
SELECT model_id, input_price_per_mtok, output_price_per_mtok
FROM region_pricing
WHERE operator_id = $1 AND effective_date = (
SELECT MAX(effective_date) FROM region_pricing WHERE operator_id = $1
)
`, operatorID)
if err == nil {
for rows.Next() {
var mid int64
var p ModelPricing
if err := rows.Scan(&mid, &p.Input, &p.Output); err == nil {
lastPrices[mid] = p
}
}
rows.Close()
}
insertedModels := 0
insertedPrices := 0
priceChanges := 0
for _, m := range models {
isFree := len(m.ID) > 5 && m.ID[len(m.ID)-5:] == ":free"
// upsert models 表
var modelID int64
err := tx.QueryRow(`
INSERT INTO models (source, external_id, name, description, context_length, capabilities, created_at_source, is_free, status, raw_payload, created_at, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
ON CONFLICT (external_id) DO UPDATE SET
name = EXCLUDED.name,
description = EXCLUDED.description,
context_length = EXCLUDED.context_length,
capabilities = EXCLUDED.capabilities,
created_at_source = EXCLUDED.created_at_source,
is_free = EXCLUDED.is_free,
status = EXCLUDED.status,
raw_payload = EXCLUDED.raw_payload,
updated_at = $12
RETURNING id
`, "openrouter", m.ID, m.Name, m.Description, m.ContextLength,
jsonCapabilities(m.Capabilities), m.Created, isFree, "active",
rawPayload(m), now, now).Scan(&modelID)
if err != nil {
return fmt.Errorf("写入 models 失败 (%s): %w", m.ID, err)
// 批量处理
for i := 0; i < len(models); i += batchSize {
end := i + batchSize
if end > len(models) {
end = len(models)
}
insertedModels++
batch := models[i:end]
// upsert model_prices 表(当天有效日期)
effectiveDate := now.Format("2006-01-02")
_, err = tx.Exec(`
INSERT INTO model_prices (model_id, source, currency, input_price_per_mtok, output_price_per_mtok, effective_date, source_url, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (model_id, source, currency, effective_date) DO UPDATE SET
input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
created_at = EXCLUDED.created_at
`, modelID, "openrouter", "USD", m.Pricing.Input, m.Pricing.Output, effectiveDate, "https://openrouter.ai/api/v1/models", now)
tx, err := db.Begin()
if err != nil {
return fmt.Errorf("写入 model_prices 失败 (%s): %w", m.ID, err)
return fmt.Errorf("开启事务失败: %w", err)
}
insertedPrices++
for _, m := range batch {
// 使用 ProviderMapper 映射厂商
mapping, err := collectors.MapOpenRouterID(m.ID)
if err != nil {
logger.Warn("Provider 映射失败", "id", m.ID, "error", err)
mapping = collectors.ModelMapping{
Provider: collectors.ProviderInfo{ID: "unknown", Name: "Unknown"},
ModelName: m.Name,
RawID: m.ID,
IsFree: false,
}
}
// 查找或创建 provider_id
var providerID int64
err = tx.QueryRow("SELECT id FROM model_provider WHERE name = $1 LIMIT 1", mapping.Provider.Name).Scan(&providerID)
if err != nil {
// 未知厂商,插入
err = tx.QueryRow(`
INSERT INTO model_provider (name, name_cn, country, status)
VALUES ($1, $2, $3, 'active')
ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name
RETURNING id
`, mapping.Provider.Name, mapping.Provider.NameCN, mapping.Provider.Country).Scan(&providerID)
if err != nil {
logger.Warn("创建 provider 失败", "name", mapping.Provider.Name, "error", err)
providerID = 0
}
}
isFree := mapping.IsFree || (m.Pricing.Input == 0 && m.Pricing.Output == 0)
// upsert models 表(带新字段)
var modelID int64
err = tx.QueryRow(`
INSERT INTO models (
source, external_id, name, description, context_length,
capabilities, created_at_source, is_free, status,
raw_payload, provider_id, version, modality,
data_confidence, retrieved_at, batch_id, collector_version,
source_url, created_at, updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $19)
ON CONFLICT (external_id) DO UPDATE SET
name = EXCLUDED.name,
description = EXCLUDED.description,
context_length = EXCLUDED.context_length,
capabilities = EXCLUDED.capabilities,
created_at_source = EXCLUDED.created_at_source,
is_free = EXCLUDED.is_free,
status = EXCLUDED.status,
raw_payload = EXCLUDED.raw_payload,
provider_id = EXCLUDED.provider_id,
data_confidence = 'official',
retrieved_at = EXCLUDED.retrieved_at,
batch_id = EXCLUDED.batch_id,
collector_version = EXCLUDED.collector_version,
updated_at = EXCLUDED.updated_at
RETURNING id
`,
"openrouter", m.ID, m.Name, m.Description, m.ContextLength,
jsonCapabilities(m.Capabilities), m.Created, isFree, "active",
rawPayload(m), providerID, "", "text",
"official", now, batchID, collectorVersion,
"https://openrouter.ai/api/v1/models", now).Scan(&modelID)
if err != nil {
_ = tx.Rollback()
return fmt.Errorf("写入 models 失败 (%s): %w", m.ID, err)
}
insertedModels++
// 写入 audit_log
_, _ = tx.Exec(`
INSERT INTO audit_log (table_name, record_id, field_name, old_value, new_value, operation, operator, batch_id, source_url)
VALUES ('models', $1, 'external_id', NULL, $2, 'INSERT', 'fetch_openrouter', $3, $4)
`, modelID, m.ID, batchID, "https://openrouter.ai/api/v1/models")
// upsert region_pricing 表(替代 model_prices
sourceType := "reseller"
freeQuota := ""
freeLimitations := "[]"
rateLimit := "{}"
if isFree {
sourceType = "free_tier"
freeQuota = "Imported free-tier pricing entry"
freeLimitations = `["See source_url for current quota and policy"]`
}
var pricingID int64
err = tx.QueryRow(`
INSERT INTO region_pricing (
model_id, operator_id, region, currency,
input_price_per_mtok, output_price_per_mtok,
is_free, effective_date, source_url, source_type,
free_quota, free_limitations, rate_limit,
created_at, updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $14)
ON CONFLICT (model_id, operator_id, region, currency, effective_date) DO UPDATE SET
input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = EXCLUDED.updated_at
RETURNING id
`, modelID, operatorID, "global", "USD", m.Pricing.Input, m.Pricing.Output,
isFree, effectiveDate, "https://openrouter.ai/api/v1/models", sourceType,
freeQuota, freeLimitations, rateLimit, now).Scan(&pricingID)
if err != nil {
_ = tx.Rollback()
return fmt.Errorf("写入 region_pricing 失败 (%s): %w", m.ID, err)
}
insertedPrices++
// 价格变动检测(>5%
if lastPrice, ok := lastPrices[modelID]; ok {
inputChange := calcChangePercent(lastPrice.Input, m.Pricing.Input)
outputChange := calcChangePercent(lastPrice.Output, m.Pricing.Output)
if abs(inputChange) > 5 || abs(outputChange) > 5 {
_, _ = tx.Exec(`
INSERT INTO pricing_history (
model_id, region, currency,
old_input_price, new_input_price,
old_output_price, new_output_price,
change_percent, changed_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
`, modelID, "global", "USD",
lastPrice.Input, m.Pricing.Input,
lastPrice.Output, m.Pricing.Output,
max(abs(inputChange), abs(outputChange)), now)
priceChanges++
}
}
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("提交事务失败: %w", err)
}
logger.Info("批次完成", "batch", i/batchSize+1, "records", len(batch))
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("提交事务失败: %w", err)
}
fmt.Printf("PostgreSQL 写入完成: %d models, %d prices\n", insertedModels, insertedPrices)
logger.Info("PostgreSQL 写入完成",
"models", insertedModels,
"prices", insertedPrices,
"price_changes", priceChanges,
"batch_id", batchID)
return nil
}
// calcChangePercent reports how far new has moved away from old, expressed
// as a percentage of old. A zero baseline cannot be divided by, so it yields
// 0 when new is zero as well and a flat 100 for any other new value.
func calcChangePercent(old, new float64) float64 {
	switch {
	case old != 0:
		return ((new - old) / old) * 100
	case new == 0:
		return 0
	default:
		return 100
	}
}
// abs returns the magnitude of v: negative inputs are negated, everything
// else is returned unchanged.
func abs(v float64) float64 {
	if v < 0 {
		v = -v
	}
	return v
}
// max returns a when it is strictly greater than b, and b otherwise.
func max(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
func jsonCapabilities(caps []string) []byte {
if len(caps) == 0 {
return []byte("[]")
@@ -314,7 +560,6 @@ func rawPayload(m ModelInfo) []byte {
return b
}
// writeJSON 统一写入 JSON 文件(含摘要信息)
func writeJSON(outPath string, models []ModelInfo) error {
total := len(models)
var freeCnt, paidCnt int
@@ -349,3 +594,24 @@ func writeJSON(outPath string, models []ModelInfo) error {
fmt.Printf("结果已写入: %s\n", outPath)
return nil
}
// recordCollectorStats inserts one row describing a collector run into the
// collector_stats table, using a short-lived connection opened from connStr.
//
// runErr is the outcome of the run being recorded: nil is stored as success
// with an empty error message, anything else as a failure with its text.
// duration is stored in whole milliseconds. The batch id written here is
// derived from the current time at the moment of the insert.
func recordCollectorStats(connStr string, runErr error, duration time.Duration) error {
	conn, openErr := sql.Open("postgres", connStr)
	if openErr != nil {
		return openErr
	}
	defer conn.Close()

	var failure string
	succeeded := runErr == nil
	if !succeeded {
		failure = runErr.Error()
	}

	const insertStats = `
INSERT INTO collector_stats (source, batch_id, success, duration_ms, error_message, created_at)
VALUES ('openrouter', $1, $2, $3, $4, $5)
`
	batch := fmt.Sprintf("batch-%d", time.Now().Unix())
	elapsedMs := int(duration.Milliseconds())
	_, execErr := conn.Exec(insertStats, batch, succeeded, elapsedMs, failure, time.Now())
	return execErr
}

View File

@@ -1,3 +1,5 @@
//go:build llm_script
package main
import (

View File

@@ -0,0 +1,61 @@
//go:build llm_script
package main
import (
"flag"
"fmt"
"io"
"net/http"
"os"
"time"
)
// main parses the command-line flags, builds the fetch configuration and runs
// the Tencent catalog fetch, printing the summary to stdout. Any failure is
// reported on stderr and the process exits with status 1.
func main() {
	urlFlag := flag.String("url", defaultTencentCatalogURL, "腾讯云公开目录 URL")
	dryRunFlag := flag.Bool("dry-run", false, "仅抓取并打印摘要,不写入数据库")
	timeoutFlag := flag.Int("timeout", int(defaultTencentCatalogTimeout/time.Second), "请求超时(秒)")
	fixtureFlag := flag.String("fixture", "", "本地 HTML/Text 样例文件,优先用于离线 dry-run")
	flag.Parse()

	cfg := fetchTencentCatalogConfig{
		URL:     *urlFlag,
		DryRun:  *dryRunFlag,
		Timeout: time.Duration(*timeoutFlag) * time.Second,
		Fixture: *fixtureFlag,
	}

	// The HTTP client shares the configured timeout with the fetch config.
	httpClient := &http.Client{Timeout: cfg.Timeout}
	err := runTencentCatalog(cfg, httpClient, os.Stdout)
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "fetch_tencent_catalog: %v\n", err)
	os.Exit(1)
}
// runTencentCatalog fetches the catalog content described by cfg, parses it
// and writes a single summary line (update time, plan count, model count,
// per-series summary and the dry-run flag) to out.
//
// It returns the first error from fetching, parsing or writing.
func runTencentCatalog(cfg fetchTencentCatalogConfig, client *http.Client, out io.Writer) error {
	const summaryFormat = "source=tencent-public-catalog updated_at=%s plans=%d models=%d series=%s dry_run=%t\n"

	content, fetchErr := fetchTencentCatalogContent(cfg, client)
	if fetchErr != nil {
		return fetchErr
	}

	parsed, parseErr := parseTencentCatalog(content)
	if parseErr != nil {
		return parseErr
	}

	planCount := len(parsed.Plans)
	modelCount := len(parsed.Models)
	series := formatSeriesSummary(parsed.Plans)
	_, writeErr := fmt.Fprintf(out, summaryFormat, parsed.UpdatedAt, planCount, modelCount, series, cfg.DryRun)
	return writeErr
}

View File

@@ -0,0 +1,98 @@
//go:build llm_script
package main
import (
"bytes"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
)
// TestParseTencentCatalogExtractsPlansAndModels parses the sample file under
// testdata and checks the update time, the number of plans and models, the
// fields of the first plan and the fields of the last model.
func TestParseTencentCatalogExtractsPlansAndModels(t *testing.T) {
	samplePath := filepath.Join("testdata", "tencent_token_plan_sample.txt")
	sample, err := os.ReadFile(samplePath)
	if err != nil {
		t.Fatalf("读取样例失败: %v", err)
	}

	parsed, err := parseTencentCatalog(string(sample))
	if err != nil {
		t.Fatalf("parseTencentCatalog 失败: %v", err)
	}

	if got := parsed.UpdatedAt; got != "2026-04-27 17:18:02" {
		t.Fatalf("更新时间错误: %q", got)
	}
	if got := len(parsed.Plans); got != 8 {
		t.Fatalf("期望 8 个套餐,实际 %d", got)
	}
	if got := len(parsed.Models); got != 11 {
		t.Fatalf("期望 11 个模型,实际 %d", got)
	}

	// First plan: series, tier, price and quota.
	head := parsed.Plans[0]
	if got := head.Series; got != "通用 Token Plan" {
		t.Fatalf("套餐系列错误: %q", got)
	}
	if got := head.Tier; got != "Lite" {
		t.Fatalf("套餐档位错误: %q", got)
	}
	if got := head.Price; got != "39元/月" {
		t.Fatalf("套餐价格错误: %q", got)
	}
	if got := head.Quota; got != "3500万 Tokens" {
		t.Fatalf("套餐额度错误: %q", got)
	}

	// Last model: display name, id and context length.
	tail := parsed.Models[len(parsed.Models)-1]
	if got := tail.Name; got != "Hy3 preview" {
		t.Fatalf("最后一个模型错误: %q", got)
	}
	if got := tail.ModelID; got != "hy3-preview" {
		t.Fatalf("最后一个模型 ID 错误: %q", got)
	}
	if got := tail.ContextLength; got != 262144 {
		t.Fatalf("Hy3 preview 上下文长度错误: %d", got)
	}
}
func TestRunTencentCatalogDryRunPrintsSummary(t *testing.T) {
raw, err := os.ReadFile(filepath.Join("testdata", "tencent_token_plan_sample.txt"))
if err != nil {
t.Fatalf("读取样例失败: %v", err)
}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html; charset=utf-8")
_, _ = w.Write([]byte("<html><body><main><pre>" + string(raw) + "</pre></main></body></html>"))
}))
defer server.Close()
var out bytes.Buffer
err = runTencentCatalog(fetchTencentCatalogConfig{
URL: server.URL,
DryRun: true,
Timeout: defaultTencentCatalogTimeout,
}, server.Client(), &out)
if err != nil {
t.Fatalf("runTencentCatalog 失败: %v", err)
}
output := out.String()
for _, want := range []string{
"source=tencent-public-catalog",
"plans=8",
"models=11",
"series=Hy Token Plan:4,通用 Token Plan:4",
"updated_at=2026-04-27 17:18:02",
} {
if !strings.Contains(output, want) {
t.Fatalf("输出缺少 %q实际: %q", want, output)
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,115 @@
//go:build llm_script
package main
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestGenerateMarkdownV3IncludesTencentSubscriptionSection renders a report
// holding two Tencent subscription plans to a markdown file in a temporary
// directory and checks that the output contains the section heading, both
// plan names and the expected formatted price, quota and context strings.
func TestGenerateMarkdownV3IncludesTencentSubscriptionSection(t *testing.T) {
path := filepath.Join(t.TempDir(), "daily_report.md")
// Fixture: one plan without a context window and one with 262144 tokens.
report := &ReportV3{
Date: "2026-05-13",
TotalModels: 502,
QualitySummary: DataQualitySummary{
Total: 502,
Fresh: 490,
CNY: 126,
USD: 376,
},
TencentSubscriptionPlans: []SubscriptionPlanInfo{
{
PlanName: "通用 Token Plan Lite",
PlanFamily: "token_plan",
Tier: "Lite",
Currency: "CNY",
ListPrice: 39,
QuotaValue: 35000000,
QuotaUnit: "tokens/month",
ContextWindow: 0,
ModelCount: 10,
ModelPreview: "tc-code-latest, glm-5, glm-5.1",
},
{
PlanName: "Hy Token Plan Max",
PlanFamily: "token_plan",
Tier: "Max",
Currency: "CNY",
ListPrice: 468,
QuotaValue: 650000000,
QuotaUnit: "tokens/month",
ContextWindow: 262144,
ModelCount: 1,
ModelPreview: "hy3-preview",
},
},
}
if err := generateMarkdownV3(report, path); err != nil {
t.Fatalf("generateMarkdownV3 returned error: %v", err)
}
// Read the generated file back and look for each expected fragment.
body, err := os.ReadFile(path)
if err != nil {
t.Fatalf("read markdown output: %v", err)
}
content := string(body)
for _, want := range []string{
"## 💳 腾讯云套餐订阅价",
"通用 Token Plan Lite",
"Hy Token Plan Max",
"¥39.00/月",
"3500万 Tokens/月",
"256K",
} {
if !strings.Contains(content, want) {
t.Fatalf("markdown missing %q\n%s", want, content)
}
}
}
// TestGenerateHTMLV3IncludesTencentSubscriptionSection renders a report with
// a single Tencent subscription plan to an HTML file in a temporary directory
// and checks that the output contains the section title, the plan name and
// the expected formatted price and quota strings.
func TestGenerateHTMLV3IncludesTencentSubscriptionSection(t *testing.T) {
path := filepath.Join(t.TempDir(), "daily_report.html")
report := &ReportV3{
Date: "2026-05-13",
TotalModels: 502,
TencentSubscriptionPlans: []SubscriptionPlanInfo{
{
PlanName: "通用 Token Plan Lite",
PlanFamily: "token_plan",
Tier: "Lite",
Currency: "CNY",
ListPrice: 39,
QuotaValue: 35000000,
QuotaUnit: "tokens/month",
ModelCount: 10,
ModelPreview: "tc-code-latest, glm-5, glm-5.1",
},
},
}
if err := generateHTMLV3(report, path); err != nil {
t.Fatalf("generateHTMLV3 returned error: %v", err)
}
// Read the generated file back and look for each expected fragment.
body, err := os.ReadFile(path)
if err != nil {
t.Fatalf("read html output: %v", err)
}
content := string(body)
for _, want := range []string{
"💳 腾讯云套餐订阅价",
"通用 Token Plan Lite",
"¥39.00/月",
"3500万 Tokens/月",
} {
if !strings.Contains(content, want) {
t.Fatalf("html missing %q\n%s", want, content)
}
}
}

View File

@@ -0,0 +1,617 @@
//go:build llm_script
package main
import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"flag"
"fmt"
"image"
"image/color"
"image/draw"
"image/gif"
"image/png"
"math"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// reportRow is one model entry taken from a ranking table of the daily
// markdown report. parseModelRow fills it from table columns 1 through 6
// (column 0, the rank, is not stored). All values are kept as the raw cell
// text, including any currency symbol.
type reportRow struct {
Model string `json:"model"`
Provider string `json:"provider"`
Scene string `json:"scene"`
Input string `json:"input,omitempty"`
Output string `json:"output,omitempty"`
Context string `json:"context,omitempty"`
}
// dailyReport is the parsed form of a daily markdown report.
//
// parseDailyReport fills ReportDate, Stats (first two columns of the data
// quality table), International and Domestic. main sets SourceReport to the
// path the report was read from. GeneratedAt and SelectedSection are not
// populated by the code visible in this file.
type dailyReport struct {
ReportDate string `json:"report_date"`
Stats map[string]string `json:"stats"`
International []reportRow `json:"international"`
Domestic []reportRow `json:"domestic"`
SourceReport string `json:"source_report"`
GeneratedAt string `json:"generated_at,omitempty"`
SelectedSection string `json:"selected_section,omitempty"`
}
// DigestCard is one slide of the video digest: a title, a headline, a list
// of bullet lines and a narration sentence. Slug is used in the generated
// file names. FramePath and ScriptPath are empty until
// generateDigestArtifacts writes the frame image and script for the card.
type DigestCard struct {
Slug string `json:"slug"`
Title string `json:"title"`
Headline string `json:"headline"`
BulletLines []string `json:"bullet_lines"`
Narration string `json:"narration"`
FramePath string `json:"frame_path,omitempty"`
ScriptPath string `json:"script_path,omitempty"`
}
// digestManifest describes one generated digest: where the source report
// came from, when and where the artifacts were written, and the cards with
// their frame and script paths. generateDigestArtifacts serializes it to
// manifest.json in the output directory.
type digestManifest struct {
ReportDate string `json:"report_date"`
SourceReport string `json:"source_report"`
GeneratedAt string `json:"generated_at"`
OutputDir string `json:"output_dir"`
VideoPath string `json:"video_path"`
AudioPath string `json:"audio_path"`
Cards []DigestCard `json:"cards"`
}
// framePalette is the six-colour palette shared by every rendered frame.
// Drawing code refers to these entries by index (0-5): renderCardFrame uses
// index 0 for the inner panel, 1 for the body panel, 3 and 4 for text, and
// any of 1, 2, 4, 5 as a slide background via slideBackgrounds.
var framePalette = color.Palette{
color.RGBA{12, 18, 28, 255},
color.RGBA{32, 48, 74, 255},
color.RGBA{91, 192, 190, 255},
color.RGBA{245, 247, 250, 255},
color.RGBA{255, 196, 61, 255},
color.RGBA{255, 107, 107, 255},
}
// slideBackgrounds lists framePalette indexes used as the background colour
// of successive slides; renderCardFrame picks the entry at the card index
// modulo the length of this slice.
var slideBackgrounds = []uint8{1, 2, 1, 4, 5}
// main turns a daily markdown report into video digest artifacts.
//
// It resolves the report path from the -report / -date flags, parses the
// report, builds the digest cards and writes the artifacts to -output-dir
// (default: a "video/<date>" directory next to the report). On success it
// prints the resulting paths as key=value lines; on any failure it prints the
// error to stderr and exits with status 1.
func main() {
	reportFlag := flag.String("report", "", "path to daily markdown report")
	dateFlag := flag.String("date", "", "report date in YYYY-MM-DD")
	outputFlag := flag.String("output-dir", "", "output directory for video digest artifacts")
	flag.Parse()

	// fail prints one line to stderr and terminates the process.
	fail := func(message string) {
		fmt.Fprintln(os.Stderr, message)
		os.Exit(1)
	}

	sourcePath, err := resolveReportPath(*reportFlag, *dateFlag)
	if err != nil {
		fail(err.Error())
	}
	raw, err := os.ReadFile(sourcePath)
	if err != nil {
		fail(fmt.Sprintf("read report failed: %v", err))
	}
	report, err := parseDailyReport(raw)
	if err != nil {
		fail(fmt.Sprintf("parse report failed: %v", err))
	}
	report.SourceReport = sourcePath

	// Date precedence: explicit flag, then the date found in the report,
	// then today's date.
	date := *dateFlag
	if date == "" {
		date = report.ReportDate
	}
	if date == "" {
		date = time.Now().Format("2006-01-02")
	}

	cards := buildDigestCards(report)
	if len(cards) == 0 {
		fail("no digest cards generated")
	}

	targetDir := *outputFlag
	if targetDir == "" {
		targetDir = filepath.Join(filepath.Dir(sourcePath), "video", date)
	}

	manifest, err := generateDigestArtifacts(report, cards, targetDir)
	if err != nil {
		fail(fmt.Sprintf("generate digest artifacts failed: %v", err))
	}

	fmt.Printf(
		"report=%s\noutput=%s\ncards=%d\nvideo=%s\naudio=%s\n",
		manifest.SourceReport,
		manifest.OutputDir,
		len(manifest.Cards),
		manifest.VideoPath,
		manifest.AudioPath,
	)
}
// resolveReportPath decides which daily report file to read.
//
// An explicit path always wins and is returned untouched. Otherwise, when a
// date is given, the path of that day's report under the fixed reports
// directory is returned without checking that it exists. With neither, the
// reports directory is globbed and the lexicographically last match is
// returned; an error is returned when nothing matches.
func resolveReportPath(explicitPath string, reportDate string) (string, error) {
	const reportsDir = "/home/long/project/llm-intelligence/reports/daily"

	switch {
	case explicitPath != "":
		return explicitPath, nil
	case reportDate != "":
		name := fmt.Sprintf("daily_report_%s.md", reportDate)
		return filepath.Join(reportsDir, name), nil
	}

	candidates, err := filepath.Glob(filepath.Join(reportsDir, "daily_report_*.md"))
	if err != nil {
		return "", err
	}
	if len(candidates) == 0 {
		return "", errors.New("no daily report markdown files found")
	}
	// File names embed the date, so the last name in sorted order is the newest.
	sort.Strings(candidates)
	newest := candidates[len(candidates)-1]
	return newest, nil
}
// parseDailyReport extracts the report date, the data quality statistics and
// the international / domestic ranking tables from a daily markdown report.
//
// The report is scanned line by line. Three known "## " headings switch the
// current section; any other "## " heading leaves all sections. Within a
// section only table lines (starting with "|" and not a "------" separator)
// are considered. An error is returned when no report date line was found;
// the partially filled report is returned alongside it.
func parseDailyReport(content []byte) (dailyReport, error) {
	const datePrefix = "**报告日期**:"
	sectionByHeading := map[string]string{
		"## 📊 数据质量摘要":      "stats",
		"## 🌍 国际推荐模型 TOP 5": "international",
		"## 🇨🇳 国内模型 TOP 10": "domestic",
	}

	parsed := dailyReport{Stats: make(map[string]string)}
	current := ""
	for _, raw := range strings.Split(string(content), "\n") {
		text := strings.TrimSpace(raw)

		if strings.HasPrefix(text, datePrefix) {
			parsed.ReportDate = strings.TrimSpace(strings.TrimPrefix(text, datePrefix))
			continue
		}
		if name, known := sectionByHeading[text]; known {
			current = name
			continue
		}
		if strings.HasPrefix(text, "## ") {
			// Any other second-level heading ends the tracked section.
			current = ""
			continue
		}

		isDataRow := strings.HasPrefix(text, "|") && !strings.Contains(text, "------")
		if !isDataRow {
			continue
		}

		cells := splitMarkdownTableLine(text)
		switch current {
		case "stats":
			// Skip the header row, whose first cell is the column title.
			if len(cells) >= 2 && cells[0] != "指标" {
				parsed.Stats[cells[0]] = cells[1]
			}
		case "international", "domestic":
			row, ok := parseModelRow(cells)
			if !ok {
				break
			}
			if current == "international" {
				parsed.International = append(parsed.International, row)
			} else {
				parsed.Domestic = append(parsed.Domestic, row)
			}
		}
	}

	if parsed.ReportDate == "" {
		return parsed, errors.New("report date not found")
	}
	return parsed, nil
}
// splitMarkdownTableLine breaks one markdown table line into its cells:
// leading and trailing pipes are removed, the rest is split on "|" and every
// cell is stripped of surrounding whitespace.
func splitMarkdownTableLine(line string) []string {
	cells := strings.Split(strings.Trim(line, "|"), "|")
	for i := range cells {
		cells[i] = strings.TrimSpace(cells[i])
	}
	return cells
}
// parseModelRow converts the cells of a ranking table line into a reportRow.
// It reports false for lines with fewer than seven cells and for the header
// row (first cell "排名"); cell 0, the rank, is not stored.
func parseModelRow(parts []string) (reportRow, bool) {
	var row reportRow
	if len(parts) < 7 || parts[0] == "排名" {
		return row, false
	}
	row.Model = parts[1]
	row.Provider = parts[2]
	row.Scene = parts[3]
	row.Input = parts[4]
	row.Output = parts[5]
	row.Context = parts[6]
	return row, true
}
// buildDigestCards assembles the five digest cards, always in the order
// code, reasoning, vision, domestic, global.
//
// The three scene cards list up to three rows whose scene contains the
// matching keyword; the domestic and global cards list the first three rows
// of the respective ranking table. Headlines and narrations quote the
// statistics read from the report's data quality table.
func buildDigestCards(report dailyReport) []DigestCard {
	var (
		totalModels = report.Stats["模型总数"]
		cnyEntries  = report.Stats["CNY定价"]
		usdEntries  = report.Stats["USD定价"]
	)

	codeRows := pickSceneRows(report, "代码")
	reasoningRows := pickSceneRows(report, "推理")
	visionRows := pickSceneRows(report, "视觉")
	domesticRows := firstRows(report.Domestic, 3)
	globalRows := firstRows(report.International, 3)

	cards := make([]DigestCard, 0, 5)

	cards = append(cards, newDigestCard(
		"code",
		"Code Digest",
		fmt.Sprintf("%s total models tracked", totalModels),
		codeRows,
		fmt.Sprintf("Code digest highlights %d candidate models. CNY priced entries %s.", len(codeRows), cnyEntries),
	))

	cards = append(cards, newDigestCard(
		"reasoning",
		"Reasoning Digest",
		fmt.Sprintf("USD priced entries %s", usdEntries),
		reasoningRows,
		fmt.Sprintf("Reasoning digest focuses on %d reasoning oriented models.", len(reasoningRows)),
	))

	cards = append(cards, newDigestCard(
		"vision",
		"Vision Digest",
		fmt.Sprintf("CNY priced entries %s", cnyEntries),
		visionRows,
		fmt.Sprintf("Vision digest contains %d multimodal candidates from the latest report.", len(visionRows)),
	))

	cards = append(cards, newDigestCard(
		"domestic",
		"Domestic Digest",
		fmt.Sprintf("Domestic pricing entries %s", cnyEntries),
		domesticRows,
		fmt.Sprintf("Domestic digest summarizes top local platforms with %d highlighted entries.", min(3, len(report.Domestic))),
	))

	cards = append(cards, newDigestCard(
		"global",
		"Global Digest",
		fmt.Sprintf("Global pricing entries %s", usdEntries),
		globalRows,
		fmt.Sprintf("Global digest summarizes top international recommendations with %d highlighted entries.", min(3, len(report.International))),
	))

	return cards
}
// newDigestCard builds a card from its texts and the rows it should list.
// Each row becomes one "model - provider - scene" bullet line; when there are
// no rows a single placeholder bullet is used instead.
func newDigestCard(slug string, title string, headline string, rows []reportRow, narration string) DigestCard {
	card := DigestCard{
		Slug:      slug,
		Title:     title,
		Headline:  headline,
		Narration: narration,
	}
	if len(rows) == 0 {
		card.BulletLines = []string{"No matching models in current report"}
		return card
	}

	card.BulletLines = make([]string, len(rows))
	for i, row := range rows {
		card.BulletLines[i] = fmt.Sprintf("%s - %s - %s", row.Model, row.Provider, row.Scene)
	}
	return card
}
// pickSceneRows returns up to three rows whose Scene contains the given
// keyword. Domestic rows are searched first, then international rows, and the
// search stops as soon as three matches have been collected.
func pickSceneRows(report dailyReport, scene string) []reportRow {
	const limit = 3

	picked := make([]reportRow, 0, limit)
	for _, group := range [][]reportRow{report.Domestic, report.International} {
		for i := range group {
			if !strings.Contains(group[i].Scene, scene) {
				continue
			}
			picked = append(picked, group[i])
			if len(picked) == limit {
				return picked
			}
		}
	}
	return picked
}
// firstRows returns a fresh slice holding the first n rows, or all of them
// when fewer than n are available.
func firstRows(rows []reportRow, n int) []reportRow {
	if n > len(rows) {
		n = len(rows)
	}
	head := make([]reportRow, n)
	copy(head, rows[:n])
	return head
}
// generateDigestArtifacts writes every artifact of a digest below outputDir
// and returns the manifest describing them.
//
// For each card it writes a PNG frame under "frames" and a markdown script
// under "scripts", then an animated GIF of all frames (video_digest.gif), a
// WAV file (narration.wav) and finally manifest.json. The first error stops
// the run and is returned with an empty manifest; files written before the
// failure are left in place.
//
// Note that the cards slice passed in is updated in place: each element gets
// its FramePath and ScriptPath set.
func generateDigestArtifacts(report dailyReport, cards []DigestCard, outputDir string) (digestManifest, error) {
scriptDir := filepath.Join(outputDir, "scripts")
frameDir := filepath.Join(outputDir, "frames")
if err := os.MkdirAll(scriptDir, 0o755); err != nil {
return digestManifest{}, err
}
if err := os.MkdirAll(frameDir, 0o755); err != nil {
return digestManifest{}, err
}
frames := make([]*image.Paletted, 0, len(cards))
delays := make([]int, 0, len(cards))
for i, card := range cards {
// File names carry a 1-based, zero-padded position plus the card slug.
frame := renderCardFrame(card, i)
framePath := filepath.Join(frameDir, fmt.Sprintf("%02d_%s.png", i+1, card.Slug))
if err := writePNG(framePath, frame); err != nil {
return digestManifest{}, err
}
scriptPath := filepath.Join(scriptDir, fmt.Sprintf("%02d_%s.md", i+1, card.Slug))
if err := os.WriteFile(scriptPath, []byte(renderCardScript(card)), 0o644); err != nil {
return digestManifest{}, err
}
// card is a copy of the slice element, so store it back after updating.
card.FramePath = framePath
card.ScriptPath = scriptPath
cards[i] = card
frames = append(frames, frame)
// Every frame gets the same delay value of 120 in the GIF.
delays = append(delays, 120)
}
videoPath := filepath.Join(outputDir, "video_digest.gif")
if err := writeAnimatedGIF(videoPath, frames, delays); err != nil {
return digestManifest{}, err
}
audioData, err := buildNarrationAudio(cards)
if err != nil {
return digestManifest{}, err
}
audioPath := filepath.Join(outputDir, "narration.wav")
if err := os.WriteFile(audioPath, audioData, 0o644); err != nil {
return digestManifest{}, err
}
manifest := digestManifest{
ReportDate: report.ReportDate,
SourceReport: report.SourceReport,
GeneratedAt: time.Now().Format(time.RFC3339),
OutputDir: outputDir,
VideoPath: videoPath,
AudioPath: audioPath,
Cards: cards,
}
// The manifest is written last, after all artifacts it points to.
manifestPath := filepath.Join(outputDir, "manifest.json")
payload, err := json.MarshalIndent(manifest, "", " ")
if err != nil {
return digestManifest{}, err
}
if err := os.WriteFile(manifestPath, payload, 0o644); err != nil {
return digestManifest{}, err
}
return manifest, nil
}
// renderCardScript renders a card as a small markdown document: a title
// heading followed by "Headline", "Narration" and "Bullet Lines" sections,
// each item written as a "- " list entry.
func renderCardScript(card DigestCard) string {
	lines := make([]string, 0, 9+len(card.BulletLines))
	lines = append(lines,
		"# "+card.Title,
		"",
		"## Headline",
		"- "+card.Headline,
		"",
		"## Narration",
		"- "+card.Narration,
		"",
		"## Bullet Lines",
	)
	for _, bullet := range card.BulletLines {
		lines = append(lines, "- "+bullet)
	}
	// Every line, including the last one, ends with a newline.
	return strings.Join(lines, "\n") + "\n"
}
// writePNG encodes img as PNG into a newly created (or truncated) file at
// path.
//
// It returns the error from creating the file or from encoding. If encoding
// succeeded, an error from closing the file is returned as well, because a
// failed close can mean the data never reached disk.
func writePNG(path string, img image.Image) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encode error if there is one; otherwise surface the
		// close error instead of silently dropping it.
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()
	return png.Encode(f, img)
}
// writeAnimatedGIF encodes frames as an animated GIF into a newly created (or
// truncated) file at path. delays holds one delay per frame and is passed to
// the encoder unchanged; the loop count is 0.
//
// It returns the error from creating the file or from encoding. If encoding
// succeeded, an error from closing the file is returned as well, because a
// failed close can mean the data never reached disk.
func writeAnimatedGIF(path string, frames []*image.Paletted, delays []int) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encode error if there is one; otherwise surface the
		// close error instead of silently dropping it.
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()
	return gif.EncodeAll(f, &gif.GIF{Image: frames, Delay: delays, LoopCount: 0})
}
// renderCardFrame draws one 640x360 paletted frame for a card.
//
// The background colour is chosen from slideBackgrounds by the card index.
// On top of it the frame gets an inner panel, a header band in the background
// colour and a body panel, followed by the title, the headline, up to three
// bullet lines and a fixed footer line. All text is passed through
// sanitizeFrameText before drawing.
func renderCardFrame(card DigestCard, index int) *image.Paletted {
	bounds := image.Rect(0, 0, 640, 360)
	canvas := image.NewPaletted(bounds, framePalette)

	accent := slideBackgrounds[index%len(slideBackgrounds)]
	draw.Draw(canvas, bounds, image.NewUniform(framePalette[accent]), image.Point{}, draw.Src)

	// Panels, painted back to front.
	panels := []struct {
		x1, y1, x2, y2 int
		colour         uint8
	}{
		{18, 18, 622, 342, 0},
		{28, 28, 612, 88, accent},
		{28, 104, 612, 332, 1},
	}
	for _, p := range panels {
		fillRect(canvas, p.x1, p.y1, p.x2, p.y2, p.colour)
	}

	const leftMargin = 40
	drawRasterText(canvas, leftMargin, 42, 3, sanitizeFrameText(card.Title), 3)
	drawRasterText(canvas, leftMargin, 116, 2, sanitizeFrameText(card.Headline), 4)
	for row, bullet := range firstStrings(card.BulletLines, 3) {
		drawRasterText(canvas, leftMargin, 160+row*42, 2, sanitizeFrameText(bullet), 3)
	}
	drawRasterText(canvas, leftMargin, 302, 1, sanitizeFrameText("LLM INTELLIGENCE VIDEO DIGEST"), 4)
	return canvas
}
// firstStrings returns a fresh slice holding the first n lines, or all of
// them when fewer than n are available.
func firstStrings(lines []string, n int) []string {
	if n > len(lines) {
		n = len(lines)
	}
	return append(make([]string, 0, n), lines[:n]...)
}
// sanitizeFrameText prepares text for the raster font: the input is
// upper-cased, every rune that has no glyph and is not an ASCII letter or
// digit is replaced by a space, and runs of whitespace are collapsed to a
// single space with leading and trailing whitespace removed.
func sanitizeFrameText(input string) string {
	var kept strings.Builder
	for _, r := range strings.ToUpper(input) {
		_, hasGlyph := glyphs[r]
		isLetter := r >= 'A' && r <= 'Z'
		isDigit := r >= '0' && r <= '9'
		if hasGlyph || isLetter || isDigit {
			kept.WriteRune(r)
		} else {
			kept.WriteRune(' ')
		}
	}
	return strings.Join(strings.Fields(kept.String()), " ")
}
// fillRect sets every pixel in the half-open rectangle [x1,x2) x [y1,y2) to
// palette index idx. Coordinates outside the image are left to
// SetColorIndex, so a rectangle may extend past the image edges.
func fillRect(img *image.Paletted, x1 int, y1 int, x2 int, y2 int, idx uint8) {
	for col := x1; col < x2; col++ {
		for row := y1; row < y2; row++ {
			img.SetColorIndex(col, row, idx)
		}
	}
}
// drawRasterText paints text onto img with the built-in 5x7 bitmap font,
// starting with its top-left corner at (x, y). Every font pixel is drawn as a
// scale x scale square in palette index idx, and each character advances the
// pen by six scaled pixels. Runes without a glyph are drawn as a space.
func drawRasterText(img *image.Paletted, x int, y int, scale int, text string, idx uint8) {
	advance := 6 * scale
	penX := x
	for _, r := range text {
		glyph, known := glyphs[r]
		if !known {
			glyph = glyphs[' ']
		}
		for row := 0; row < len(glyph); row++ {
			top := y + row*scale
			for col := 0; col < 5; col++ {
				// Bit 4 of a row is its leftmost pixel.
				mask := uint8(1) << uint(4-col)
				if glyph[row]&mask != 0 {
					left := penX + col*scale
					fillRect(img, left, top, left+scale, top+scale, idx)
				}
			}
		}
		penX += advance
	}
}
// buildNarrationAudio produces a WAV file (16 kHz) with one tone per card,
// each followed by a fifth of a second of silence. The tone pitch rises by
// 55 Hz per card starting at 330 Hz, and its length grows with the number of
// bullet lines on the card. The returned error is always nil.
func buildNarrationAudio(cards []DigestCard) ([]byte, error) {
	const sampleRate = 16000

	gap := make([]int16, sampleRate/5)
	var track []int16
	for idx := range cards {
		pitch := 330.0 + float64(idx)*55.0
		seconds := 0.9 + float64(len(cards[idx].BulletLines))*0.18
		track = append(track, synthTone(pitch, seconds, sampleRate)...)
		track = append(track, gap...)
	}
	return encodeWAV(track, sampleRate), nil
}
// synthTone generates duration seconds of a tone at sampleRate samples per
// second: a sine wave at freq mixed with a quieter sine one octave below.
// The amplitude ramps up linearly over the first 1/50 s and down over the
// last 1/25 s, and samples are scaled by 12000 before conversion to int16.
func synthTone(freq float64, duration float64, sampleRate int) []int16 {
	total := int(duration * float64(sampleRate))
	attack := sampleRate / 50
	release := sampleRate / 25

	wave := make([]int16, 0, total)
	for n := 0; n < total; n++ {
		gain := 1.0
		if n < attack {
			gain = float64(n) / float64(attack)
		}
		if left := total - n; n > total-release && left > 0 {
			// Inside the fade-out window the lower of the two ramps wins.
			gain = minFloat(gain, float64(left)/float64(release))
		}

		seconds := float64(n) / float64(sampleRate)
		sample := math.Sin(2*math.Pi*freq*seconds) + 0.35*math.Sin(2*math.Pi*(freq/2)*seconds)
		wave = append(wave, int16(sample*gain*12000))
	}
	return wave
}
// encodeWAV wraps 16-bit mono PCM samples in a canonical 44-byte RIFF/WAVE
// header and returns the complete file contents.
//
// samples are written little-endian in order; sampleRate is recorded in the
// header and used to derive the byte rate. An empty samples slice yields a
// valid header-only file.
func encodeWAV(samples []int16, sampleRate int) []byte {
	const (
		channels      = 1
		bitsPerSample = 16
		headerSize    = 44
	)
	dataSize := len(samples) * 2
	byteRate := sampleRate * channels * bitsPerSample / 8
	blockAlign := channels * bitsPerSample / 8

	// The final size is known up front, so reserve it once and encode the
	// integers directly instead of going through reflection-based
	// binary.Write for every field and sample.
	var buf bytes.Buffer
	buf.Grow(headerSize + dataSize)
	var scratch [4]byte
	putU16 := func(v uint16) {
		binary.LittleEndian.PutUint16(scratch[:2], v)
		buf.Write(scratch[:2])
	}
	putU32 := func(v uint32) {
		binary.LittleEndian.PutUint32(scratch[:4], v)
		buf.Write(scratch[:4])
	}

	// RIFF chunk descriptor; the size field counts everything after it.
	buf.WriteString("RIFF")
	putU32(uint32(36 + dataSize))
	buf.WriteString("WAVE")

	// "fmt " sub-chunk: 16 bytes of PCM (format tag 1) parameters.
	buf.WriteString("fmt ")
	putU32(16)
	putU16(1)
	putU16(channels)
	putU32(uint32(sampleRate))
	putU32(uint32(byteRate))
	putU16(uint16(blockAlign))
	putU16(bitsPerSample)

	// "data" sub-chunk with the raw samples.
	buf.WriteString("data")
	putU32(uint32(dataSize))
	for _, sample := range samples {
		putU16(uint16(sample))
	}
	return buf.Bytes()
}
// min returns the smaller of two ints.
func min(a int, b int) int {
	if b <= a {
		return b
	}
	return a
}
// minFloat returns a when it is strictly less than b, and b otherwise.
func minFloat(a float64, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
// glyphs is the built-in 5x7 bitmap font used by drawRasterText. Each glyph
// is seven rows from top to bottom; a row is a 5-bit mask in which bit 4 is
// the leftmost pixel and bit 0 the rightmost. The font covers the space, a
// few punctuation marks, the digits and the upper-case ASCII letters.
var glyphs = map[rune][7]uint8{
' ': {0, 0, 0, 0, 0, 0, 0},
'-': {0, 0, 0, 31, 0, 0, 0},
'.': {0, 0, 0, 0, 0, 12, 12},
':': {0, 12, 12, 0, 12, 12, 0},
'/': {1, 2, 4, 8, 16, 0, 0},
'+': {0, 4, 4, 31, 4, 4, 0},
'(': {2, 4, 8, 8, 8, 4, 2},
')': {8, 4, 2, 2, 2, 4, 8},
'0': {14, 17, 19, 21, 25, 17, 14},
'1': {4, 12, 4, 4, 4, 4, 14},
'2': {14, 17, 1, 2, 4, 8, 31},
'3': {30, 1, 1, 14, 1, 1, 30},
'4': {2, 6, 10, 18, 31, 2, 2},
'5': {31, 16, 16, 30, 1, 1, 30},
'6': {14, 16, 16, 30, 17, 17, 14},
'7': {31, 1, 2, 4, 8, 8, 8},
'8': {14, 17, 17, 14, 17, 17, 14},
'9': {14, 17, 17, 15, 1, 1, 14},
'A': {14, 17, 17, 31, 17, 17, 17},
'B': {30, 17, 17, 30, 17, 17, 30},
'C': {14, 17, 16, 16, 16, 17, 14},
'D': {28, 18, 17, 17, 17, 18, 28},
'E': {31, 16, 16, 30, 16, 16, 31},
'F': {31, 16, 16, 30, 16, 16, 16},
'G': {14, 17, 16, 16, 19, 17, 15},
'H': {17, 17, 17, 31, 17, 17, 17},
'I': {14, 4, 4, 4, 4, 4, 14},
'J': {7, 2, 2, 2, 18, 18, 12},
'K': {17, 18, 20, 24, 20, 18, 17},
'L': {16, 16, 16, 16, 16, 16, 31},
'M': {17, 27, 21, 17, 17, 17, 17},
'N': {17, 25, 21, 19, 17, 17, 17},
'O': {14, 17, 17, 17, 17, 17, 14},
'P': {30, 17, 17, 30, 16, 16, 16},
'Q': {14, 17, 17, 17, 21, 18, 13},
'R': {30, 17, 17, 30, 20, 18, 17},
'S': {15, 16, 16, 14, 1, 1, 30},
'T': {31, 4, 4, 4, 4, 4, 4},
'U': {17, 17, 17, 17, 17, 17, 14},
'V': {17, 17, 17, 17, 17, 10, 4},
'W': {17, 17, 17, 17, 21, 27, 17},
'X': {17, 17, 10, 4, 10, 17, 17},
'Y': {17, 17, 10, 4, 4, 4, 4},
'Z': {31, 1, 2, 4, 8, 16, 31},
}

View File

@@ -0,0 +1,77 @@
//go:build llm_script
package main
import (
"bytes"
"testing"
)
// sampleDailyReport is a minimal daily report fixture for the parser tests:
// a report date line, a data quality table with three statistics, three
// international rows and four domestic rows covering the code, reasoning,
// vision and chat scenes.
const sampleDailyReport = `# Daily Report
**报告日期**: 2026-05-11
## 📊 数据质量摘要
| 指标 | 数值 |
|------|------|
| 模型总数 | 501 |
| CNY定价 | 126 |
| USD定价 | 375 |
## 🌍 国际推荐模型 TOP 5
| 排名 | 模型 | 厂商 | 场景 | 输入(原价) | 输出(原价) | 上下文 |
|------|------|------|------|-----------|-----------|--------|
| 1 | GPT-5.4 Mini | OpenAI | 代码 | $0.75 | $4.50 | 200000 |
| 2 | DeepSeek-V3 | DeepSeek | 推理 | ¥1.00 | ¥2.00 | 64000 |
| 3 | Qwen3-VL-32B | Alibaba | 视觉 | ¥0.50 | ¥1.00 | 32000 |
## 🇨🇳 国内模型 TOP 10
| 排名 | 模型 | 厂商 | 场景 | 输入(CNY) | 输出(CNY) | 上下文 |
|------|------|------|------|-----------|-----------|--------|
| 1 | DeepSeek V4 Flash | DeepSeek | 对话 | ¥1.02 | ¥2.03 | 1000000 |
| 2 | GLM-4.6V-FlashX | Zhipu AI | 视觉 | ¥0.15 | ¥1.50 | 8000 |
| 3 | doubao-seed-code | ByteDance | 代码 | ¥1.20 | ¥2.40 | 32000 |
| 4 | deepseek-r1 | ByteDance | 推理 | ¥4.00 | ¥8.00 | 32000 |
`
// TestExtractDigestCardsBuildsFiveCategories parses the sample report and
// checks that five cards are built, that the first is the code card with at
// least one bullet line, and that the last is the global card.
func TestExtractDigestCardsBuildsFiveCategories(t *testing.T) {
	parsed, err := parseDailyReport([]byte(sampleDailyReport))
	if err != nil {
		t.Fatalf("parseDailyReport returned error: %v", err)
	}

	cards := buildDigestCards(parsed)
	if count := len(cards); count != 5 {
		t.Fatalf("expected 5 digest cards, got %d", count)
	}

	first, last := cards[0], cards[4]
	if first.Slug != "code" {
		t.Fatalf("expected first card slug code, got %q", first.Slug)
	}
	if len(first.BulletLines) == 0 {
		t.Fatal("expected code card to contain bullet lines")
	}
	if last.Slug != "global" {
		t.Fatalf("expected last card slug global, got %q", last.Slug)
	}
}
func TestBuildNarrationAudioProducesWAV(t *testing.T) {
audio, err := buildNarrationAudio([]DigestCard{
{Slug: "code", Narration: "Code digest update"},
{Slug: "global", Narration: "Global digest update"},
})
if err != nil {
t.Fatalf("buildNarrationAudio returned error: %v", err)
}
if len(audio) < 44 {
t.Fatalf("expected wav payload, got %d bytes", len(audio))
}
if !bytes.HasPrefix(audio, []byte("RIFF")) {
t.Fatalf("expected RIFF header, got %q", audio[:4])
}
}

View File

@@ -0,0 +1,162 @@
//go:build llm_script
package main
import (
"database/sql"
"encoding/json"
"log"
"os"
_ "github.com/lib/pq"
)
// ModelPricing is the flattened record the import loop builds for each raw
// entry before writing it to the database: the model's identity, its
// provider and operator, the region and currency of the price, the input and
// output prices, the context length and the source URL.
type ModelPricing struct {
ModelID string
ModelName string
ProviderName string
ProviderCountry string
OperatorName string
OperatorType string
Region string
Currency string
InputPrice float64
OutputPrice float64
ContextLength int
IsFree bool
SourceURL string
Modality string
}
// main seeds ByteDance model pricing into PostgreSQL.
//
// It reads /tmp/bytedance_raw.json and, for every entry, finds or creates the
// provider, operator and model rows before upserting today's region_pricing
// row. A failure for one model is logged and that model is skipped; the final
// log line reports how many models were actually written.
//
// The connection string comes from DATABASE_URL, falling back to a local
// unix-socket DSN.
func main() {
	dsn := os.Getenv("DATABASE_URL")
	if dsn == "" {
		dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
	}
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	// sql.Open only validates its arguments; ping so an unreachable database
	// fails once up front instead of once per model.
	if err := db.Ping(); err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}

	// Read raw data
	data, err := os.ReadFile("/tmp/bytedance_raw.json")
	if err != nil {
		log.Fatalf("Failed to read raw data: %v", err)
	}
	// Operator, Region and Currency are decoded but not used below: the
	// importer hard-codes those values for every ByteDance entry.
	var raw struct {
		Bytedance []struct {
			Model         string  `json:"model"`
			InputPrice    float64 `json:"inputPrice"`
			OutputPrice   float64 `json:"outputPrice"`
			ContextLength int     `json:"contextLength"`
			Operator      string  `json:"operator"`
			Region        string  `json:"region"`
			Currency      string  `json:"currency"`
		} `json:"bytedance"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		log.Fatalf("Failed to parse raw data: %v", err)
	}

	log.Printf("Importing %d ByteDance models...", len(raw.Bytedance))
	batchID := "manual-seed"
	imported := 0
	for _, b := range raw.Bytedance {
		p := ModelPricing{
			ModelID:         "bytedance-" + b.Model,
			ModelName:       b.Model,
			ProviderName:    "ByteDance",
			ProviderCountry: "CN",
			OperatorName:    "ByteDance Volcano",
			OperatorType:    "official",
			Region:          "CN",
			Currency:        "CNY",
			InputPrice:      b.InputPrice,
			OutputPrice:     b.OutputPrice,
			ContextLength:   b.ContextLength,
			// A model is free only when both directions cost nothing; this
			// matches the rule used by the Baidu importer.
			IsFree:    b.InputPrice == 0 && b.OutputPrice == 0,
			SourceURL: "https://www.volcengine.com/docs/82379/1099320",
			Modality:  "text",
		}

		// Find or create provider
		var providerID int64
		err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
				p.ProviderName, p.ProviderCountry, "",
			).Scan(&providerID)
		}
		if err != nil {
			log.Printf("Provider error: %v", err)
			continue
		}

		// Find or create operator
		var operatorID int64
		err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
				p.OperatorName, p.ProviderCountry,
			).Scan(&operatorID)
		}
		if err != nil {
			log.Printf("Operator error: %v", err)
			continue
		}

		// Find or create model
		var modelID int64
		err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
				p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
			).Scan(&modelID)
		}
		if err != nil {
			log.Printf("Model error for %s: %v", p.ModelID, err)
			continue
		}

		// Insert pricing; free entries are tagged as free_tier with a generic
		// quota note pointing at the source page.
		sourceType := p.OperatorType
		freeQuota := ""
		freeLimitations := "[]"
		rateLimit := "{}"
		if p.IsFree {
			sourceType = "free_tier"
			freeQuota = "Imported free-tier pricing entry"
			freeLimitations = `["See source_url for current quota and policy"]`
		}
		_, err = db.Exec(
			`INSERT INTO region_pricing
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = CURRENT_TIMESTAMP`,
			modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
			sourceType, freeQuota, freeLimitations, rateLimit,
		)
		if err != nil {
			log.Printf("Pricing error for %s: %v", p.ModelID, err)
			continue
		}
		imported++
	}

	// Report what was really written: skipped models are not successes.
	log.Printf("Successfully imported %d of %d ByteDance models", imported, len(raw.Bytedance))
}

View File

@@ -0,0 +1,235 @@
//go:build llm_script
package main
import (
"database/sql"
"encoding/json"
"fmt"
"log"
"os"
"strings"
_ "github.com/lib/pq"
)
// RawData mirrors the JSON document read from /tmp/phase2_raw_data.json.
// It holds one list of scraped rows per vendor.
type RawData struct {
// Zhipu rows carry prices and context sizes as free-form strings.
// NOTE(review): presumably meant for parseZhipuPrice / extractContextLength;
// the visible main does not import this list — confirm.
Zhipu []struct {
Model string `json:"model"`
Context string `json:"context"`
InputPrice string `json:"inputPrice"`
OutputPrice string `json:"outputPrice"`
Operator string `json:"operator"`
Region string `json:"region"`
Currency string `json:"currency"`
} `json:"zhipu"`
// Baidu rows are one price line per model and Type; main merges them per
// model. Prices are pointers so an absent price can be told apart from 0.
Baidu []struct {
Model string `json:"model"`
Type string `json:"type"`
InputPrice *float64 `json:"inputPrice"`
OutputPrice *float64 `json:"outputPrice"`
Operator string `json:"operator"`
Region string `json:"region"`
Currency string `json:"currency"`
} `json:"baidu"`
}
// ModelPricing is one normalized pricing record assembled by the importer
// before it is written to the models and region_pricing tables.
type ModelPricing struct {
	// ModelID is stored as models.external_id; ModelName as models.name.
	ModelID, ModelName string
	// Provider that publishes the model (model_provider.name / country).
	ProviderName, ProviderCountry string
	// Operator that sells access. OperatorType becomes the pricing row's
	// source_type unless the entry is free.
	OperatorName, OperatorType string
	// Region and Currency are part of the region_pricing conflict key.
	Region, Currency string
	// Prices are written to the *_price_per_mtok columns.
	InputPrice, OutputPrice float64
	// ContextLength is stored as models.context_length.
	ContextLength int
	// IsFree switches the pricing row to the "free_tier" source type.
	IsFree bool
	// SourceURL is the page the price came from; Modality is stored on the model.
	SourceURL, Modality string
	// SceneTags lists usage scenarios for the model.
	SceneTags []string
}
// parseZhipuPrice converts a scraped price label into a number.
//
// Labels containing "免费" (e.g. "免费", "限时免费") count as 0. Otherwise the
// leading decimal number is read, so "6元" yields 6. Anything that does not
// start with a number also yields 0.
func parseZhipuPrice(s string) float64 {
	if strings.Contains(s, "免费") {
		return 0
	}
	var price float64
	if _, err := fmt.Sscanf(s, "%f", &price); err != nil {
		// No leading number: treat the label as "no price".
		return 0
	}
	return price
}
// extractContextLength maps a free-form context-window label to a token count.
//
// The label is matched by substring against a fixed list of known sizes and
// the first hit wins. Order matters: "128K" must be tried before "8K" because
// the former contains the latter. Unknown labels return 0.
func extractContextLength(context string) int {
	rules := []struct {
		markers []string
		tokens  int
	}{
		{[]string{"1M", "1000K"}, 1000000},
		{[]string{"200K"}, 200000},
		{[]string{"128K"}, 128000},
		{[]string{"32K"}, 32000},
		{[]string{"8K"}, 8000},
		{[]string{"262144", "256K"}, 262144},
		{[]string{"8192"}, 8192},
	}
	for _, rule := range rules {
		for _, marker := range rule.markers {
			if strings.Contains(context, marker) {
				return rule.tokens
			}
		}
	}
	return 0
}
// main imports Baidu pricing from /tmp/phase2_raw_data.json into PostgreSQL.
//
// Baidu rows arrive one per (model, price type); they are first merged into a
// single input/output price pair per model. For each model the provider,
// operator and model rows are then found or created, and today's
// region_pricing row is upserted. A failure for one model is logged and that
// model is skipped; the final log line reports how many models were written.
//
// NOTE(review): raw.Zhipu is decoded but not imported here — confirm that is
// intended.
func main() {
	dsn := os.Getenv("DATABASE_URL")
	if dsn == "" {
		dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
	}
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	// sql.Open only validates its arguments; ping so an unreachable database
	// fails once up front instead of once per model.
	if err := db.Ping(); err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}

	// Read raw data
	data, err := os.ReadFile("/tmp/phase2_raw_data.json")
	if err != nil {
		log.Fatalf("Failed to read raw data: %v", err)
	}
	var raw RawData
	if err := json.Unmarshal(data, &raw); err != nil {
		log.Fatalf("Failed to parse raw data: %v", err)
	}

	var prices []ModelPricing
	batchID := "manual-seed"

	// Process Baidu data
	modelPrices := make(map[string]map[string]float64) // model -> type -> price
	for _, b := range raw.Baidu {
		if modelPrices[b.Model] == nil {
			modelPrices[b.Model] = make(map[string]float64)
		}
		if b.InputPrice != nil {
			if strings.Contains(b.Type, "输入") {
				modelPrices[b.Model]["input"] = *b.InputPrice * 1000000 // Convert to per 1M
			}
			// Rows typed as output may carry their price in the inputPrice
			// column; an explicit outputPrice below takes precedence.
			if strings.Contains(b.Type, "输出") {
				modelPrices[b.Model]["output"] = *b.InputPrice * 1000000
			}
		}
		if b.OutputPrice != nil {
			if strings.Contains(b.Type, "输出") {
				modelPrices[b.Model]["output"] = *b.OutputPrice * 1000000
			}
		}
	}
	// Map iteration order is random, so models are imported in no fixed order.
	for model, pricesMap := range modelPrices {
		prices = append(prices, ModelPricing{
			ModelID:         "baidu-" + strings.ToLower(strings.ReplaceAll(model, " ", "-")),
			ModelName:       model,
			ProviderName:    "Baidu",
			ProviderCountry: "CN",
			OperatorName:    "Baidu Qianfan",
			OperatorType:    "official",
			Region:          "CN",
			Currency:        "CNY",
			InputPrice:      pricesMap["input"],
			OutputPrice:     pricesMap["output"],
			IsFree:          pricesMap["input"] == 0 && pricesMap["output"] == 0,
			SourceURL:       "https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya",
			Modality:        "text",
		})
	}
	log.Printf("Parsed %d unique models from Baidu", len(prices))

	// Save to database
	imported := 0
	for _, p := range prices {
		// Find or create provider
		var providerID int64
		err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
				p.ProviderName, p.ProviderCountry, "",
			).Scan(&providerID)
		}
		if err != nil {
			log.Printf("Provider error: %v", err)
			continue
		}

		// Find or create operator
		var operatorID int64
		err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
				p.OperatorName, p.ProviderCountry,
			).Scan(&operatorID)
		}
		if err != nil {
			log.Printf("Operator error: %v", err)
			continue
		}

		// Find or create model
		var modelID int64
		err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID)
		if err == sql.ErrNoRows {
			err = db.QueryRow(
				`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
				p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
			).Scan(&modelID)
		}
		if err != nil {
			// Name the model so a failure can be traced back to its row.
			log.Printf("Model error for %s: %v", p.ModelID, err)
			continue
		}

		// Insert pricing; free entries are tagged as free_tier with a generic
		// quota note pointing at the source page.
		sourceType := p.OperatorType
		freeQuota := ""
		freeLimitations := "[]"
		rateLimit := "{}"
		if p.IsFree {
			sourceType = "free_tier"
			freeQuota = "Imported free-tier pricing entry"
			freeLimitations = `["See source_url for current quota and policy"]`
		}
		_, err = db.Exec(
			`INSERT INTO region_pricing
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = CURRENT_TIMESTAMP`,
			modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
			sourceType, freeQuota, freeLimitations, rateLimit,
		)
		if err != nil {
			log.Printf("Pricing error for %s: %v", p.ModelID, err)
			continue
		}
		imported++
	}

	// Report what was really written: skipped models are not successes.
	log.Printf("Successfully imported %d of %d models into database", imported, len(prices))
}

View File

@@ -0,0 +1,420 @@
//go:build llm_script
package main
import (
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"time"
_ "github.com/lib/pq"
)
// importTencentSubscriptionConfig carries the command-line options of the
// Tencent subscription importer.
type importTencentSubscriptionConfig struct {
	// URL is the public catalog page; Fixture is an optional local sample file.
	URL, Fixture string
	// DryRun parses and prints a summary without touching the database.
	DryRun bool
	// Timeout bounds the HTTP client used to fetch the catalog.
	Timeout time.Duration
}
// subscriptionPlanRow is one subscription plan flattened into the shape that
// is upserted into the subscription_plan table, together with the provider
// and operator details needed to find or create their rows.
type subscriptionPlanRow struct {
	// Provider identity (model_provider.name, name_cn, country).
	ProviderName, ProviderCN, ProviderCountry string
	// Operator identity (operator.name, name_cn, country, type).
	OperatorName, OperatorCN, OperatorCountry, OperatorType string
	// Plan identity. PlanCode, with the provider and effective date, forms
	// the upsert conflict key.
	PlanFamily, PlanCode, PlanName, Tier string
	// Billing terms.
	BillingCycle, Currency string
	ListPrice              float64
	PriceUnit              string
	// Included quota.
	QuotaValue int64
	QuotaUnit  string
	// ContextWindow is stored as NULL when it is 0.
	ContextWindow int
	// PlanScope is the plan series; ModelScope is a JSON array of model IDs.
	PlanScope, ModelScope string
	// Provenance. PublishedAt uses "2006-01-02 15:04:05" and EffectiveDate
	// uses "2006-01-02"; both are parsed before being written.
	SourceURL, PublishedAt, EffectiveDate string
	// Notes holds the plan's scene description.
	Notes string
}
// main is the CLI entry point of the Tencent subscription importer.
//
// It loads .env files, parses the flags into a config, opens the database
// unless -dry-run is set, and runs the import. Any failure is printed to
// stderr and the process exits with status 1.
func main() {
	loadImportProjectEnv()

	// Bind flags straight to the config; only the timeout needs converting
	// from whole seconds afterwards.
	var (
		cfg            importTencentSubscriptionConfig
		timeoutSeconds int
	)
	flag.StringVar(&cfg.URL, "url", defaultTencentCatalogURL, "腾讯云公开目录 URL")
	flag.StringVar(&cfg.Fixture, "fixture", "", "本地 HTML/Text 样例文件,优先用于离线导入")
	flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", int(defaultTencentCatalogTimeout/time.Second), "请求超时(秒)")
	flag.Parse()
	cfg.Timeout = time.Duration(timeoutSeconds) * time.Second

	fail := func(format string, args ...any) {
		fmt.Fprintf(os.Stderr, format, args...)
		os.Exit(1)
	}

	// A dry run never touches the database, so db stays nil in that case.
	var db *sql.DB
	if !cfg.DryRun {
		dsn := os.Getenv("DATABASE_URL")
		if dsn == "" {
			dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
		}
		conn, err := sql.Open("postgres", dsn)
		if err != nil {
			fail("open db: %v\n", err)
		}
		defer conn.Close()
		db = conn
	}

	if err := runTencentSubscriptionImport(cfg, db, os.Stdout); err != nil {
		fail("import_tencent_subscription: %v\n", err)
	}
}
// loadImportProjectEnv loads ".env.local" and then ".env" from the working
// directory. Variables already set are never overwritten, so values from
// ".env.local" take precedence over ".env".
func loadImportProjectEnv() {
	loadImportEnvFile(".env.local")
	loadImportEnvFile(".env")
}
// loadImportEnvFile reads KEY=VALUE lines from path into the process
// environment.
//
// Blank lines, lines starting with "#", lines without "=" and lines with an
// empty key are skipped. Keys and values are trimmed of surrounding
// whitespace, and values also lose any leading or trailing quote characters.
// Variables already present in the environment are left untouched. A file
// that cannot be read is silently ignored.
func loadImportEnvFile(path string) {
	content, err := os.ReadFile(path)
	if err != nil {
		return
	}
	for _, entry := range strings.Split(string(content), "\n") {
		entry = strings.TrimSpace(entry)
		if entry == "" || strings.HasPrefix(entry, "#") {
			continue
		}
		name, rawValue, found := strings.Cut(entry, "=")
		name = strings.TrimSpace(name)
		if !found || name == "" {
			continue
		}
		if _, alreadySet := os.LookupEnv(name); alreadySet {
			continue
		}
		// Best effort: a key the OS rejects is simply not exported.
		_ = os.Setenv(name, strings.Trim(strings.TrimSpace(rawValue), `"'`))
	}
}
// runTencentSubscriptionImport fetches and parses the Tencent catalog, turns
// it into subscription plan rows and either prints a summary (dry run) or
// upserts the rows into the database.
//
// Outside a dry run db must be non-nil. After a successful upsert the summary
// line is written to out and also saved via writeTencentImportSummary.
//
// An error is returned when fetching, parsing or writing fails, and also when
// the catalog contains no plans: the summary reports the first plan's
// provider and operator, so an empty result cannot be summarized.
func runTencentSubscriptionImport(cfg importTencentSubscriptionConfig, db *sql.DB, out io.Writer) error {
	raw, err := fetchTencentCatalogContent(fetchTencentCatalogConfig{
		URL:     cfg.URL,
		DryRun:  cfg.DryRun,
		Timeout: cfg.Timeout,
		Fixture: cfg.Fixture,
	}, &http.Client{Timeout: cfg.Timeout})
	if err != nil {
		return err
	}
	catalog, err := parseTencentCatalog(raw)
	if err != nil {
		return err
	}
	plans := buildSubscriptionPlans(catalog, cfg.URL)
	// Guard before any plans[0] access below; indexing an empty slice panics.
	if len(plans) == 0 {
		return fmt.Errorf("tencent catalog contains no subscription plans")
	}

	if cfg.DryRun {
		_, err = fmt.Fprintf(
			out,
			"source=tencent-subscription-import updated_at=%s plans=%d provider=%s operator=%s dry_run=true\n",
			catalog.UpdatedAt,
			len(plans),
			plans[0].ProviderName,
			plans[0].OperatorName,
		)
		return err
	}

	if db == nil {
		return fmt.Errorf("db is required when dry-run=false")
	}
	if err := upsertSubscriptionPlans(db, plans); err != nil {
		return err
	}

	// Report the table's total size so the summary shows the import's effect.
	var tableRows int
	if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil {
		return fmt.Errorf("count subscription_plan: %w", err)
	}
	summary := fmt.Sprintf(
		"source=tencent-subscription-import updated_at=%s plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n",
		catalog.UpdatedAt,
		len(plans),
		plans[0].ProviderName,
		plans[0].OperatorName,
		tableRows,
	)
	if _, err := io.WriteString(out, summary); err != nil {
		return err
	}
	return writeTencentImportSummary(summary)
}
// buildSubscriptionPlans converts every plan of a parsed Tencent catalog into
// a subscriptionPlanRow, in catalog order.
//
// Models are grouped by series so each plan can report the largest context
// window and the model IDs of its own series. Provider, operator, currency
// and unit fields are fixed Tencent Cloud values; sourceURL is recorded on
// every row.
func buildSubscriptionPlans(catalog tencentCatalog, sourceURL string) []subscriptionPlanRow {
	seriesModels := map[string][]tencentModel{}
	for _, m := range catalog.Models {
		seriesModels[m.Series] = append(seriesModels[m.Series], m)
	}

	rows := make([]subscriptionPlanRow, 0, len(catalog.Plans))
	for _, entry := range catalog.Plans {
		covered := seriesModels[entry.Series]
		row := subscriptionPlanRow{
			ProviderName:    "Tencent",
			ProviderCN:      "腾讯",
			ProviderCountry: "CN",
			OperatorName:    "Tencent Cloud",
			OperatorCN:      "腾讯云",
			OperatorCountry: "CN",
			OperatorType:    "cloud",
			Currency:        "CNY",
			PriceUnit:       "CNY/month",
			QuotaUnit:       "tokens/month",
			SourceURL:       sourceURL,
		}

		// Fields derived from the plan entry itself.
		row.PlanFamily = inferPlanFamily(entry.Series)
		row.PlanCode = slugifyPlanCode(entry.Series, entry.Tier)
		row.PlanName = fmt.Sprintf("%s %s", entry.Series, entry.Tier)
		row.Tier = entry.Tier
		row.BillingCycle = normalizeBillingCycle(entry.BillingCycle)
		row.ListPrice = parsePlanPrice(entry.Price)
		row.QuotaValue = parseQuotaValue(entry.Quota)
		row.PlanScope = entry.Series
		row.Notes = strings.TrimSpace(entry.Scene)

		// Fields derived from the models of the same series.
		row.ContextWindow = maxContextWindow(covered)
		row.ModelScope = encodeModelScope(covered)

		// Fields derived from the catalog timestamp.
		row.PublishedAt = catalog.UpdatedAt
		row.EffectiveDate = extractEffectiveDate(catalog.UpdatedAt)

		rows = append(rows, row)
	}
	return rows
}
// inferPlanFamily classifies a plan series: anything whose name contains
// "coding plan" (case-insensitive) is a "coding_plan", everything else is a
// "token_plan".
func inferPlanFamily(series string) string {
	family := "token_plan"
	if strings.Contains(strings.ToLower(series), "coding plan") {
		family = "coding_plan"
	}
	return family
}
// slugifyPlanCode builds the stable, lower-case plan code for a series/tier
// pair, e.g. ("通用 Token Plan", "Lite") -> "token-plan-lite".
//
// The two known series names are mapped to fixed ASCII prefixes. The combined
// "<series>-<tier>" string is then lower-cased, separators (space, "/", "_",
// ".", ":") become "-", and parentheses are dropped. Full-width parentheses
// are dropped the same way as ASCII ones. Runs of dashes are collapsed and
// leading or trailing dashes are removed.
func slugifyPlanCode(series string, tier string) string {
	seriesCode := strings.TrimSpace(series)
	switch seriesCode {
	case "通用 Token Plan":
		seriesCode = "token-plan"
	case "Hy Token Plan":
		seriesCode = "hy-token-plan"
	}

	slug := strings.ToLower(strings.TrimSpace(seriesCode + "-" + tier))
	slug = strings.NewReplacer(
		" ", "-", "/", "-", "_", "-", ".", "-", ":", "-",
		"(", "", ")", "", "（", "", "）", "",
	).Replace(slug)

	// A Replacer works in a single pass over the original text, so it cannot
	// collapse dashes created by its own substitutions ("a / b" -> "a---b").
	// Collapse them here instead.
	for strings.Contains(slug, "--") {
		slug = strings.ReplaceAll(slug, "--", "-")
	}
	return strings.Trim(slug, "-")
}
// normalizeBillingCycle maps any label containing "月" to "monthly". Other
// labels are returned with surrounding whitespace removed.
func normalizeBillingCycle(raw string) string {
	if !strings.Contains(raw, "月") {
		return strings.TrimSpace(raw)
	}
	return "monthly"
}
// parsePlanPrice extracts the numeric monthly price from a label such as
// "39元/月".
//
// Surrounding whitespace is removed before the "元/月" suffix is stripped, so
// a label with trailing whitespace still parses. Thousands separators
// ("1,299") are accepted. A label that is not a number yields 0.
func parsePlanPrice(raw string) float64 {
	value := strings.TrimSpace(raw)
	value = strings.TrimSpace(strings.TrimSuffix(value, "元/月"))
	value = strings.ReplaceAll(value, ",", "")
	price, err := strconv.ParseFloat(value, 64)
	if err != nil {
		// Unparseable labels are reported as "no price".
		return 0
	}
	return price
}
// quotaValuePattern matches "<number>[万|亿] Tokens"; it is compiled once
// rather than on every call.
var quotaValuePattern = regexp.MustCompile(`([\d.]+)\s*([万亿]?)\s*Tokens`)

// parseQuotaValue extracts a token quota from text such as "3500万 Tokens".
//
// The number may carry the unit 万 (x10,000) or 亿 (x100,000,000). The result
// is rounded to the nearest integer, because products like 0.29 x 1e8 come
// out slightly below the exact value in floating point and plain truncation
// would lose one token. Text without a quota, or with a malformed number,
// yields 0.
func parseQuotaValue(raw string) int64 {
	matches := quotaValuePattern.FindStringSubmatch(raw)
	if len(matches) != 3 {
		return 0
	}
	base, err := strconv.ParseFloat(matches[1], 64)
	if err != nil {
		// e.g. "1.2.3" matches the pattern but is not a number.
		return 0
	}
	switch matches[2] {
	case "万":
		base *= 10000
	case "亿":
		base *= 100000000
	}
	// The pattern only admits digits and dots, so base is non-negative and
	// adding 0.5 before truncating rounds to nearest.
	return int64(base + 0.5)
}
// maxContextWindow returns the largest ContextLength among models, or 0 when
// the slice is empty or no model has a positive length.
func maxContextWindow(models []tencentModel) int {
	widest := 0
	for i := range models {
		if length := models[i].ContextLength; length > widest {
			widest = length
		}
	}
	return widest
}
// encodeModelScope renders the model IDs of models, in order, as a JSON array
// string. An empty input produces "[]".
func encodeModelScope(models []tencentModel) string {
	ids := make([]string, len(models))
	for i := range models {
		ids[i] = models[i].ModelID
	}
	// Marshalling a []string cannot fail, so the error is dropped.
	encoded, _ := json.Marshal(ids)
	return string(encoded)
}
// extractEffectiveDate returns the leading "YYYY-MM-DD" part of a timestamp.
// When updatedAt is too short to hold a date, today's date is used instead.
func extractEffectiveDate(updatedAt string) string {
	const layout = "2006-01-02"
	if len(updatedAt) < len(layout) {
		return time.Now().Format(layout)
	}
	return updatedAt[:len(layout)]
}
// upsertSubscriptionPlans writes plans into subscription_plan, updating rows
// that already exist for the same provider, plan code and effective date.
//
// The provider and operator rows are resolved once from the first plan, so
// all plans are stored under that one provider and operator. Each plan's
// PublishedAt and EffectiveDate strings are parsed before its row is written.
// The first failure aborts the loop and is returned; rows written before it
// remain in place.
//
// An empty plans slice is rejected with an error, because there is no plan to
// resolve the provider and operator from.
func upsertSubscriptionPlans(db *sql.DB, plans []subscriptionPlanRow) error {
	if len(plans) == 0 {
		return fmt.Errorf("no subscription plans to upsert")
	}

	providerID, err := ensureModelProvider(db, plans[0])
	if err != nil {
		return err
	}
	operatorID, err := ensureOperator(db, plans[0])
	if err != nil {
		return err
	}

	for _, plan := range plans {
		publishedAt, err := time.Parse("2006-01-02 15:04:05", plan.PublishedAt)
		if err != nil {
			return fmt.Errorf("parse published_at for %s: %w", plan.PlanCode, err)
		}
		effectiveDate, err := time.Parse("2006-01-02", plan.EffectiveDate)
		if err != nil {
			return fmt.Errorf("parse effective_date for %s: %w", plan.PlanCode, err)
		}
		_, err = db.Exec(
			`INSERT INTO subscription_plan (
provider_id, operator_id, plan_family, plan_code, plan_name, tier,
billing_cycle, currency, list_price, price_unit, quota_value, quota_unit,
context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes
) VALUES (
$1, $2, $3, $4, $5, $6,
$7, $8, $9, $10, $11, $12,
$13, $14, $15, $16, $17, $18, $19
)
ON CONFLICT (provider_id, plan_code, effective_date)
DO UPDATE SET
operator_id = EXCLUDED.operator_id,
plan_family = EXCLUDED.plan_family,
plan_name = EXCLUDED.plan_name,
tier = EXCLUDED.tier,
billing_cycle = EXCLUDED.billing_cycle,
currency = EXCLUDED.currency,
list_price = EXCLUDED.list_price,
price_unit = EXCLUDED.price_unit,
quota_value = EXCLUDED.quota_value,
quota_unit = EXCLUDED.quota_unit,
context_window = EXCLUDED.context_window,
plan_scope = EXCLUDED.plan_scope,
model_scope = EXCLUDED.model_scope,
source_url = EXCLUDED.source_url,
published_at = EXCLUDED.published_at,
notes = EXCLUDED.notes,
updated_at = CURRENT_TIMESTAMP`,
			providerID, operatorID, plan.PlanFamily, plan.PlanCode, plan.PlanName, plan.Tier,
			plan.BillingCycle, plan.Currency, plan.ListPrice, plan.PriceUnit, plan.QuotaValue, plan.QuotaUnit,
			// An unknown context window is stored as NULL rather than 0.
			nullIfZero(plan.ContextWindow), plan.PlanScope, plan.ModelScope, plan.SourceURL, publishedAt, effectiveDate, plan.Notes,
		)
		if err != nil {
			return fmt.Errorf("upsert subscription_plan %s: %w", plan.PlanCode, err)
		}
	}
	return nil
}
// ensureModelProvider returns the ID of the model_provider row named
// plan.ProviderName, inserting an active row for it when none exists.
// Any lookup error other than "no rows" is returned as is.
func ensureModelProvider(db *sql.DB, plan subscriptionPlanRow) (int64, error) {
	var id int64
	lookupErr := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, plan.ProviderName).Scan(&id)
	switch lookupErr {
	case nil:
		return id, nil
	case sql.ErrNoRows:
		// Not registered yet: fall through to the insert below.
	default:
		return 0, lookupErr
	}

	insertErr := db.QueryRow(
		`INSERT INTO model_provider (name, name_cn, country, website, status)
VALUES ($1, $2, $3, $4, 'active')
RETURNING id`,
		plan.ProviderName, plan.ProviderCN, plan.ProviderCountry, "https://cloud.tencent.com",
	).Scan(&id)
	return id, insertErr
}
// ensureOperator returns the ID of the operator row named plan.OperatorName,
// inserting an active row for it when none exists. Any lookup error other
// than "no rows" is returned as is.
func ensureOperator(db *sql.DB, plan subscriptionPlanRow) (int64, error) {
	var id int64
	lookupErr := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, plan.OperatorName).Scan(&id)
	switch lookupErr {
	case nil:
		return id, nil
	case sql.ErrNoRows:
		// Not registered yet: fall through to the insert below.
	default:
		return 0, lookupErr
	}

	insertErr := db.QueryRow(
		`INSERT INTO operator (name, name_cn, country, website, description, status, type)
VALUES ($1, $2, $3, $4, $5, 'active', $6)
RETURNING id`,
		plan.OperatorName, plan.OperatorCN, plan.OperatorCountry, "https://cloud.tencent.com",
		"Tencent Cloud subscription plans", plan.OperatorType,
	).Scan(&id)
	return id, insertErr
}
// nullIfZero returns nil for 0 and the value itself otherwise, so that a zero
// can be passed to a query as NULL.
func nullIfZero(value int) any {
	if value != 0 {
		return value
	}
	return nil
}
// writeTencentImportSummary saves summary as the latest import report under
// reports/verification (relative to the working directory), creating the
// directory if needed and overwriting any previous report.
func writeTencentImportSummary(summary string) error {
	const (
		summaryDir  = "reports/verification"
		summaryPath = "reports/verification/tencent_subscription_import_latest.txt"
	)
	if err := os.MkdirAll(summaryDir, 0755); err != nil {
		return err
	}
	return os.WriteFile(summaryPath, []byte(summary), 0644)
}

View File

@@ -0,0 +1,90 @@
//go:build llm_script
package main
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
)
// TestBuildSubscriptionPlansFromCatalog parses the bundled Tencent sample and
// checks the plan count plus the derived fields of the first and last plan.
func TestBuildSubscriptionPlansFromCatalog(t *testing.T) {
	fixture, err := os.ReadFile(filepath.Join("testdata", "tencent_token_plan_sample.txt"))
	if err != nil {
		t.Fatalf("读取样例失败: %v", err)
	}
	catalog, err := parseTencentCatalog(string(fixture))
	if err != nil {
		t.Fatalf("parseTencentCatalog 失败: %v", err)
	}

	plans := buildSubscriptionPlans(catalog, defaultTencentCatalogURL)
	if got := len(plans); got != 8 {
		t.Fatalf("期望 8 条套餐记录,实际 %d", got)
	}

	// t.Fatalf stops the test, so only the first failing case is reported.
	lite := plans[0]
	switch {
	case lite.ProviderName != "Tencent":
		t.Fatalf("provider 错误: %q", lite.ProviderName)
	case lite.OperatorName != "Tencent Cloud":
		t.Fatalf("operator 错误: %q", lite.OperatorName)
	case lite.PlanFamily != "token_plan":
		t.Fatalf("plan family 错误: %q", lite.PlanFamily)
	case lite.PlanCode != "token-plan-lite":
		t.Fatalf("plan code 错误: %q", lite.PlanCode)
	case lite.ListPrice != 39:
		t.Fatalf("list price 错误: %v", lite.ListPrice)
	case lite.QuotaValue != 35000000:
		t.Fatalf("quota value 错误: %d", lite.QuotaValue)
	case !strings.Contains(lite.ModelScope, "\"glm-5\""):
		t.Fatalf("model_scope 缺少 glm-5: %q", lite.ModelScope)
	case lite.PublishedAt != "2026-04-27 17:18:02":
		t.Fatalf("published_at 错误: %q", lite.PublishedAt)
	}

	hyMax := plans[len(plans)-1]
	switch {
	case hyMax.PlanFamily != "token_plan":
		t.Fatalf("Hy Token Plan family 错误: %q", hyMax.PlanFamily)
	case hyMax.PlanCode != "hy-token-plan-max":
		t.Fatalf("Hy Token Plan code 错误: %q", hyMax.PlanCode)
	case hyMax.ContextWindow != 262144:
		t.Fatalf("Hy Token Plan context 错误: %d", hyMax.ContextWindow)
	}
}
func TestRunTencentSubscriptionImportDryRunPrintsSummary(t *testing.T) {
var out bytes.Buffer
err := runTencentSubscriptionImport(importTencentSubscriptionConfig{
Fixture: filepath.Join("testdata", "tencent_token_plan_sample.txt"),
DryRun: true,
URL: defaultTencentCatalogURL,
}, nil, &out)
if err != nil {
t.Fatalf("runTencentSubscriptionImport 失败: %v", err)
}
output := out.String()
for _, want := range []string{
"source=tencent-subscription-import",
"plans=8",
"provider=Tencent",
"operator=Tencent Cloud",
"dry_run=true",
} {
if !strings.Contains(output, want) {
t.Fatalf("输出缺少 %q实际: %q", want, output)
}
}
}

View File

@@ -0,0 +1,192 @@
//go:build llm_script
package main
import (
"database/sql"
"log"
"os"
_ "github.com/lib/pq"
)
// ModelPricing is one normalized pricing record assembled by the importer
// before it is written to the models and region_pricing tables.
type ModelPricing struct {
	// ModelID is stored as models.external_id; ModelName as models.name.
	ModelID, ModelName string
	// Provider that publishes the model (model_provider.name / country).
	ProviderName, ProviderCountry string
	// Operator that sells access (operator.name) and its kind.
	OperatorName, OperatorType string
	// Market region and price currency.
	Region, Currency string
	// Input and output prices.
	InputPrice, OutputPrice float64
	// ContextLength is stored as models.context_length.
	ContextLength int
	// IsFree marks entries that cost nothing.
	IsFree bool
	// SourceURL is the page the price came from; Modality is stored on the model.
	SourceURL, Modality string
	// SceneTags lists usage scenarios for the model.
	SceneTags []string
}
func main() {
dsn := os.Getenv("DATABASE_URL")
if dsn == "" {
dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
}
db, err := sql.Open("postgres", dsn)
if err != nil {
log.Fatal(err)
}
defer db.Close()
// 智谱AI定价数据从第一次无头浏览器抓取中提取
prices := []ModelPricing{
// GLM-5.1系列
{ModelID: "glm-5.1", ModelName: "GLM-5.1", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 6.0, OutputPrice: 24.0, ContextLength: 32000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理", "代码"}},
{ModelID: "glm-5.1-32k", ModelName: "GLM-5.1 (32K+)", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 8.0, OutputPrice: 28.0, ContextLength: 200000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理", "代码"}},
// GLM-5-Turbo
{ModelID: "glm-5-turbo", ModelName: "GLM-5-Turbo", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 5.0, OutputPrice: 22.0, ContextLength: 32000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理"}},
{ModelID: "glm-5-turbo-32k", ModelName: "GLM-5-Turbo (32K+)", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 7.0, OutputPrice: 26.0, ContextLength: 200000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理"}},
// GLM-5
{ModelID: "glm-5", ModelName: "GLM-5", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 4.0, OutputPrice: 18.0, ContextLength: 32000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理"}},
{ModelID: "glm-5-32k", ModelName: "GLM-5 (32K+)", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 6.0, OutputPrice: 22.0, ContextLength: 200000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理"}},
// GLM-4.7
{ModelID: "glm-4.7", ModelName: "GLM-4.7", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 2.0, OutputPrice: 8.0, ContextLength: 32000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理", "代码"}},
{ModelID: "glm-4.7-32k", ModelName: "GLM-4.7 (32K+)", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 4.0, OutputPrice: 16.0, ContextLength: 200000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理", "代码"}},
// GLM-4.5-Air
{ModelID: "glm-4.5-air", ModelName: "GLM-4.5-Air", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0.8, OutputPrice: 2.0, ContextLength: 32000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
{ModelID: "glm-4.5-air-32k", ModelName: "GLM-4.5-Air (32K+)", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 1.2, OutputPrice: 8.0, ContextLength: 128000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
// GLM-4.7-FlashX
{ModelID: "glm-4.7-flashx", ModelName: "GLM-4.7-FlashX", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0.5, OutputPrice: 3.0, ContextLength: 200000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
// GLM-4.7-Flash (Free)
{ModelID: "glm-4.7-flash", ModelName: "GLM-4.7-Flash", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0, OutputPrice: 0, ContextLength: 200000, IsFree: true, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
// GLM-4.6V (Vision)
{ModelID: "glm-4.6v", ModelName: "GLM-4.6V", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 2.0, OutputPrice: 6.0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "multimodal", SceneTags: []string{"视觉", "对话"}},
// GLM-4.6V-FlashX
{ModelID: "glm-4.6v-flashx", ModelName: "GLM-4.6V-FlashX", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0.15, OutputPrice: 1.5, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "multimodal", SceneTags: []string{"视觉", "对话"}},
// GLM-4.5V
{ModelID: "glm-4.5v", ModelName: "GLM-4.5V", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 2.0, OutputPrice: 6.0, ContextLength: 32000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "multimodal", SceneTags: []string{"视觉", "对话"}},
// GLM-4系列 (Legacy)
{ModelID: "glm-4-0520", ModelName: "GLM-4-0520", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 100.0, OutputPrice: 50.0, ContextLength: 128000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "推理"}},
{ModelID: "glm-4-air", ModelName: "GLM-4-Air", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0.5, OutputPrice: 0.25, ContextLength: 128000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
{ModelID: "glm-4-airx", ModelName: "GLM-4-AirX", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 10.0, OutputPrice: 10.0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "极速"}},
{ModelID: "glm-4-long", ModelName: "GLM-4-Long", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 1.0, OutputPrice: 0.5, ContextLength: 1000000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话", "长文本"}},
// GLM-4V (Vision Legacy)
{ModelID: "glm-4v-plus", ModelName: "GLM-4V-Plus", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 4.0, OutputPrice: 4.0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "multimodal", SceneTags: []string{"视觉", "对话"}},
{ModelID: "glm-4v", ModelName: "GLM-4V", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 50.0, OutputPrice: 50.0, ContextLength: 2000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "multimodal", SceneTags: []string{"视觉", "对话"}},
// ChatGLM3
{ModelID: "chatglm3-6b", ModelName: "ChatGLM3-6B", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0, OutputPrice: 0, ContextLength: 8000, IsFree: true, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
// GLM-4-9B
{ModelID: "glm-4-9b", ModelName: "GLM-4-9B", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0, OutputPrice: 0, ContextLength: 8000, IsFree: true, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "text", SceneTags: []string{"对话"}},
// GLM-Realtime
{ModelID: "glm-realtime-flash", ModelName: "GLM-Realtime-Flash", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0.18, OutputPrice: 0.18, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "audio", SceneTags: []string{"实时", "音视频"}},
{ModelID: "glm-realtime-air", ModelName: "GLM-Realtime-Air", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0.3, OutputPrice: 0.3, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "audio", SceneTags: []string{"实时", "音视频"}},
// GLM-TTS
{ModelID: "glm-tts", ModelName: "GLM-TTS", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 2.0, OutputPrice: 0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "audio", SceneTags: []string{"语音合成"}},
{ModelID: "glm-tts-clone", ModelName: "GLM-TTS-Clone", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 6.0, OutputPrice: 0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "audio", SceneTags: []string{"音色克隆"}},
// GLM-ASR
{ModelID: "glm-asr-2512", ModelName: "GLM-ASR-2512", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 16.0, OutputPrice: 0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "audio", SceneTags: []string{"语音识别"}},
// GLM-4-Voice
{ModelID: "glm-4-voice", ModelName: "GLM-4-Voice", ProviderName: "Zhipu AI", ProviderCountry: "CN", OperatorName: "Zhipu", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 80.0, OutputPrice: 80.0, ContextLength: 8000, SourceURL: "https://open.bigmodel.cn/pricing", Modality: "audio", SceneTags: []string{"语音模型"}},
}
batchID := "manual-seed"
log.Printf("Importing %d Zhipu AI models...", len(prices))
// Save to database
for _, p := range prices {
// Find or create provider
var providerID int64
err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID)
if err == sql.ErrNoRows {
err = db.QueryRow(
"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
p.ProviderName, p.ProviderCountry, "",
).Scan(&providerID)
}
if err != nil {
log.Printf("Provider error: %v", err)
continue
}
// Find or create operator
var operatorID int64
err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID)
if err == sql.ErrNoRows {
err = db.QueryRow(
"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
p.OperatorName, p.ProviderCountry,
).Scan(&operatorID)
}
if err != nil {
log.Printf("Operator error: %v", err)
continue
}
// Find or create model
var modelID int64
err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID)
if err == sql.ErrNoRows {
err = db.QueryRow(
`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
).Scan(&modelID)
}
if err != nil {
log.Printf("Model error for %s: %v", p.ModelID, err)
continue
}
// Insert pricing
sourceType := p.OperatorType
freeQuota := ""
freeLimitations := "[]"
rateLimit := "{}"
if p.IsFree {
sourceType = "free_tier"
freeQuota = "Imported free-tier pricing entry"
freeLimitations = `["See source_url for current quota and policy"]`
}
_, err = db.Exec(
`INSERT INTO region_pricing
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = CURRENT_TIMESTAMP`,
modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
sourceType, freeQuota, freeLimitations, rateLimit,
)
if err != nil {
log.Printf("Pricing error for %s: %v", p.ModelID, err)
continue
}
}
log.Printf("Successfully imported %d Zhipu AI models", len(prices))
}

38
scripts/restore.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# restore.sh - destructively restore the database from a plain-SQL backup.
#
# Usage: bash scripts/restore.sh --force <backup.sql|backup.sql.gz>
#
# The public schema is dropped and recreated before the backup is replayed, so
# every check that can fail cheaply (arguments, file presence, gzip integrity)
# runs BEFORE the drop.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# --force is mandatory because the restore wipes the current schema.
if [[ "${1:-}" != "--force" || -z "${2:-}" ]]; then
  echo "用法: bash scripts/restore.sh --force <backup.sql|backup.sql.gz>" >&2
  exit 1
fi

BACKUP_FILE="$2"
if [[ ! -f "$BACKUP_FILE" ]]; then
  echo "备份文件不存在: $BACKUP_FILE" >&2
  exit 1
fi

# Validate the archive up front: a truncated or corrupt .gz would otherwise be
# discovered only after the schema has already been dropped.
if [[ "$BACKUP_FILE" == *.gz ]] && ! gzip -t "$BACKUP_FILE"; then
  echo "备份文件损坏或不是有效的 gzip: $BACKUP_FILE" >&2
  exit 1
fi

# Environment files are sourced in this order; values in .env are applied last.
if [[ -f ".env.local" ]]; then
  # shellcheck disable=SC1091
  source ".env.local"
fi
if [[ -f ".env" ]]; then
  # shellcheck disable=SC1091
  source ".env"
fi

DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"

echo "开始恢复到目标数据库..."
psql "$DB_URL" -v ON_ERROR_STOP=1 -c "DROP SCHEMA IF EXISTS public CASCADE; CREATE SCHEMA public;"

# Replay the dump; ON_ERROR_STOP makes psql abort on the first failing statement.
if [[ "$BACKUP_FILE" == *.gz ]]; then
  gzip -dc "$BACKUP_FILE" | psql "$DB_URL" -v ON_ERROR_STOP=1
else
  psql "$DB_URL" -v ON_ERROR_STOP=1 -f "$BACKUP_FILE"
fi

echo "恢复完成: $BACKUP_FILE"

View File

@@ -0,0 +1,77 @@
# OpenClaw Multi Review Prompt
目标：对 `llm-intelligence` 项目执行一次高频真实状态 review，并顺手沉淀 OpenClaw 能力优化项。
执行要求:
1. 只基于当前仓库真实状态做判断。
2. 如果本轮要写 `TASKS.md` 或 `GOALS.md`，必须先执行预检守卫：
- `bash scripts/review/preflight_task_write_guard.sh llm-intelligence-review /home/long/project/llm-intelligence/TASKS.md`
- `bash scripts/review/preflight_task_write_guard.sh llm-intelligence-review /home/long/project/llm-intelligence/GOALS.md`
- 守卫失败时，立即停止写回，不得继续尝试 `edit` 或 `write`
3. 必须先检查:
- `git status --short`
- 最近提交记录
- `TASKS.md`、`GOALS.md`、`OPENCLAW_EXECUTION.md`、`reports/`
- 当前可执行的验证入口(例如 `Makefile`、脚本、前后端命令)
4. 选择最合适的非破坏性验证命令执行;不要伪造“已验证”。
5. 如果某项能力缺失，明确写成 gap，不要包装成“基本完成”。
6. 这个 review 任务默认不改业务代码;重点是判断真实进展、识别缺口、更新 OpenClaw 优化 backlog。
7. 默认**不要更新任何 TASKS/GOALS 状态**。review 是审查,不是任务回收。
8. 如果用户明确要求在 review 中同步任务状态:
- 只能写 `/home/long/project/llm-intelligence/TASKS.md`
- 禁止写 `~/.openclaw/workspace/TASKS.md`、`~/.openclaw/workspace/GOALS.md`
- 写回前必须先跑一次对应目标文件的预检守卫
- 必须先重新读取最新文件,再决定是否 `write`
输出文件:
1. 单次 review 报告:
- 路径:`reports/openclaw/YYYY-MM-DD-HHMM-review.md`
- 模板:`reports/openclaw/REVIEW_TEMPLATE.md`
2. OpenClaw 能力优化 backlog
- 路径:`reports/openclaw/OPENCLAW_CAPABILITY_BACKLOG.md`
- 追加或更新发现的问题与建议
落盘规则:
1. 写输出文件前,先 `read` 现有文件内容。
2. 生成输出文件时,统一使用 `write` 工具整文件重写。
3. 不要使用 `edit` 工具追加、替换或局部修改文件。
4. 如果需要更新 `OPENCLAW_CAPABILITY_BACKLOG.md`,先读完整文件,再把旧内容与本次新增内容合并后一次性 `write` 回去。
5. 如果工具返回错误,不要原样重试同一个 `edit`;改为重新读取文件并使用 `write` 全量覆盖。
6. 对任何共享文档,禁止连续使用同一份 stale `oldText` 重试 `edit`
7. 如果仓库状态与上一次 review 相比没有 delta，不要机械重复整份完成项清单；要显式写出“无 delta”，并把重点转向风险老化、未提交变更、未验证项。
`YYYY-MM-DD-HHMM-review.md` 必须与项目 daily memory 使用完全一致的字段命名:
- 允许保留标题与 metadata block
- 除标题与 metadata block 外,顶层 section 只允许:
- `## Context`
- `## Evidence`
- `## Outcome`
- `## Next`
- 不要再使用 `Executive Summary`、`当前真实阶段判断`、`已完成项`、`未完成项` 作为顶层 section 标题
- 推荐字段映射:
- `Context`：review ID、trigger、scope、时间窗口、当前真实阶段判断、本轮背景
- `Evidence`：验证命令与结果、已完成项、未完成项、伪进展/文档与实现不一致项、关键 gap 及其证据
- `Outcome`：执行摘要、风险判断、阶段结论、本轮最重要的落地结论
- `Next`：下一轮最值得推进的 3 件事、明确 owner 或建议动作
- `Evidence`、`Next` 下允许继续使用二级小节或表格，但字段名必须保持上述四段式
- `Evidence` 段中的每条关键结论,必须尽量标明证据等级:
- `runtime-verified`
- `artifact-present`
- `doc-claimed`
- 如果只有 `doc-claimed`,必须直接指出“未做真实验证”,不能包装成完成
- 新报告默认参考 `reports/openclaw/REVIEW_TEMPLATE.md` 生成,避免自由发挥
`OPENCLAW_CAPABILITY_BACKLOG.md` 必须包含:
- 日期时间
- 本次 review 暴露出的 OpenClaw 能力问题
- 问题影响
- 优化建议
- 优先级（P0/P1/P2）
- 建议验证方法
完成后,在最终回复中只输出简洁摘要,并列出本次生成/更新的文件。

View File

@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# Project-local wrapper around the global task-write guard: translates a
# project-specific writer role into the generic role name and hands control to
# the global guard together with the project root and the target paths.
set -euo pipefail

PROJECT_ROOT="/home/long/project/llm-intelligence"
GLOBAL_GUARD="/home/long/.openclaw/workspace/scripts/preflight_task_write_guard.sh"

# Print the command-line synopsis and the accepted writer roles.
usage() {
  cat <<'EOF'
Usage:
preflight_task_write_guard.sh <writer-role> <target-path> [target-path...]
Writer roles:
main-session
llm-intelligence-agent
llm-intelligence-review
llm-intelligence-cron
EOF
}

# A role plus at least one target path is required (exit 64 otherwise).
if (( $# < 2 )); then
  usage >&2
  exit 64
fi

writer_role="$1"
shift

# Map the project role onto the generic role understood by the global guard;
# anything else is rejected with exit 68.
case "$writer_role" in
  main-session)            generic_role="main-session" ;;
  llm-intelligence-agent)  generic_role="project-agent" ;;
  llm-intelligence-review) generic_role="project-review" ;;
  llm-intelligence-cron)   generic_role="project-cron" ;;
  *)
    printf '%s\n' "preflight: unsupported writer role: $writer_role" >&2
    exit 68
    ;;
esac

# Replace this process with the global guard; remaining args are target paths.
exec "$GLOBAL_GUARD" "$generic_role" "$PROJECT_ROOT" "$@"

109
scripts/run_daily.sh Executable file
View File

@@ -0,0 +1,109 @@
#!/bin/bash
# run_daily.sh - daily data collection and report generation pipeline
# Sprint 3: full scheduling script (collect -> quality check -> report generation -> archive -> notify)
set -euo pipefail
# Absolute project root; the main flow cd's here before running any step.
PROJECT_DIR="/home/long/project/llm-intelligence"
# Connection string: DATABASE_URL if set, otherwise the local unix-socket default.
DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
# Date stamp (YYYY-MM-DD) used in report file names and the log file name.
REPORT_DATE=$(date +%Y-%m-%d)
LOG_FILE="/tmp/llm_hub_daily_${REPORT_DATE}.log"
# Optional Feishu webhook; alerts are skipped when it is empty.
FEISHU_WEBHOOK="${FEISHU_WEBHOOK:-}"
# Logging helper: prefix the message ($1) with a timestamp, print it to stdout
# and append the same line to $LOG_FILE.
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
# Error handler: log the failure ($1), try the degraded fallback report, send a
# Feishu alert when a webhook is configured, then exit 1.
#
# The script runs under `set -e`, so a failing fallback would previously abort
# here and the alert would never be sent. Both best-effort steps are now
# guarded so the function always reaches the alert and the final exit.
error_exit() {
  log "❌ 错误: $1"
  # Degrade: copy yesterday's report (best effort).
  fallback_report || log "⚠️ 降级报告生成失败"
  # Send the alert (best effort).
  if [ -n "$FEISHU_WEBHOOK" ]; then
    send_alert "$1" || log "⚠️ 飞书告警发送失败"
  fi
  exit 1
}
# Degraded mode: copy yesterday's Markdown report to today's path, rewrite the
# date inside it and prefix the first line with a "[数据延迟]" marker.
# Logs a warning and does nothing when yesterday's report is missing.
fallback_report() {
  # Declare and assign separately so a failing `date` is not masked by `local`.
  local yesterday yesterday_md today_md
  if ! yesterday=$(date -d "yesterday" +%Y-%m-%d); then
    log "⚠️ 无法计算昨日日期，跳过降级报告"
    return 0
  fi
  yesterday_md="${PROJECT_DIR}/reports/daily/daily_report_${yesterday}.md"
  today_md="${PROJECT_DIR}/reports/daily/daily_report_${REPORT_DATE}.md"

  if [ ! -f "$yesterday_md" ]; then
    log "⚠️ 无昨日报告可供复制"
    return 0
  fi

  cp "$yesterday_md" "$today_md"
  # One sed pass: replace every occurrence of yesterday's date, then tag line 1.
  sed -i -e "s/${yesterday}/${REPORT_DATE}/g" -e "1s/^/# [数据延迟] /" "$today_md"
  log "⚠️ 已复制昨日报告并标记[数据延迟]"
}
# Send a Feishu text alert describing the failure ($1).
#
# The message and log path are embedded in a JSON string, so backslashes,
# double quotes and newlines are escaped first; otherwise such a message would
# produce an invalid payload. The outcome is logged truthfully, and a curl
# failure never aborts the caller.
send_alert() {
  local msg="$1"
  local log_path="$LOG_FILE"
  local payload

  # Minimal JSON string escaping.
  msg=${msg//\\/\\\\}
  msg=${msg//\"/\\\"}
  msg=${msg//$'\n'/\\n}
  log_path=${log_path//\\/\\\\}
  log_path=${log_path//\"/\\\"}

  payload="{\"msg_type\":\"text\",\"content\":{\"text\":\"🚨 LLM Hub 日报失败\\n日期: ${REPORT_DATE}\\n错误: ${msg}\\n请检查日志: ${log_path}\"}}"

  # --fail turns HTTP errors into a non-zero exit; --max-time bounds a hung webhook.
  if curl -s --fail --max-time 15 -X POST -H "Content-Type: application/json" \
    -d "$payload" \
    "$FEISHU_WEBHOOK" > /dev/null; then
    log "📢 飞书告警已发送"
  else
    log "⚠️ 飞书告警发送失败"
  fi
}
# Main pipeline. Every step that can fail is routed through an explicit handler
# so that, under `set -e`, no failure ends the run without a log line and alert.
log "🚀 开始每日流水线: ${REPORT_DATE}"
cd "$PROJECT_DIR"

# 1. Data collection
log "1⃣ 数据采集..."
if ! go run scripts/fetch_openrouter.go >> "$LOG_FILE" 2>&1; then
  error_exit "数据采集失败"
fi
log "✅ 数据采集完成"

# 2. Data quality check
log "2⃣ 数据质量检查..."
# Capture the count inside `if` so a psql failure reaches error_exit instead of
# terminating the script silently; psql's stderr goes to the log for diagnosis.
if ! MODEL_COUNT=$(psql "$DB_URL" -t -c "SELECT COUNT(*) FROM models WHERE deleted_at IS NULL" 2>>"$LOG_FILE"); then
  error_exit "无法查询模型数量"
fi
MODEL_COUNT="${MODEL_COUNT//[[:space:]]/}"
# Guard the numeric comparison below against empty or non-numeric output.
if [[ ! "$MODEL_COUNT" =~ ^[0-9]+$ ]]; then
  error_exit "模型数量查询结果无效: '${MODEL_COUNT}'"
fi
if [ "$MODEL_COUNT" -lt 10 ]; then
  error_exit "模型数量不足: ${MODEL_COUNT} < 10"
fi
log "✅ 数据质量检查通过 (模型数: ${MODEL_COUNT})"

# 3. Generate the daily report
log "3⃣ 生成日报..."
export DATABASE_URL="$DB_URL"
if ! go run scripts/generate_daily_report.go >> "$LOG_FILE" 2>&1; then
  error_exit "日报生成失败"
fi
log "✅ 日报生成完成"

# 4. Archive (best effort: a missing artifact is logged, not fatal)
log "4⃣ 归档报告..."
ARCHIVE_DIR="reports/daily/$(date +%Y/%m)"
mkdir -p "$ARCHIVE_DIR"
for artifact in \
  "reports/daily/daily_report_${REPORT_DATE}.md" \
  "reports/daily/html/daily_report_${REPORT_DATE}.html"; do
  if ! cp "$artifact" "$ARCHIVE_DIR/" 2>/dev/null; then
    log "⚠️ 归档失败: ${artifact}"
  fi
done
log "✅ 归档完成"

# 5. Upsert the daily_report row
log "5⃣ 更新日报记录..."
# The report already exists at this point, so on failure only log and alert;
# error_exit is avoided because its fallback would overwrite today's report.
if ! psql "$DB_URL" -v ON_ERROR_STOP=1 -c "
INSERT INTO daily_report (report_date, status, model_count, output_path, created_at, updated_at)
VALUES ('${REPORT_DATE}', 'generated', ${MODEL_COUNT}, 'reports/daily/daily_report_${REPORT_DATE}.md', NOW(), NOW())
ON CONFLICT (report_date) DO UPDATE SET
status = 'generated',
model_count = EXCLUDED.model_count,
output_path = EXCLUDED.output_path,
updated_at = NOW()
" >> "$LOG_FILE" 2>&1; then
  log "❌ 错误: 日报记录更新失败"
  if [ -n "$FEISHU_WEBHOOK" ]; then
    send_alert "日报记录更新失败" || true
  fi
  exit 1
fi
log "✅ 日报记录更新完成"

log "🎉 每日流水线全部完成!"
log "📄 Markdown: reports/daily/daily_report_${REPORT_DATE}.md"
log "🌐 HTML: reports/daily/html/daily_report_${REPORT_DATE}.html"
exit 0

38
scripts/run_real_pipeline.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Real-data pipeline: apply migrations, fetch models from OpenRouter into the
# database and models.json, generate the daily report from that JSON, then
# print row counts for the models, model_prices and report_runs tables.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Source the env files that exist, .env.local first and .env second.
for env_file in ".env.local" ".env"; do
  if [[ -f "$env_file" ]]; then
    # shellcheck disable=SC1090,SC1091
    source "$env_file"
  fi
done

# Both settings are mandatory; stop with exit 1 before touching anything.
if [[ -z "${DATABASE_URL:-}" ]]; then
  echo "DATABASE_URL 未设置" >&2
  exit 1
fi
if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then
  echo "OPENROUTER_API_KEY 未设置，无法执行真实采集" >&2
  exit 1
fi

models_json="$ROOT_DIR/models.json"
reports_dir="$ROOT_DIR/reports/daily"

"$ROOT_DIR/scripts/apply_migration.sh"

go run "./scripts/fetch_openrouter.go" -api-key "$OPENROUTER_API_KEY" -db "$DATABASE_URL" -out "$models_json"

go run "./scripts/generate_daily_report.go" -json "$models_json" -out "$reports_dir"

# Unaligned, tuples-only, quiet output: one "table|count" line per table.
psql "$DATABASE_URL" -Atqc \
  "select 'models', count(*) from models union all select 'model_prices', count(*) from model_prices union all select 'report_runs', count(*) from report_runs order by 1;"

View File

@@ -0,0 +1,331 @@
//go:build llm_script
package main
import (
"fmt"
"html"
"io"
"net/http"
"os"
"regexp"
"sort"
"strings"
"time"
)
// defaultTencentCatalogURL is the Tencent Cloud documentation page used as the
// default catalog source.
const (
defaultTencentCatalogURL = "https://cloud.tencent.com/document/product/1823/130060"
)
// defaultTencentCatalogTimeout is the default timeout (20 seconds). It is
// declared as a variable rather than a constant.
var defaultTencentCatalogTimeout = 20 * time.Second
// fetchTencentCatalogConfig carries the options for fetching the catalog.
type fetchTencentCatalogConfig struct {
// URL is the page requested by fetchTencentCatalogContent when no fixture is set.
URL string
// DryRun is not read by the functions in this file section.
DryRun bool
// Timeout is not read by fetchTencentCatalogContent itself.
// NOTE(review): presumably applied to the http.Client by the caller — confirm.
Timeout time.Duration
// Fixture, when non-blank, is a local file read instead of performing the HTTP request.
Fixture string
}
// tencentCatalog is the parsed result returned by parseTencentCatalog.
type tencentCatalog struct {
// UpdatedAt is the text following the "最近更新时间" prefix, kept as a string.
UpdatedAt string
// Plans holds every plan recognised inside a "### 套餐详情" section.
Plans []tencentPlan
// Models holds every model recognised inside a "### 可用模型" section.
Models []tencentModel
}
// tencentPlan is one subscription plan. All values are stored as the raw text
// lines found in the catalog; nothing is converted to numbers.
type tencentPlan struct {
// Series is the "## ..." heading the plan was found under.
Series string
// Tier is the line after the plan name with surrounding parentheses/spaces trimmed.
Tier string
// Quota is the line containing "Tokens".
Quota string
// Price is the line containing "元/月".
Price string
// BillingCycle is the line containing "订阅月".
BillingCycle string
// Scene is the optional description line following the price.
Scene string
}
// tencentModel is one model entry parsed from a "### 可用模型" section.
type tencentModel struct {
// Series is the "## ..." heading the model was found under.
Series string
// Name is the display-name line that precedes the model ID.
Name string
// ModelID is the identifier line (lowercase letters, digits, '.', '_', '-').
ModelID string
// ContextLength is derived from the notes by extractContextLength; 0 when no
// "<n>K/M 上下文" phrase is present.
ContextLength int
// Notes are the free-text lines that follow the model ID.
Notes []string
}
// fetchTencentCatalogContent returns the raw catalog document as a string.
//
// When cfg.Fixture is non-blank the file at that path is read and no network
// request is made. Otherwise cfg.URL is fetched with a GET request through
// client; any status other than 200 OK is reported as an error.
//
// A nil client no longer panics: a client is built from cfg.Timeout, falling
// back to defaultTencentCatalogTimeout when cfg.Timeout is not positive.
// Errors are wrapped with the operation and its target so callers can tell a
// fixture problem from a network problem.
func fetchTencentCatalogContent(cfg fetchTencentCatalogConfig, client *http.Client) (string, error) {
	if strings.TrimSpace(cfg.Fixture) != "" {
		data, err := os.ReadFile(cfg.Fixture)
		if err != nil {
			return "", fmt.Errorf("read fixture %q: %w", cfg.Fixture, err)
		}
		return string(data), nil
	}

	if client == nil {
		timeout := cfg.Timeout
		if timeout <= 0 {
			timeout = defaultTencentCatalogTimeout
		}
		client = &http.Client{Timeout: timeout}
	}

	req, err := http.NewRequest(http.MethodGet, cfg.URL, nil)
	if err != nil {
		return "", fmt.Errorf("build request for %q: %w", cfg.URL, err)
	}
	req.Header.Set("User-Agent", "llm-intelligence/tencent-catalog-fetcher")

	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("fetch %q: %w", cfg.URL, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("fetch %q: unexpected status %d", cfg.URL, resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read response from %q: %w", cfg.URL, err)
	}
	return string(body), nil
}
// parseTencentCatalog turns the raw catalog document into a tencentCatalog.
//
// The text is normalised into non-empty lines and scanned once. The first
// "updated at" line sets UpdatedAt; a series heading sets the current series
// and clears the current section; "### 套餐详情" and "### 可用模型" select the
// plans or models section. Inside a section each line is offered to the
// matching try-parser, and on success the scan jumps to the last line that
// parser consumed.
//
// An error is returned when the update time, the plans, or the models are
// missing, checked in that order.
func parseTencentCatalog(raw string) (tencentCatalog, error) {
	var (
		result  tencentCatalog
		series  string
		section string
	)

	lines := normalizeTencentCatalogLines(raw)
	for idx := 0; idx < len(lines); idx++ {
		current := lines[idx]

		if result.UpdatedAt == "" {
			if stamp := extractUpdatedAt(current); stamp != "" {
				result.UpdatedAt = stamp
				continue
			}
		}

		if heading := extractSeriesHeading(current); heading != "" {
			series, section = heading, ""
			continue
		}

		if current == "### 套餐详情" {
			section = "plans"
			continue
		}
		if current == "### 可用模型" {
			section = "models"
			continue
		}

		if section == "plans" {
			if plan, last, ok := tryParseTencentPlan(lines, idx, series); ok {
				result.Plans = append(result.Plans, plan)
				idx = last // the loop increment moves past the consumed lines
			}
		} else if section == "models" {
			if model, last, ok := tryParseTencentModel(lines, idx, series); ok {
				result.Models = append(result.Models, model)
				idx = last
			}
		}
	}

	switch {
	case result.UpdatedAt == "":
		return tencentCatalog{}, fmt.Errorf("catalog updated_at not found")
	case len(result.Plans) == 0:
		return tencentCatalog{}, fmt.Errorf("catalog plans not found")
	case len(result.Models) == 0:
		return tencentCatalog{}, fmt.Errorf("catalog models not found")
	}
	return result, nil
}
// tencentLineBreakReplacer turns <br> variants and the closing tags of
// block-level elements into newlines in a single pass over the text.
var tencentLineBreakReplacer = strings.NewReplacer(
	"<br>", "\n",
	"<br/>", "\n",
	"<br />", "\n",
	"</p>", "\n",
	"</div>", "\n",
	"</li>", "\n",
	"</tr>", "\n",
	"</td>", "\n",
	"</h1>", "\n",
	"</h2>", "\n",
	"</h3>", "\n",
	"</h4>", "\n",
	"</pre>", "\n",
	"</main>", "\n",
)

// tencentHTMLTagPattern matches any remaining HTML tag. It is compiled once at
// package scope instead of on every call.
var tencentHTMLTagPattern = regexp.MustCompile(`<[^>]+>`)

// normalizeTencentCatalogLines converts an HTML or Markdown catalog document
// into a slice of trimmed, non-empty text lines.
//
// Steps, in order: HTML entities are unescaped; line-breaking tags become
// newlines; every other tag is removed; CRLF and lone CR become LF; the text
// is split on LF; each line is trimmed and blank lines are dropped.
func normalizeTencentCatalogLines(raw string) []string {
	text := tencentLineBreakReplacer.Replace(html.UnescapeString(raw))
	text = tencentHTMLTagPattern.ReplaceAllString(text, "")
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")

	rawLines := strings.Split(text, "\n")
	lines := make([]string, 0, len(rawLines))
	for _, rawLine := range rawLines {
		if line := strings.TrimSpace(rawLine); line != "" {
			lines = append(lines, line)
		}
	}
	return lines
}
// extractUpdatedAt returns the trimmed text that follows the "last updated"
// prefix, or "" when the line does not start with that prefix.
func extractUpdatedAt(line string) string {
	const prefix = "最近更新时间："
	if !strings.HasPrefix(line, prefix) {
		return ""
	}
	return strings.TrimSpace(line[len(prefix):])
}
// extractSeriesHeading recognises a level-2 heading ("## ...") that names a
// plan series, i.e. one containing "Token Plan" or "Coding Plan". It returns
// the heading text with a trailing "套餐" and surrounding whitespace removed,
// or "" for any other line.
func extractSeriesHeading(line string) string {
	const marker = "## "
	if !strings.HasPrefix(line, marker) {
		return ""
	}
	heading := strings.TrimSpace(line[len(marker):])
	for _, kind := range []string{"Token Plan", "Coding Plan"} {
		if strings.Contains(heading, kind) {
			return strings.TrimSpace(strings.TrimSuffix(heading, "套餐"))
		}
	}
	return ""
}
// tryParseTencentPlan attempts to read one plan starting at lines[start].
//
// A plan is five consecutive lines: plan name, tier, a billing-cycle line
// containing "订阅月", a quota line containing "Tokens" and a price line
// containing "元/月". A sixth line is taken as the scene description unless it
// is a "### " heading or another plan name.
//
// It returns the plan, the index of the last consumed line and true on
// success; on failure it returns a zero plan, start and false.
func tryParseTencentPlan(lines []string, start int, series string) (tencentPlan, int, bool) {
	const lastRequired = 4 // offset of the price line, the last mandatory one
	if start+lastRequired >= len(lines) {
		return tencentPlan{}, start, false
	}

	name, tier := lines[start], lines[start+1]
	cycle, quota, price := lines[start+2], lines[start+3], lines[start+4]

	matches := isTencentPlanName(name) &&
		isTencentPlanTier(tier) &&
		strings.Contains(cycle, "订阅月") &&
		strings.Contains(quota, "Tokens") &&
		strings.Contains(price, "元/月")
	if !matches {
		return tencentPlan{}, start, false
	}

	plan := tencentPlan{
		Series:       series,
		Tier:         strings.Trim(tier, "() "),
		BillingCycle: cycle,
		Quota:        quota,
		Price:        price,
	}

	end := start + lastRequired
	if sceneIdx := end + 1; sceneIdx < len(lines) {
		scene := lines[sceneIdx]
		if !strings.HasPrefix(scene, "### ") && !isTencentPlanName(scene) {
			plan.Scene = scene
			end = sceneIdx
		}
	}
	return plan, end, true
}
// tryParseTencentModel attempts to read one model entry starting at
// lines[start]: a display name followed by a model ID and then any number of
// note lines.
//
// Notes stop at a "## " or "### " heading, at a plan name followed by a tier
// line, or at a non-reserved line that is itself followed by a model ID (the
// start of the next model). ContextLength is extracted from the joined notes.
//
// It returns the model, the index of the last consumed line and true on
// success; on failure it returns a zero model, start and false.
func tryParseTencentModel(lines []string, start int, series string) (tencentModel, int, bool) {
	idIndex := start + 1
	if idIndex >= len(lines) || !isTencentModelID(lines[idIndex]) || isReservedTencentLine(lines[start]) {
		return tencentModel{}, start, false
	}

	last := idIndex
	collected := make([]string, 0, 4)
	for pos := start + 2; pos < len(lines); pos++ {
		candidate := lines[pos]
		hasNext := pos+1 < len(lines)

		startsSection := strings.HasPrefix(candidate, "## ") || strings.HasPrefix(candidate, "### ")
		startsPlan := hasNext && isTencentPlanName(candidate) && isTencentPlanTier(lines[pos+1])
		startsModel := hasNext && isTencentModelID(lines[pos+1]) && !isReservedTencentLine(candidate)
		if startsSection || startsPlan || startsModel {
			break
		}

		collected = append(collected, candidate)
		last = pos
	}

	return tencentModel{
		Series:        series,
		Name:          lines[start],
		ModelID:       lines[idIndex],
		ContextLength: extractContextLength(strings.Join(collected, " ")),
		Notes:         collected,
	}, last, true
}
// isTencentPlanName reports whether line is exactly one of the four known plan
// names.
func isTencentPlanName(line string) bool {
	for _, name := range [...]string{"体验套餐", "基础套餐", "进阶套餐", "专业套餐"} {
		if line == name {
			return true
		}
	}
	return false
}
// isTencentPlanTier reports whether line has the expected tier prefix and
// suffix.
// NOTE(review): both string literals appear empty here, which makes this
// function return true for every input. Presumably they were delimiter
// characters (e.g. parentheses around the tier name) lost in transit — confirm
// against the original file before relying on this check.
func isTencentPlanTier(line string) bool {
return strings.HasPrefix(line, "") && strings.HasSuffix(line, "")
}
// isReservedTencentLine reports whether line is structural rather than
// content: any line starting with "#", or one of the bare page and section
// titles.
func isReservedTencentLine(line string) bool {
	if strings.HasPrefix(line, "#") {
		return true
	}
	return line == "Token Plan 个人版套餐概览" || line == "套餐详情" || line == "可用模型"
}
// tencentModelIDPattern matches a model identifier: a lowercase letter or
// digit followed by lowercase letters, digits, '.', '_' or '-'. It is compiled
// once at package scope because isTencentModelID runs inside parsing loops.
var tencentModelIDPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9._-]*$`)

// isTencentModelID reports whether line consists solely of a model identifier.
func isTencentModelID(line string) bool {
	return tencentModelIDPattern.MatchString(line)
}
// tencentContextLengthPattern matches "<digits>K 上下文" or "<digits>M 上下文",
// case-insensitively and with optional whitespace around the unit. It is
// compiled once at package scope instead of on every call.
var tencentContextLengthPattern = regexp.MustCompile(`(?i)(\d+)\s*([KM])\s*上下文`)

// extractContextLength returns the context length mentioned in text.
// K multiplies the number by 1024 and M by 1024*1024. It returns 0 when no
// such phrase is present or the number cannot be parsed.
func extractContextLength(text string) int {
	matches := tencentContextLengthPattern.FindStringSubmatch(text)
	if len(matches) != 3 {
		return 0
	}

	var value int
	// The capture group is digits only, so this fails only on out-of-range values.
	if _, err := fmt.Sscanf(matches[1], "%d", &value); err != nil {
		return 0
	}

	switch strings.ToUpper(matches[2]) {
	case "K":
		return value * 1024
	case "M":
		return value * 1024 * 1024
	default:
		return 0
	}
}
// formatSeriesSummary counts plans per series and renders the counts as
// "series:count" pairs, sorted by series name and joined with commas.
// It returns "" for an empty input.
func formatSeriesSummary(plans []tencentPlan) string {
	tally := make(map[string]int)
	var names []string
	for i := range plans {
		key := plans[i].Series
		if _, seen := tally[key]; !seen {
			names = append(names, key)
		}
		tally[key]++
	}
	sort.Strings(names)

	var b strings.Builder
	for i, name := range names {
		if i > 0 {
			b.WriteByte(',')
		}
		fmt.Fprintf(&b, "%s:%d", name, tally[name])
	}
	return b.String()
}

View File

@@ -0,0 +1,108 @@
# Token Plan 个人版套餐概览
最近更新时间2026-04-27 17:18:02
## 通用 Token Plan 套餐
### 套餐详情
体验套餐
Lite
每订阅月
3500万 Tokens
39元/月
新手尝鲜,入门首选。
基础套餐
Standard
每订阅月
1亿 Tokens
99元/月
日常使用,高性价比。
进阶套餐
Pro
每订阅月
3.2亿 Tokens
299元/月
高频 AI 开发。
专业套餐
Max
每订阅月
6.5亿 Tokens
599元/月
重度 AI 开发首选。
### 可用模型
Auto
tc-code-latest
智能路由
MiniMax-M2.5
minimax-m2.5
深度思考、文本生成
MiniMax-M2.7
minimax-m2.7
深度思考、文本生成
GLM-5
glm-5
深度思考、文本生成
GLM-5.1
glm-5.1
深度思考、文本生成
kimi-k2.5
kimi-k2.5
深度思考、文本生成
Tencent HY 2.0 Instruct
hunyuan-2.0-instruct
文本生成
Tencent HY 2.0 Think
hunyuan-2.0-thinking
深度思考、文本生成
Hunyuan-T1
hunyuan-t1
文本生成
Hunyuan-TurboS
hunyuan-turbo
文本生成
## Hy Token Plan 套餐
### 套餐详情
体验套餐
Lite
每订阅月
3500万 Tokens
28元/月
新手尝鲜,入门首选。
基础套餐
Standard
每订阅月
1亿 Tokens
78元/月
日常使用,高性价比。
进阶套餐
Pro
每订阅月
3.2亿 Tokens
238元/月
高频 AI 开发。
专业套餐
Max
每订阅月
6.5亿 Tokens
468元/月
重度 AI 开发首选。
### 可用模型
Hy3 preview
hy3-preview
原生支持 256K 上下文。

View File

@@ -3,6 +3,8 @@
// matches expected_evidence, outputs pass/fail report.
//
// Usage: go run scripts/verification_executor.go [--dry-run] [--task T-Q2-1.1]
//go:build llm_script
package main
import (
@@ -21,28 +23,36 @@ import (
)
type Verification struct {
Mode string
Command string
Mode string
Command string
ExpectedEvidence string
TimeoutSeconds int
TimeoutSeconds int
EvidenceGrade string
TaskType string
}
type TaskResult struct {
TaskID string
TaskName string
Verified bool
Command string
ExitCode int
Stdout string
Stderr string
Error string
Reason string
TaskID string
TaskName string
Verified bool
Command string
ExitCode int
Stdout string
Stderr string
StdoutSummary string
StderrSummary string
Error string
Reason string
EvidenceGrade string
TaskType string
}
func main() {
dryRun := flag.Bool("dry-run", false, "print commands without executing")
taskFilter := flag.String("task", "", "filter by task ID (e.g. T-Q2-1.1)")
tasksPathFlag := flag.String("tasks", "", "path to TASKS.md")
statusFilter := flag.String("status", "all", "filter by normalized status: all|completed|in_progress|planned|paused|unknown")
completedOnly := flag.Bool("completed-only", false, "shortcut for --status completed")
flag.Parse()
tasksPath := resolveTasksPath(*tasksPathFlag)
@@ -65,8 +75,18 @@ func main() {
tasks = filtered
}
effectiveStatus := *statusFilter
if *completedOnly {
effectiveStatus = "completed"
}
tasks, err = filterTasksByStatus(tasks, effectiveStatus)
if err != nil {
fmt.Fprintf(os.Stderr, "filter tasks: %v\n", err)
os.Exit(1)
}
fmt.Printf("=== Verification Report (%s) ===\n", time.Now().Format("2006-01-02 15:04"))
fmt.Printf("Tasks checked: %d | Dry-run: %v | TASKS: %s\n\n", len(tasks), *dryRun, tasksPath)
fmt.Printf("Tasks checked: %d | Dry-run: %v | Status: %s | TASKS: %s\n\n", len(tasks), *dryRun, effectiveStatus, tasksPath)
var passed, failed int
var results []TaskResult
@@ -87,17 +107,24 @@ func main() {
icon = "❌"
}
fmt.Printf("%s [%s] %s\n", icon, r.TaskID, r.TaskName)
if r.Command != "" {
fmt.Printf(" cmd: %s\n", r.Command)
}
if r.EvidenceGrade != "" || r.TaskType != "" {
fmt.Printf(" grade: %s | type: %s\n", r.EvidenceGrade, r.TaskType)
}
if r.StderrSummary != "" {
fmt.Printf(" stderr: %s\n", r.StderrSummary)
}
if r.StdoutSummary != "" && (!r.Verified || r.Reason != "" || r.Error != "") {
fmt.Printf(" stdout: %s\n", r.StdoutSummary)
}
if r.Error != "" {
fmt.Printf(" ERROR: %s\n", r.Error)
} else {
if r.Command != "" {
fmt.Printf(" cmd: %s\n", r.Command)
}
if r.ExitCode != 0 && r.Stdout != "" {
fmt.Printf(" output: %s\n", strings.TrimSpace(r.Stdout))
} else if r.Reason != "" {
fmt.Printf(" reason: %s\n", r.Reason)
}
} else if r.ExitCode != 0 && r.Stdout != "" {
fmt.Printf(" output: %s\n", strings.TrimSpace(r.Stdout))
} else if r.Reason != "" {
fmt.Printf(" reason: %s\n", r.Reason)
}
}
@@ -108,28 +135,40 @@ func main() {
}
func resolveTasksPath(flagValue string) string {
envValue := os.Getenv("TASKS_PATH")
wd := ""
if currentWD, err := os.Getwd(); err == nil {
wd = currentWD
}
sourceDir := ""
if _, sourcePath, _, ok := runtime.Caller(0); ok {
sourceDir = filepath.Dir(sourcePath)
}
return resolveTasksPathWithContext(flagValue, envValue, wd, sourceDir, "/home/long/.openclaw/workspace/TASKS.md")
}
func resolveTasksPathWithContext(flagValue, envValue, wd, sourceDir, globalTasksPath string) string {
candidates := []string{}
if flagValue != "" {
candidates = append(candidates, flagValue)
}
if envValue := os.Getenv("TASKS_PATH"); envValue != "" {
if envValue != "" {
candidates = append(candidates, envValue)
}
if wd, err := os.Getwd(); err == nil {
if wd != "" {
candidates = append(candidates,
filepath.Join(wd, "TASKS.md"),
filepath.Join(wd, "..", "TASKS.md"),
)
}
if _, sourcePath, _, ok := runtime.Caller(0); ok {
scriptDir := filepath.Dir(sourcePath)
candidates = append(candidates, filepath.Join(scriptDir, "..", "TASKS.md"))
defaultProjectTasks := ""
if sourceDir != "" {
defaultProjectTasks = filepath.Join(sourceDir, "..", "TASKS.md")
candidates = append(candidates, defaultProjectTasks)
}
candidates = append(candidates, "/home/long/.openclaw/workspace/TASKS.md")
seen := map[string]struct{}{}
for _, candidate := range candidates {
if candidate == "" {
@@ -148,16 +187,26 @@ func resolveTasksPath(flagValue string) string {
if flagValue != "" {
return filepath.Clean(flagValue)
}
if envValue := os.Getenv("TASKS_PATH"); envValue != "" {
if envValue != "" {
return filepath.Clean(envValue)
}
return "/home/long/.openclaw/workspace/TASKS.md"
if defaultProjectTasks != "" {
return filepath.Clean(defaultProjectTasks)
}
if wd != "" {
return filepath.Clean(filepath.Join(wd, "TASKS.md"))
}
if globalTasksPath != "" {
return filepath.Clean(globalTasksPath)
}
return "TASKS.md"
}
type taskEntry struct {
ID string
Name string
Verification Verification
ID string
Name string
Status string
Verification Verification
HasVerification bool
}
@@ -176,7 +225,7 @@ func parseTasks(f *os.File) []taskEntry {
if currentTask != nil {
tasks = append(tasks, *currentTask)
}
currentTask = &taskEntry{ID: m[1], Name: m[2]}
currentTask = &taskEntry{ID: m[1], Name: m[2], Status: normalizeStatusFromText(line)}
inVerification = false
continue
}
@@ -193,6 +242,10 @@ func parseTasks(f *os.File) []taskEntry {
}
if !inVerification {
statusRe := regexp.MustCompile(`^\s*-\s+\*\*状态\*\*(.+)$`)
if m := statusRe.FindStringSubmatch(line); m != nil {
currentTask.Status = normalizeStatusFromText(m[1])
}
continue
}
@@ -216,6 +269,18 @@ func parseTasks(f *os.File) []taskEntry {
continue
}
evidenceGradeRe := regexp.MustCompile(`^\s+- evidence_grade:\s+` + "`" + `([^` + "`" + `]+)` + "`")
if m := evidenceGradeRe.FindStringSubmatch(line); m != nil {
currentTask.Verification.EvidenceGrade = m[1]
continue
}
taskTypeRe := regexp.MustCompile(`^\s+- task_type:\s+` + "`" + `([^` + "`" + `]+)` + "`")
if m := taskTypeRe.FindStringSubmatch(line); m != nil {
currentTask.Verification.TaskType = m[1]
continue
}
timeoutRe := regexp.MustCompile(`^\s+- timeout_seconds:\s+(\d+)`)
if m := timeoutRe.FindStringSubmatch(line); m != nil {
fmt.Sscanf(m[1], "%d", &currentTask.Verification.TimeoutSeconds)
@@ -244,6 +309,18 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
return r
}
t.Verification.Mode = strings.TrimSpace(t.Verification.Mode)
t.Verification.TaskType = normalizeTaskType(t.Verification.TaskType)
t.Verification.EvidenceGrade = normalizeEvidenceGrade(t.Verification.Mode, t.Verification.EvidenceGrade)
r.TaskType = t.Verification.TaskType
r.EvidenceGrade = t.Verification.EvidenceGrade
if validationErr := validateVerification(t.Verification); validationErr != "" {
r.Verified = false
r.Reason = validationErr
return r
}
if t.Verification.Command == "" {
r.Reason = "verification.command is empty"
r.Verified = false
@@ -283,6 +360,8 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
r.Stdout = stdout.String()
r.Stderr = stderr.String()
r.StdoutSummary = summarizeOutput(r.Stdout)
r.StderrSummary = summarizeOutput(r.Stderr)
if r.ExitCode != 0 && t.Verification.Mode == "test_pass" {
r.Verified = false
@@ -325,3 +404,134 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
return r
}
// normalizeEvidenceGrade returns the evidence grade to report for a task.
// An explicit grade wins (after trimming); otherwise the grade is derived from
// the verification mode. Unknown modes yield "".
func normalizeEvidenceGrade(mode, explicit string) string {
	if trimmed := strings.TrimSpace(explicit); trimmed != "" {
		return trimmed
	}
	defaults := map[string]string{
		"test_pass":        "runtime-verified",
		"artifact_present": "artifact-present",
		"semantic":         "doc-claimed",
	}
	// A missing key yields the zero value "", matching the unknown-mode case.
	return defaults[strings.TrimSpace(mode)]
}
// normalizeTaskType trims raw and substitutes "unspecified" for a blank value.
func normalizeTaskType(raw string) string {
	if trimmed := strings.TrimSpace(raw); trimmed != "" {
		return trimmed
	}
	return "unspecified"
}
// normalizeStatusFromText maps free-form status text (a task heading or a
// status field) onto one of: "completed", "in_progress", "planned", "paused",
// "unknown". Matching is by marker emoji or Chinese keyword, checked in that
// priority order.
//
// "未完成" (not completed) contains the keyword "完成", so a plain substring
// test would classify it as completed. The negated phrase is removed before
// the keyword test; text whose only signal is "未完成" therefore falls through
// to the remaining cases and ends as "unknown" unless another marker matches.
func normalizeStatusFromText(raw string) string {
	withoutNegated := strings.ReplaceAll(raw, "未完成", "")
	switch {
	case strings.Contains(raw, "✅") || strings.Contains(withoutNegated, "完成"):
		return "completed"
	case strings.Contains(raw, "🟡") || strings.Contains(raw, "进行中"):
		return "in_progress"
	case strings.Contains(raw, "🔶") || strings.Contains(raw, "🔴") || strings.Contains(raw, "待启动") || strings.Contains(raw, "未开始"):
		return "planned"
	case strings.Contains(raw, "⏸️") || strings.Contains(raw, "待规划") || strings.Contains(raw, "暂停"):
		return "paused"
	default:
		// Covers blank input as well as text without any known marker.
		return "unknown"
	}
}
// filterTasksByStatus keeps the tasks whose normalized status equals filter.
//
// filter is trimmed and a blank value means "all". "all" returns the input
// slice unchanged. A task with an empty Status is treated as "unknown". Any
// filter outside all|completed|in_progress|planned|paused|unknown is an error.
func filterTasksByStatus(tasks []taskEntry, filter string) ([]taskEntry, error) {
	wanted := strings.TrimSpace(filter)
	if wanted == "" {
		wanted = "all"
	}

	switch wanted {
	case "all":
		return tasks, nil
	case "completed", "in_progress", "planned", "paused", "unknown":
		// valid filter; fall out of the switch and select below
	default:
		return nil, fmt.Errorf("unsupported status filter: %s", wanted)
	}

	matched := make([]taskEntry, 0, len(tasks))
	for _, entry := range tasks {
		effective := entry.Status
		if effective == "" {
			effective = "unknown"
		}
		if effective == wanted {
			matched = append(matched, entry)
		}
	}
	return matched, nil
}
// summarizeOutput condenses command output into a single line for the report:
// whitespace runs collapse to single spaces, and anything longer than 220
// bytes is truncated and suffixed with "...".
//
// Truncation happens on a rune boundary. Cutting at a fixed byte offset could
// split a multi-byte UTF-8 character (for example Chinese output) and emit an
// invalid byte sequence. For ASCII-only text the cut is still exactly 220 bytes.
func summarizeOutput(raw string) string {
	cleaned := strings.TrimSpace(raw)
	if cleaned == "" {
		return ""
	}
	cleaned = strings.Join(strings.Fields(cleaned), " ")

	const limit = 220
	if len(cleaned) <= limit {
		return cleaned
	}

	// Ranging over a string yields the byte offset of each rune start; keep the
	// largest offset that does not exceed the limit.
	cut := 0
	for offset := range cleaned {
		if offset > limit {
			break
		}
		cut = offset
	}
	return cleaned[:cut] + "..."
}
// validateVerification checks a verification spec and returns a human-readable
// rejection reason, or "" when the spec is acceptable.
//
// Checks run in this order: the mode must be a known mode; a non-empty
// evidence grade must be a known grade; the task type must be a known type;
// and "code" or "automation" tasks may not rely on semantic-only verification.
func validateVerification(v Verification) string {
	switch v.Mode {
	case "test_pass", "artifact_present", "semantic":
	default:
		return fmt.Sprintf("unsupported verification mode: %s", v.Mode)
	}

	switch v.EvidenceGrade {
	case "", "runtime-verified", "artifact-present", "doc-claimed":
		// an empty grade is allowed and skips the check
	default:
		return fmt.Sprintf("unsupported evidence grade: %s", v.EvidenceGrade)
	}

	switch v.TaskType {
	case "unspecified", "code", "automation", "documentation", "configuration", "data", "analysis":
	default:
		return fmt.Sprintf("unsupported task type: %s", v.TaskType)
	}

	if v.Mode == "semantic" && (v.TaskType == "code" || v.TaskType == "automation") {
		return fmt.Sprintf("semantic-only verification is not allowed for %s tasks", v.TaskType)
	}
	return ""
}

View File

@@ -0,0 +1,268 @@
//go:build llm_script
package main
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestParseTasksParsesEvidenceFields writes a one-task Markdown document to a
// temp file, parses it with parseTasks and checks every verification field:
// mode, command, expected evidence, evidence grade, task type and timeout.
//
// NOTE(review): the nested fixture bullets below carry no leading whitespace
// in this view, while the field patterns in parseTasks (e.g.
// `^\s+- evidence_grade:`) require some. Presumably the indentation was lost
// in transit — confirm the real fixture is indented.
func TestParseTasksParsesEvidenceFields(t *testing.T) {
md := `# Tasks
### T-1 ✅ Example
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
- evidence_grade: ` + "`runtime-verified`" + `
- task_type: ` + "`code`" + `
- timeout_seconds: 15
`
// t.TempDir() is removed automatically when the test ends.
tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-*.md")
if err != nil {
t.Fatalf("create temp file: %v", err)
}
defer tmpFile.Close()
if _, err := tmpFile.WriteString(md); err != nil {
t.Fatalf("write temp file: %v", err)
}
// Rewind so parseTasks reads the file from the beginning.
if _, err := tmpFile.Seek(0, 0); err != nil {
t.Fatalf("seek temp file: %v", err)
}
tasks := parseTasks(tmpFile)
if len(tasks) != 1 {
t.Fatalf("expected 1 task, got %d", len(tasks))
}
got := tasks[0].Verification
if got.Mode != "test_pass" {
t.Fatalf("expected mode test_pass, got %q", got.Mode)
}
if got.Command != "echo ok" {
t.Fatalf("expected command echo ok, got %q", got.Command)
}
if got.ExpectedEvidence != "ok" {
t.Fatalf("expected evidence ok, got %q", got.ExpectedEvidence)
}
if got.EvidenceGrade != "runtime-verified" {
t.Fatalf("expected evidence grade runtime-verified, got %q", got.EvidenceGrade)
}
if got.TaskType != "code" {
t.Fatalf("expected task type code, got %q", got.TaskType)
}
if got.TimeoutSeconds != 15 {
t.Fatalf("expected timeout 15, got %d", got.TimeoutSeconds)
}
}
func TestVerifyTaskRejectsSemanticOnlyForCodeTask(t *testing.T) {
task := taskEntry{
ID: "T-1",
Name: "semantic code task",
Verification: Verification{
Mode: "semantic",
Command: "echo ok",
TaskType: "code",
EvidenceGrade: "doc-claimed",
},
HasVerification: true,
}
result := verifyTask(task, true)
if result.Verified {
t.Fatalf("expected semantic-only code task to fail")
}
if !strings.Contains(result.Reason, "semantic-only") {
t.Fatalf("expected semantic-only rejection reason, got %q", result.Reason)
}
}
// TestVerifyTaskDefaultsEvidenceGradeFromMode checks that a task without an
// explicit evidence grade passes in dry-run mode and is reported with the
// grade derived from its mode ("artifact_present" -> "artifact-present").
func TestVerifyTaskDefaultsEvidenceGradeFromMode(t *testing.T) {
	entry := taskEntry{
		ID:              "T-2",
		Name:            "artifact task",
		HasVerification: true,
		Verification: Verification{
			Mode:             "artifact_present",
			Command:          "echo exists",
			ExpectedEvidence: "exists",
		},
	}

	outcome := verifyTask(entry, true)

	if !outcome.Verified {
		t.Fatalf("expected dry-run artifact task to pass, got reason %q", outcome.Reason)
	}
	if grade := outcome.EvidenceGrade; grade != "artifact-present" {
		t.Fatalf("expected default evidence grade artifact-present, got %q", grade)
	}
}
// TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal lays out a temporary
// tree with project/TASKS.md, project/scripts/ and workspace/TASKS.md, then
// calls resolveTasksPathWithContext with its first two arguments empty and
// expects the project-level TASKS.md to be returned rather than the workspace
// one.
//
// NOTE(review): the argument order looks like (explicit path, second override,
// working directory, script directory, global tasks path) — confirm against
// resolveTasksPathWithContext.
func TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal(t *testing.T) {
	base := t.TempDir()
	var (
		projectDir = filepath.Join(base, "project")
		globalDir  = filepath.Join(base, "workspace")
		scriptDir  = filepath.Join(projectDir, "scripts")
	)

	// Small fixtures helpers; each aborts the test with the caller's message.
	mkdir := func(dir, failFormat string) {
		if err := os.MkdirAll(dir, 0o755); err != nil {
			t.Fatalf(failFormat, err)
		}
	}
	write := func(path, body, failFormat string) {
		if err := os.WriteFile(path, []byte(body), 0o644); err != nil {
			t.Fatalf(failFormat, err)
		}
	}

	mkdir(projectDir, "mkdir project dir: %v")
	mkdir(globalDir, "mkdir global dir: %v")
	mkdir(scriptDir, "mkdir script dir: %v")

	projectTasks := filepath.Join(projectDir, "TASKS.md")
	globalTasks := filepath.Join(globalDir, "TASKS.md")
	write(projectTasks, "# project", "write project tasks: %v")
	write(globalTasks, "# global", "write global tasks: %v")

	outside := filepath.Join(base, "outside")
	if resolved := resolveTasksPathWithContext("", "", outside, scriptDir, globalTasks); resolved != projectTasks {
		t.Fatalf("expected project tasks path, got %q", resolved)
	}
}
// TestResolveTasksPathAllowsExplicitGlobalPath uses the same temporary layout
// as the implicit-fallback test (project/TASKS.md, project/scripts/,
// workspace/TASKS.md) but passes the workspace TASKS.md as the first argument
// of resolveTasksPathWithContext, and expects that exact path to be returned.
func TestResolveTasksPathAllowsExplicitGlobalPath(t *testing.T) {
	base := t.TempDir()
	var (
		projectDir = filepath.Join(base, "project")
		globalDir  = filepath.Join(base, "workspace")
		scriptDir  = filepath.Join(projectDir, "scripts")
	)

	// Small fixture helpers; each aborts the test with the caller's message.
	mkdir := func(dir, failFormat string) {
		if err := os.MkdirAll(dir, 0o755); err != nil {
			t.Fatalf(failFormat, err)
		}
	}
	write := func(path, body, failFormat string) {
		if err := os.WriteFile(path, []byte(body), 0o644); err != nil {
			t.Fatalf(failFormat, err)
		}
	}

	mkdir(projectDir, "mkdir project dir: %v")
	mkdir(globalDir, "mkdir global dir: %v")
	mkdir(scriptDir, "mkdir script dir: %v")

	projectTasks := filepath.Join(projectDir, "TASKS.md")
	globalTasks := filepath.Join(globalDir, "TASKS.md")
	write(projectTasks, "# project", "write project tasks: %v")
	write(globalTasks, "# global", "write global tasks: %v")

	outside := filepath.Join(base, "outside")
	if resolved := resolveTasksPathWithContext(globalTasks, "", outside, scriptDir, globalTasks); resolved != globalTasks {
		t.Fatalf("expected explicit global tasks path, got %q", resolved)
	}
}
// TestVerifyTaskCapturesFailureSummaries runs verifyTask (second argument
// false) on a task whose command writes to stdout and stderr and then exits
// with status 1. It expects the result to be unverified and the stdout/stderr
// summaries to contain the respective command output.
func TestVerifyTaskCapturesFailureSummaries(t *testing.T) {
	failing := taskEntry{
		ID:              "T-3",
		Name:            "failing task",
		HasVerification: true,
		Verification: Verification{
			Mode:             "test_pass",
			Command:          "echo standard-output && echo standard-error 1>&2 && exit 1",
			ExpectedEvidence: "unused",
			TaskType:         "automation",
		},
	}

	outcome := verifyTask(failing, false)
	if outcome.Verified {
		t.Fatalf("expected failing task to fail")
	}

	stdout, stderr := outcome.StdoutSummary, outcome.StderrSummary
	if !strings.Contains(stdout, "standard-output") {
		t.Fatalf("expected stdout summary to contain command output, got %q", stdout)
	}
	if !strings.Contains(stderr, "standard-error") {
		t.Fatalf("expected stderr summary to contain command error, got %q", stderr)
	}
}
// TestParseTasksParsesNormalizedStatus writes a markdown fixture with three
// task sections (T-1, T-2, T-3) whose status lines carry different emoji and
// Chinese labels, parses it with parseTasks, and expects the Status fields to
// be "completed", "planned" and "paused" respectively.
func TestParseTasksParsesNormalizedStatus(t *testing.T) {
// The fixture is assembled by concatenation because the backtick-quoted
// values cannot appear inside a Go raw string literal.
md := `# Tasks
### T-1 ✅ Done task
- **状态**:✅ 完成2026-05-11
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
### T-2 🔶 Planned task
- **状态**:🔶 待启动
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
### T-3 ⏸️ Paused task
- **状态**:⏸️ 待规划
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
`
// The file lives under t.TempDir(), which the testing package removes when
// the test finishes.
tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-status-*.md")
if err != nil {
t.Fatalf("create temp file: %v", err)
}
defer tmpFile.Close()
if _, err := tmpFile.WriteString(md); err != nil {
t.Fatalf("write temp file: %v", err)
}
// Rewind so parseTasks reads the fixture from the beginning.
if _, err := tmpFile.Seek(0, 0); err != nil {
t.Fatalf("seek temp file: %v", err)
}
tasks := parseTasks(tmpFile)
if len(tasks) != 3 {
t.Fatalf("expected 3 tasks, got %d", len(tasks))
}
// Tasks are checked in document order: T-1, T-2, T-3.
if tasks[0].Status != "completed" {
t.Fatalf("expected first task status completed, got %q", tasks[0].Status)
}
if tasks[1].Status != "planned" {
t.Fatalf("expected second task status planned, got %q", tasks[1].Status)
}
if tasks[2].Status != "paused" {
t.Fatalf("expected third task status paused, got %q", tasks[2].Status)
}
}
// TestFilterTasksByStatus checks filterTasksByStatus against a three-task
// fixture: the "completed" filter must return only T-1, and the "all" filter
// must return every task. Neither call may return an error.
func TestFilterTasksByStatus(t *testing.T) {
	fixture := []taskEntry{
		{ID: "T-1", Status: "completed"},
		{ID: "T-2", Status: "planned"},
		{ID: "T-3", Status: "in_progress"},
	}

	onlyCompleted, completedErr := filterTasksByStatus(fixture, "completed")
	if completedErr != nil {
		t.Fatalf("unexpected error: %v", completedErr)
	}
	if !(len(onlyCompleted) == 1 && onlyCompleted[0].ID == "T-1") {
		t.Fatalf("expected only completed task, got %#v", onlyCompleted)
	}

	everything, allErr := filterTasksByStatus(fixture, "all")
	if allErr != nil {
		t.Fatalf("unexpected error for all: %v", allErr)
	}
	if count := len(everything); count != 3 {
		t.Fatalf("expected all 3 tasks, got %d", count)
	}
}

124
scripts/verify_common.sh Executable file
View File

@@ -0,0 +1,124 @@
#!/bin/bash
# verify_common.sh - shared helpers for the verify_*.sh acceptance scripts.
#
# This file is meant to be sourced. It defines:
#   * SCRIPT_DIR / PROJECT_ROOT   - locations derived from this file's path
#   * VERIFY_DB_NAME              - database used by sql_scalar (overridable)
#   * PASS_COUNT / FAIL_COUNT / WARN_COUNT - running tallies
#   * pass / fail / warn          - print a tagged line and bump a tally
#   * check_*                     - individual assertions
#   * finish_phase                - print the summary and exit 0/1
#
# Every check_* function returns 0 whether the check passed or failed, so the
# helpers are safe to call from scripts running under `set -e`; the outcome is
# recorded in the counters only.
set -u

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
VERIFY_DB_NAME="${VERIFY_DB_NAME:-llm_intelligence}"
PASS_COUNT=0
FAIL_COUNT=0
WARN_COUNT=0

# pass MESSAGE - record a passed check.
pass() {
  echo "[PASS] $1"
  PASS_COUNT=$((PASS_COUNT + 1))
}

# fail MESSAGE - record a failed check.
fail() {
  echo "[FAIL] $1"
  FAIL_COUNT=$((FAIL_COUNT + 1))
}

# warn MESSAGE - record a warning (does not affect the final result).
warn() {
  echo "[WARN] $1"
  WARN_COUNT=$((WARN_COUNT + 1))
}

# sql_scalar SQL - run SQL against VERIFY_DB_NAME and print the unaligned,
# tuples-only result. The exit status is psql's.
sql_scalar() {
  local sql="$1"
  psql -d "$VERIFY_DB_NAME" -Atqc "$sql"
}

# _verify_mktemp - create a private scratch file and print its path.
# Per-call files are used instead of fixed /tmp names because verify scripts
# invoke each other through check_shell; a shared name would be truncated by
# the inner run while the outer run still has it open.
_verify_mktemp() {
  mktemp "${TMPDIR:-/tmp}/llm_verify.XXXXXX"
}

# _verify_one_line - read stdin, turn every run of whitespace (including
# newlines) into a single space and drop one trailing space.
_verify_one_line() {
  tr -s '[:space:]' ' ' | sed 's/ $//'
}

# check_file PATH DESC - pass when PROJECT_ROOT/PATH is a regular file.
check_file() {
  local path="$1"
  local desc="$2"
  if [ -f "$PROJECT_ROOT/$path" ]; then
    pass "$desc"
  else
    fail "$desc (缺少文件: $path)"
  fi
}

# check_executable PATH DESC - pass when PROJECT_ROOT/PATH is executable.
check_executable() {
  local path="$1"
  local desc="$2"
  if [ -x "$PROJECT_ROOT/$path" ]; then
    pass "$desc"
  else
    fail "$desc (不可执行: $path)"
  fi
}

# check_shell DESC CMD - run CMD in a login bash from PROJECT_ROOT and pass when
# it exits 0. On failure the command's stderr (or, if that is empty, its
# stdout) is flattened to one line and appended to the failure message.
check_shell() {
  local desc="$1"
  local cmd="$2"
  local out_file err_file details=""

  if ! out_file="$(_verify_mktemp)"; then
    fail "$desc (无法创建临时文件)"
    return 0
  fi
  if ! err_file="$(_verify_mktemp)"; then
    rm -f "$out_file"
    fail "$desc (无法创建临时文件)"
    return 0
  fi

  if bash -lc "cd \"$PROJECT_ROOT\" && $cmd" >"$out_file" 2>"$err_file"; then
    pass "$desc"
  else
    if [ -s "$err_file" ]; then
      details="$(_verify_one_line <"$err_file")"
    elif [ -s "$out_file" ]; then
      details="$(_verify_one_line <"$out_file")"
    fi
    if [ -n "$details" ]; then
      fail "$desc ($details)"
    else
      fail "$desc"
    fi
  fi
  rm -f "$out_file" "$err_file"
}

# _verify_sql_value DESC SQL - run SQL via sql_scalar and store the result,
# with all whitespace removed, in VERIFY_SQL_ACTUAL.
# Returns 0 on success. When psql fails, records a failure for DESC (including
# psql's stderr) and returns 1.
#
# psql's exit status is tested directly in an `if`, so the result does not
# depend on the caller having enabled pipefail and the caller's errexit
# setting is never touched.
_verify_sql_value() {
  local desc="$1"
  local sql="$2"
  local err_file raw details

  VERIFY_SQL_ACTUAL=""
  if ! err_file="$(_verify_mktemp)"; then
    fail "$desc (无法创建临时文件)"
    return 1
  fi

  if raw="$(sql_scalar "$sql" 2>"$err_file")"; then
    rm -f "$err_file"
    VERIFY_SQL_ACTUAL="${raw//[[:space:]]/}"
    return 0
  fi

  details="$(_verify_one_line <"$err_file")"
  rm -f "$err_file"
  fail "$desc (SQL执行失败: ${details:-unknown error})"
  return 1
}

# check_sql_int_ge DESC SQL EXPECTED - pass when SQL yields an integer that is
# greater than or equal to EXPECTED.
check_sql_int_ge() {
  local desc="$1"
  local sql="$2"
  local expected="$3"
  local actual

  # The failure has already been recorded when the query itself fails.
  _verify_sql_value "$desc" "$sql" || return 0
  actual="$VERIFY_SQL_ACTUAL"

  if [[ "$actual" =~ ^-?[0-9]+$ ]] && [ "$actual" -ge "$expected" ]; then
    pass "$desc (当前: $actual, 期望 >= $expected)"
  else
    fail "$desc (当前: ${actual:-N/A}, 期望 >= $expected)"
  fi
}

# check_sql_int_eq DESC SQL EXPECTED - pass when SQL yields an integer equal to
# EXPECTED.
check_sql_int_eq() {
  local desc="$1"
  local sql="$2"
  local expected="$3"
  local actual

  # The failure has already been recorded when the query itself fails.
  _verify_sql_value "$desc" "$sql" || return 0
  actual="$VERIFY_SQL_ACTUAL"

  if [[ "$actual" =~ ^-?[0-9]+$ ]] && [ "$actual" -eq "$expected" ]; then
    pass "$desc (当前: $actual)"
  else
    fail "$desc (当前: ${actual:-N/A}, 期望 = $expected)"
  fi
}

# finish_phase - print the tallies and terminate the calling script:
# exit 0 when no check failed, exit 1 otherwise. Warnings never fail a phase.
finish_phase() {
  echo
  echo "SUMMARY pass=$PASS_COUNT fail=$FAIL_COUNT warn=$WARN_COUNT"
  if [ "$FAIL_COUNT" -eq 0 ]; then
    echo "PHASE_RESULT: PASS"
    exit 0
  fi
  echo "PHASE_RESULT: FAIL"
  exit 1
}

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env bash
# Lighthouse performance gate for the frontend.
#
# Builds the frontend, serves the build with `vite preview`, runs a
# desktop-preset, performance-only Lighthouse audit against it and fails when
# the performance score or First Contentful Paint misses its threshold.
#
# Environment overrides (defaults in parentheses):
#   LIGHTHOUSE_PORT              (4173)
#   LIGHTHOUSE_SCORE_THRESHOLD   (80)    minimum performance score, 0-100
#   LIGHTHOUSE_FCP_THRESHOLD_MS  (2000)  FCP must be strictly below this
#
# Output: KEY=VALUE lines on stdout. Every failure path prints
# LIGHTHOUSE_ERROR=<reason> before exiting 1, so callers never see a silent
# non-zero exit.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
FRONTEND_DIR="$PROJECT_ROOT/frontend"
LIGHTHOUSE_PORT="${LIGHTHOUSE_PORT:-4173}"
LIGHTHOUSE_SCORE_THRESHOLD="${LIGHTHOUSE_SCORE_THRESHOLD:-80}"
LIGHTHOUSE_FCP_THRESHOLD_MS="${LIGHTHOUSE_FCP_THRESHOLD_MS:-2000}"
LIGHTHOUSE_URL="http://127.0.0.1:${LIGHTHOUSE_PORT}"
LIGHTHOUSE_JSON="${TMPDIR:-/tmp}/llm_lighthouse_report.json"
PREVIEW_LOG="${TMPDIR:-/tmp}/llm_lighthouse_preview.log"
PREVIEW_PID=""
CHROME_BIN=""

# Stop the preview server if we started one and it is still running.
cleanup() {
  if [[ -n "${PREVIEW_PID:-}" ]] && kill -0 "$PREVIEW_PID" >/dev/null 2>&1; then
    kill "$PREVIEW_PID" >/dev/null 2>&1 || true
    wait "$PREVIEW_PID" >/dev/null 2>&1 || true
  fi
}
trap cleanup EXIT

# die REASON - print the machine-readable error marker and exit 1.
die() {
  echo "LIGHTHOUSE_ERROR=$1"
  exit 1
}

# Pick the first Chrome/Chromium binary that exists at a well-known path.
for candidate in /usr/bin/google-chrome-stable /usr/bin/google-chrome /snap/bin/chromium /usr/bin/chromium /usr/bin/chromium-browser; do
  if [[ -x "$candidate" ]]; then
    CHROME_BIN="$candidate"
    break
  fi
done
if [[ -z "$CHROME_BIN" ]]; then
  die "chrome-not-found"
fi

cd "$FRONTEND_DIR"
# Build output is captured in files; without the explicit marker a build
# failure would terminate the script with no output at all.
npm run build >/tmp/llm_lighthouse_build.out 2>/tmp/llm_lighthouse_build.err || die "build-failed"

# If something already answers on the target URL we would audit that server
# instead of the build we just produced, so refuse to continue.
if curl -fsS "$LIGHTHOUSE_URL" >/dev/null 2>&1; then
  die "port-in-use"
fi

# --strictPort makes vite exit instead of silently moving to another port.
npm exec vite preview -- --host 127.0.0.1 --port "$LIGHTHOUSE_PORT" --strictPort >"$PREVIEW_LOG" 2>&1 &
PREVIEW_PID=$!

# Poll for readiness (30 attempts, 0.5s apart); stop early if the preview
# process has already died.
preview_ready=0
for _ in $(seq 1 30); do
  if ! kill -0 "$PREVIEW_PID" >/dev/null 2>&1; then
    break
  fi
  if curl -fsS "$LIGHTHOUSE_URL" >/dev/null 2>&1; then
    preview_ready=1
    break
  fi
  sleep 0.5
done
if (( preview_ready == 0 )); then
  die "preview-not-ready"
fi

npx lighthouse "$LIGHTHOUSE_URL" \
  --chrome-path="$CHROME_BIN" \
  --only-categories=performance \
  --preset=desktop \
  --output=json \
  --output-path="$LIGHTHOUSE_JSON" \
  --quiet \
  --chrome-flags="--headless=new --no-sandbox --disable-dev-shm-usage" \
  >/tmp/llm_lighthouse_stdout.out 2>/tmp/llm_lighthouse_stderr.err || die "lighthouse-run-failed"

# Extract "<score> <fcp> <speed index> <lcp>" from the JSON report. The
# command substitution is checked explicitly so that a node failure is reported
# instead of leaving the metrics empty.
if ! metrics="$(node -e '
const fs = require("fs");
const report = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
const perf = Math.round((report.categories.performance.score || 0) * 100);
const fcp = Math.round(report.audits["first-contentful-paint"]?.numericValue || 0);
const speedIndex = Math.round(report.audits["speed-index"]?.numericValue || 0);
const lcp = Math.round(report.audits["largest-contentful-paint"]?.numericValue || 0);
console.log(`${perf} ${fcp} ${speedIndex} ${lcp}`);
' "$LIGHTHOUSE_JSON")"; then
  die "report-parse-failed"
fi
read -r score fcp speed_index lcp <<<"$metrics"

# Guard the arithmetic comparisons below against empty or non-numeric fields.
for metric in "$score" "$fcp" "$speed_index" "$lcp"; do
  if [[ ! "$metric" =~ ^[0-9]+$ ]]; then
    die "report-parse-failed"
  fi
done

echo "LIGHTHOUSE_SCORE=${score}"
echo "LIGHTHOUSE_FCP_MS=${fcp}"
echo "LIGHTHOUSE_SPEED_INDEX_MS=${speed_index}"
echo "LIGHTHOUSE_LCP_MS=${lcp}"

if (( score < LIGHTHOUSE_SCORE_THRESHOLD )); then
  die "score-below-threshold"
fi
if (( fcp >= LIGHTHOUSE_FCP_THRESHOLD_MS )); then
  die "fcp-above-threshold"
fi

34
scripts/verify_phase1.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
# verify_phase1.sh - Phase 1 acceptance checks (database schema and seed data).
#
# Sources verify_common.sh for the check_* helpers and counters, runs each
# check in order, then calls finish_phase, which prints the summary and exits
# 0 only when no check failed.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"
echo "=== Phase 1 验收检查 ==="
# Migration files must be present in the repository.
check_file "db/migrations/001_phase1_core_tables.sql" "核心三表迁移文件存在"
check_file "db/migrations/002_sprint1_complete_schema.sql" "Sprint 1 扩展迁移文件存在"
# All 8 listed tables must exist in the public schema.
check_sql_int_eq "Sprint 1 扩展表全部存在" \
"select count(*) from pg_tables where schemaname='public' and tablename in ('model_provider','operator','region_pricing','pricing_history','free_tier','daily_report','user_subscription','audit_log');" \
8
# The 4 listed columns must exist on a table named "models".
# NOTE(review): this query is not restricted to a schema, so a same-named table
# in another schema would inflate the count — confirm whether that can occur.
check_sql_int_eq "models 扩展字段已落库" \
"select count(*) from information_schema.columns where table_name='models' and column_name in ('provider_id','modality','batch_id','data_confidence');" \
4
# The 5 named CHECK constraints must exist.
check_sql_int_eq "关键 CHECK 约束存在" \
"select count(*) from pg_constraint where conname in ('chk_price_non_negative','chk_currency_valid','chk_models_context_length','chk_models_modality','chk_models_data_confidence');" \
5
# The 8 named updated_at triggers must exist.
check_sql_int_eq "updated_at 触发器已挂载到业务表" \
"select count(*) from pg_trigger where tgname in ('models_updated_at','model_provider_updated_at','operator_updated_at','region_pricing_updated_at','pricing_history_updated_at','free_tier_updated_at','daily_report_updated_at','user_subscription_updated_at');" \
8
# Seed/migrated data thresholds.
check_sql_int_ge "厂商种子数据不少于 6 条" \
"select count(*) from model_provider;" \
6
check_sql_int_ge "region_pricing 已有迁移数据" \
"select count(*) from region_pricing;" \
1
# No models row may be left with a NULL batch_id.
check_sql_int_eq "血缘字段 batch_id 已完成回填" \
"select count(*) from models where batch_id is null;" \
0
finish_phase

32
scripts/verify_phase2.sh Executable file
View File

@@ -0,0 +1,32 @@
#!/bin/bash
# verify_phase2.sh - Phase 2 acceptance checks (collectors and collected data).
#
# Sources verify_common.sh for the check_* helpers, runs the Go test/build
# checks followed by the row-count thresholds, then calls finish_phase to print
# the summary and exit 0 only when no check failed.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"
echo "=== Phase 2 验收检查 ==="
# check_shell runs each command from the project root.
check_shell "ProviderMapper 单元测试通过" "go test ./internal/collectors/..."
check_shell "重试组件单元测试通过" "go test ./internal/retry/..."
# Build only; the binary is discarded.
check_shell "OpenRouter 采集器可独立构建" "go build -o /dev/null ./scripts/fetch_openrouter.go"
# Minimum row counts for providers with country 'CN' and their models.
check_sql_int_ge "国内厂商种子数不少于 7 家" \
"select count(*) from model_provider where country='CN';" \
7
check_sql_int_ge "国内厂商模型数不少于 10 条" \
"select count(*) from models m join model_provider p on m.provider_id = p.id where p.country='CN';" \
10
check_sql_int_ge "CNY 定价记录不少于 10 条" \
"select count(*) from region_pricing where currency='CNY';" \
10
# At least one collector run must have been recorded as successful.
check_sql_int_ge "采集成功统计记录至少 1 条" \
"select count(*) from collector_stats where success = true;" \
1
# Only rows with deleted_at IS NULL are counted towards the model total.
check_sql_int_ge "models 总量达到 371+" \
"select count(*) from models where deleted_at is null;" \
371
check_sql_int_ge "models 审计日志达到 371+" \
"select count(*) from audit_log where table_name='models';" \
371
finish_phase

26
scripts/verify_phase3.sh Executable file
View File

@@ -0,0 +1,26 @@
#!/bin/bash
# verify_phase3.sh - Phase 3 acceptance checks (daily report pipeline).
#
# Sources verify_common.sh for the check_* helpers and PROJECT_ROOT, verifies
# that the daily-report scripts, today's report files, the database record,
# the cron entry and the API key are in place, then calls finish_phase.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"

# Take the date once and derive the archive directory (YYYY/MM) from it, so the
# two values cannot disagree when the script runs across midnight.
TODAY="$(date +%Y-%m-%d)"
ARCHIVE_DIR="reports/daily/${TODAY:0:4}/${TODAY:5:2}"

# env_file_has_api_key FILE - succeed when PROJECT_ROOT/FILE exists and contains
# a non-empty OPENROUTER_API_KEY assignment at the start of a line.
env_file_has_api_key() {
  local env_file="$PROJECT_ROOT/$1"
  [ -f "$env_file" ] && grep -Eq '^OPENROUTER_API_KEY=.+' "$env_file"
}

echo "=== Phase 3 验收检查 ==="
check_executable "scripts/run_daily.sh" "日报流水线脚本可执行"
check_executable "scripts/feishu_alert.sh" "飞书告警脚本可执行"
check_shell "日报生成器可独立构建" "go build -o /dev/null ./scripts/generate_daily_report.go"
check_shell "日报脚本包含降级逻辑" "grep -q 'fallback_report' scripts/run_daily.sh"
check_shell "日报脚本包含飞书告警逻辑" "grep -q 'send_alert' scripts/run_daily.sh"
check_shell "今日日报文件存在且包含数据质量摘要" "test -f reports/daily/daily_report_${TODAY}.md && grep -q '数据质量摘要' reports/daily/daily_report_${TODAY}.md"
check_shell "今日归档报告存在" "test -f ${ARCHIVE_DIR}/daily_report_${TODAY}.md"
check_sql_int_ge "daily_report 已写入至少 1 条 generated 记录" \
  "select count(*) from daily_report where status='generated';" \
  1
check_shell "crontab 已配置每日调度" "crontab -l 2>/dev/null | grep -q 'scripts/run_daily.sh'"

# The key is tested in this shell rather than through check_shell: building a
# command string from the key would place the secret on the child process's
# command line and would break on keys containing quote characters.
if [ -n "${OPENROUTER_API_KEY:-}" ] || env_file_has_api_key ".env.local" || env_file_has_api_key ".env"; then
  pass "真实采集 API Key 已配置"
else
  fail "真实采集 API Key 已配置"
fi

finish_phase

21
scripts/verify_phase4.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# verify_phase4.sh - Phase 4 acceptance checks (frontend).
#
# Sources verify_common.sh for the check_* helpers, checks that the frontend
# project files exist and build, greps the sources for markers of the expected
# features, then calls finish_phase. The grep checks only prove that a string
# occurs in a file, not that the feature works.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"
echo "=== Phase 4 验收检查 ==="
check_file "frontend/package.json" "前端 package.json 存在"
check_file "frontend/vite.config.ts" "前端 vite.config.ts 存在"
check_file "frontend/tsconfig.json" "前端 tsconfig.json 存在"
# Build output goes to a log file, so a failure message from check_shell will
# carry no details; inspect /tmp/llm_phase4_build.log instead.
check_shell "前端生产构建通过" "cd frontend && npm run build >/tmp/llm_phase4_build.log 2>&1"
check_shell "App 已接入 Dashboard 和 Explorer 入口" "grep -q 'Dashboard' frontend/src/App.tsx && grep -q 'Explorer' frontend/src/App.tsx"
check_shell "Explorer 已实现分页/排序/筛选" "grep -q 'PAGE_SIZE' frontend/src/pages/Explorer.tsx && grep -q 'toggleSort' frontend/src/pages/Explorer.tsx && grep -q 'providerFilter' frontend/src/pages/Explorer.tsx && grep -q 'modalityFilter' frontend/src/pages/Explorer.tsx"
# NOTE(review): any line containing "latest_models.json" also matches the
# second pattern "models.json", so the second grep adds no extra guarantee.
check_shell "Explorer 具备 API 失败回退到本地 JSON" "grep -q \"latest_models.json\" frontend/src/lib/models.ts && grep -q \"models.json\" frontend/src/lib/models.ts"
check_shell "Dashboard 已集成 ECharts" "grep -q \"from 'echarts'\" frontend/src/pages/Dashboard.tsx"
# The next two greps are case-insensitive (-i).
check_shell "Explorer 已实现 stale 状态显示" "grep -qi 'stale' frontend/src/pages/Explorer.tsx"
check_shell "Explorer 已实现 pricing unavailable 显示" "grep -qi 'pricing unavailable' frontend/src/lib/models.ts"
finish_phase

25
scripts/verify_phase5.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# verify_phase5.sh - Phase 5 acceptance checks (deployment and CI assets).
#
# Sources verify_common.sh for the check_* helpers, checks that deployment
# files and operational scripts exist, greps the CI workflow for the expected
# steps, then calls finish_phase. The grep checks only prove that a pattern
# occurs in the workflow file.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"
echo "=== Phase 5 验收检查 ==="
check_file "Dockerfile" "Dockerfile 存在"
check_file "docker-compose.yml" "docker-compose.yml 存在"
check_file "nginx.conf" "Nginx 配置存在"
check_file ".env.example" ".env.example 存在"
check_file ".github/workflows/ci.yml" "GitHub Actions CI 配置存在"
check_executable "scripts/backup.sh" "数据库备份脚本可执行"
# healthcheck.sh and restore.sh are only required to exist, not to be
# executable.
check_file "healthcheck.sh" "健康检查脚本存在"
check_file "scripts/restore.sh" "数据库恢复脚本存在"
# Matches either "go test ... ./internal/" or "go test ... ./...".
check_shell "CI 包含 Go 测试" "grep -Eq 'go test .*\\./internal/|go test .*\\./\\.\\.\\.' .github/workflows/ci.yml"
check_shell "CI 包含前端构建" "grep -q 'npm run build' .github/workflows/ci.yml"
check_shell "CI 包含 Docker 构建" "grep -q 'docker build' .github/workflows/ci.yml"
# Case-insensitive; any occurrence of "coverage" or "80%" satisfies this check.
check_shell "CI 配置了覆盖率门禁" "grep -Eqi 'coverage|80%' .github/workflows/ci.yml"
check_shell "CI 配置了构建产物上传" "grep -Eqi 'upload-artifact|artifacts' .github/workflows/ci.yml"
# Passes when any file path within three directory levels contains
# "logrotate" (case-insensitive); the second alternative is already covered by
# the first.
check_shell "日志轮转配置已落地" "find . -maxdepth 3 -type f | grep -Eqi 'logrotate|logrotate\\.conf'"
finish_phase

133
scripts/verify_phase6.sh Normal file
View File

@@ -0,0 +1,133 @@
#!/bin/bash
# verify_phase6.sh - Phase 6 end-to-end acceptance checks.
#
# Sources verify_common.sh for the check_* helpers, then builds and starts the
# API server on a local port and probes its HTTP endpoints.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"
# Connection string handed to the server and to psql; DATABASE_URL wins when set.
DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
# Temporary server binary (removed by cleanup) and its combined stdout/stderr log.
SERVER_BIN="/tmp/llm_phase6_server"
SERVER_LOG="/tmp/llm_phase6_server.log"
# Empty unless PHASE6_PORT is set; reserve_server_port fills it in otherwise.
SERVER_PORT="${PHASE6_PORT:-}"
# PID of the background server once start_server has launched it.
SERVER_PID=""
# cleanup - EXIT handler: terminate the background API server when one was
# started and is still alive, then remove the temporary server binary.
cleanup() {
  local pid="${SERVER_PID:-}"
  if [ -n "$pid" ]; then
    if kill -0 "$pid" >/dev/null 2>&1; then
      # Both steps are best-effort; the process may exit in between.
      kill "$pid" >/dev/null 2>&1 || true
      wait "$pid" >/dev/null 2>&1 || true
    fi
  fi
  rm -f "$SERVER_BIN"
}
trap cleanup EXIT
# port_in_use PORT - succeed when a TCP connection to 127.0.0.1:PORT can be
# opened (via bash's /dev/tcp redirection), i.e. something is listening there.
port_in_use() {
  local target_port="$1"
  # The subshell confines the opened descriptor and any redirection error.
  if (echo >"/dev/tcp/127.0.0.1/$target_port") >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
# reserve_server_port - make sure SERVER_PORT is set.
# A port that is already configured is kept as-is. Otherwise the first port in
# 18080..18120 on which port_in_use reports nothing listening is stored in
# SERVER_PORT. Returns 1 when every port in the range is taken.
reserve_server_port() {
  [ -z "${SERVER_PORT:-}" ] || return 0

  local probe=18080
  while [ "$probe" -le 18120 ]; do
    if ! port_in_use "$probe"; then
      SERVER_PORT="$probe"
      return 0
    fi
    probe=$((probe + 1))
  done
  return 1
}
# start_server - launch SERVER_BIN in the background with DATABASE_URL and PORT
# set, record its PID in SERVER_PID, and poll /health up to 20 times, 0.5s
# apart.
# Returns 0 as soon as the health response contains "status":"ok"; returns 1
# when the process dies or the polling budget is used up.
start_server() {
  local health_url="http://127.0.0.1:${SERVER_PORT}/health"
  local attempt

  DATABASE_URL="$DB_URL" PORT="$SERVER_PORT" "$SERVER_BIN" >"$SERVER_LOG" 2>&1 &
  SERVER_PID=$!

  for attempt in $(seq 1 20); do
    # No point in polling a server that has already exited.
    kill -0 "$SERVER_PID" >/dev/null 2>&1 || return 1

    if curl -fsS "$health_url" >/tmp/llm_phase6_health.out 2>/tmp/llm_phase6_health.err; then
      if grep -q '"status":"ok"' /tmp/llm_phase6_health.out; then
        return 0
      fi
    fi
    sleep 0.5
  done
  return 1
}
# --- Main sequence -----------------------------------------------------------
# Runs the aggregate gates first, then builds and starts the API server to
# probe its endpoints, and finally calls finish_phase for the summary.
echo "=== Phase 6 综合验收检查 ==="
check_shell "Phase 1~5 总门禁通过" "bash scripts/verify_pre_phase6.sh"
check_shell "全仓 Go 测试通过" "go test ./..."
check_shell "脚本级采集器单测通过" "bash scripts/test.sh"
check_shell "真实采集并输出今日日报" "bash scripts/run_real_pipeline.sh"
check_shell "API Server 可构建" "go build -o /dev/null ./cmd/server"
check_shell "健康检查脚本通过" "DATABASE_URL='$DB_URL' bash healthcheck.sh"
# Passes only when grep finds no line containing "sk-" in the listed trees.
# NOTE(review): the pattern is a plain substring, so identifiers such as
# "task-" or "risk-" would also trip this check — confirm that is intended.
check_shell "密钥未硬编码进源码" "grep -R -n 'sk-' cmd internal frontend/src scripts .github/workflows --include='*.go' --include='*.ts' --include='*.tsx' --include='*.sh' --include='*.yml' --include='*.yaml' --exclude='verify_phase6.sh' >/tmp/llm_phase6_secret_scan.out 2>/dev/null; test ! -s /tmp/llm_phase6_secret_scan.out"
# Success percentage over the 7 most recent collector_stats rows must be >= 95.
check_shell "最近 7 次采集成功率达到 95%" "psql \"$DB_URL\" -Atqc \"select coalesce(round(avg(case when success then 100 else 0 end),2),0) from (select success from collector_stats order by created_at desc limit 7) t;\" | awk '{ exit !(\$1 >= 95) }'"
# Build a real binary this time so it can be started for the endpoint probes.
if go build -o "$SERVER_BIN" ./cmd/server >/tmp/llm_phase6_server_build.out 2>/tmp/llm_phase6_server_build.err; then
if reserve_server_port && start_server; then
pass "API /health 可用"
# errexit is suspended around curl so a transport failure is reported through
# fail instead of aborting the script.
set +e
api_metrics="$(curl -sS -o /tmp/llm_phase6_models.json -w '%{http_code} %{time_total}' "http://127.0.0.1:${SERVER_PORT}/api/v1/models")"
api_rc=$?
set -e
if [ "$api_rc" -eq 0 ]; then
# api_metrics holds "<http status> <total seconds>".
api_code="$(printf '%s' "$api_metrics" | awk '{print $1}')"
api_time="$(printf '%s' "$api_metrics" | awk '{print $2}')"
if [ "$api_code" = "200" ]; then
pass "API /api/v1/models 返回 200"
else
fail "API /api/v1/models 返回异常状态 (HTTP ${api_code:-unknown})"
fi
# awk performs the floating-point comparison against 0.5 seconds.
if awk "BEGIN { exit !($api_time < 0.5) }"; then
pass "API 响应 < 500ms (当前: ${api_time}s)"
else
fail "API 响应 >= 500ms (当前: ${api_time}s)"
fi
# Only the presence of a "data" key in the body is checked.
if grep -q '"data"' /tmp/llm_phase6_models.json; then
pass "API 返回模型数据载荷"
else
fail "API 返回体缺少 data 字段"
fi
else
fail "API /api/v1/models 请求失败"
fi
# Same probe for the subscription plans endpoint (no latency check here).
set +e
plan_metrics="$(curl -sS -o /tmp/llm_phase6_subscription_plans.json -w '%{http_code} %{time_total}' "http://127.0.0.1:${SERVER_PORT}/api/v1/subscription-plans")"
plan_rc=$?
set -e
if [ "$plan_rc" -eq 0 ]; then
plan_code="$(printf '%s' "$plan_metrics" | awk '{print $1}')"
if [ "$plan_code" = "200" ]; then
pass "API /api/v1/subscription-plans 返回 200"
else
fail "API /api/v1/subscription-plans 返回异常状态 (HTTP ${plan_code:-unknown})"
fi
if grep -q '"data"' /tmp/llm_phase6_subscription_plans.json; then
pass "API 返回套餐数据载荷"
else
fail "套餐 API 返回体缺少 data 字段"
fi
else
fail "API /api/v1/subscription-plans 请求失败"
fi
else
# Server did not become healthy: surface its log, flattened to one line.
details="$(tr '\n' ' ' <"$SERVER_LOG" | sed 's/[[:space:]]\+/ /g' | sed 's/ $//')"
fail "API Server 启动失败 (${details:-no server log})"
fi
else
# Build failed: surface the compiler's stderr, flattened to one line.
details="$(tr '\n' ' ' </tmp/llm_phase6_server_build.err | sed 's/[[:space:]]\+/ /g' | sed 's/ $//')"
fail "API Server 构建失败 (${details:-unknown build error})"
fi
check_shell "Phase 6 性能文档存在" "test -f docs/PERFORMANCE_TEST.md"
check_shell "前端已具备测试入口" "cd frontend && npm run test -- --run >/tmp/llm_phase6_frontend_test.out 2>/tmp/llm_phase6_frontend_test.err"
finish_phase

37
scripts/verify_pre_phase6.sh Executable file
View File

@@ -0,0 +1,37 @@
#!/bin/bash
# verify_pre_phase6.sh - run the Phase 1-5 verification scripts in order.
#
# Every phase script is executed even when an earlier one fails. The script
# prints a per-phase PASS/FAIL line, a final PRE_PHASE6_RESULT line, and exits
# 0 only when all phases succeeded (1 otherwise).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

phase_scripts=(
  "verify_phase1.sh"
  "verify_phase2.sh"
  "verify_phase3.sh"
  "verify_phase4.sh"
  "verify_phase5.sh"
)
any_failed=0

echo "=== Pre-Phase 6 总验收 ==="
for script_name in "${phase_scripts[@]}"; do
  echo
  echo ">>> 执行 ${script_name}"
  # Testing the command inside `if` keeps errexit from aborting on a failure.
  if ! bash "$SCRIPT_DIR/$script_name"; then
    echo ">>> ${script_name} FAIL"
    any_failed=1
    continue
  fi
  echo ">>> ${script_name} PASS"
done

echo
case "$any_failed" in
  0) echo "PRE_PHASE6_RESULT: PASS" ;;
  *) echo "PRE_PHASE6_RESULT: FAIL" ;;
esac
exit "$any_failed"

View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Video digest acceptance checks.
#
# Usage: <this script> [REPORT_PATH]
#   REPORT_PATH  daily report Markdown file to process; when omitted (or
#                empty) the lexicographically last
#                reports/daily/daily_report_*.md under the project root is used.
#
# Sources verify_common.sh for the check_* helpers, PROJECT_ROOT and the
# counters, runs the generator's tests and a real generation, validates the
# produced artifacts, and exits 0 only when no check failed.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"

REPORT_PATH="${1:-}"
if [[ -z "$REPORT_PATH" ]]; then
  # `|| true` keeps errexit/pipefail from aborting here when reports/daily
  # does not exist, so the explicit message below is what the user sees.
  REPORT_PATH="$(find "$PROJECT_ROOT/reports/daily" -maxdepth 1 -type f -name 'daily_report_*.md' 2>/dev/null | sort | tail -1 || true)"
fi
if [[ -z "$REPORT_PATH" || ! -f "$REPORT_PATH" ]]; then
  echo "未找到日报 Markdown 文件" >&2
  exit 1
fi

# The report date is taken from the file name (daily_report_<date>.md) and
# selects the output directory.
REPORT_DATE="$(basename "$REPORT_PATH" | sed -E 's/^daily_report_([0-9-]+)\.md$/\1/')"
OUTPUT_DIR="$PROJECT_ROOT/reports/daily/video/$REPORT_DATE"

echo "=== 视频日报验收检查 ==="
check_shell "视频日报生成器单元测试通过" "go test -tags llm_script scripts/generate_video_digest.go scripts/generate_video_digest_test.go"
# The generator must run successfully and report cards=5 on stdout.
check_shell "视频日报生成器真实产物落盘" "go run -tags llm_script scripts/generate_video_digest.go --report \"$REPORT_PATH\" --output-dir \"$OUTPUT_DIR\" >/tmp/llm_video_digest.out 2>/tmp/llm_video_digest.err && grep -q 'cards=5' /tmp/llm_video_digest.out"
check_file "reports/daily/video/$REPORT_DATE/manifest.json" "manifest 已生成"
check_file "reports/daily/video/$REPORT_DATE/video_digest.gif" "GIF 视频原型已生成"
check_file "reports/daily/video/$REPORT_DATE/narration.wav" "旁白音轨已生成"
check_shell "脚本分镜文件达到 5 份" "test \"\$(find '$OUTPUT_DIR/scripts' -maxdepth 1 -type f -name '*.md' | wc -l)\" -eq 5"
check_shell "PNG 帧达到 5 张" "test \"\$(find '$OUTPUT_DIR/frames' -maxdepth 1 -type f -name '*.png' | wc -l)\" -eq 5"
# Magic-number checks: GIF87a/GIF89a and RIFF.
check_shell "GIF 文件头有效" "head -c 6 \"$OUTPUT_DIR/video_digest.gif\" | grep -Eq 'GIF8[79]a'"
check_shell "WAV 文件头有效" "head -c 4 \"$OUTPUT_DIR/narration.wav\" | grep -q 'RIFF'"
check_shell "manifest 引用了视频与音轨产物" "grep -q 'video_digest.gif' \"$OUTPUT_DIR/manifest.json\" && grep -q 'narration.wav' \"$OUTPUT_DIR/manifest.json\""

# Same summary as finish_phase, but with a VIDEO_PIPELINE result label.
echo
echo "SUMMARY pass=$PASS_COUNT fail=$FAIL_COUNT warn=$WARN_COUNT"
if [ "$FAIL_COUNT" -eq 0 ]; then
  echo "VIDEO_PIPELINE: PASS"
  exit 0
fi
echo "VIDEO_PIPELINE: FAIL"
exit 1