From e757cd2dd78da9084f0ee4315a100c3c5c1ce7e9 Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Sat, 23 May 2026 18:34:57 +0800 Subject: [PATCH] feat(importers): add official pricing importers for baichuan lingyiwanwu sensenova and xfyun --- scripts/import_baichuan_pricing.go | 188 +++++++++ scripts/import_baichuan_pricing_test.go | 64 +++ scripts/import_lingyiwanwu_pricing.go | 160 ++++++++ scripts/import_lingyiwanwu_pricing_test.go | 64 +++ scripts/import_sensenova_pricing.go | 378 ++++++++++++++++++ scripts/import_sensenova_pricing_test.go | 69 ++++ scripts/import_xfyun_pricing.go | 217 ++++++++++ scripts/import_xfyun_pricing_test.go | 61 +++ scripts/testdata/baichuan_pricing_sample.txt | 24 ++ .../testdata/lingyiwanwu_pricing_sample.txt | 1 + scripts/testdata/sensenova_pricing_sample.txt | 44 ++ scripts/testdata/xfyun_pricing_sample.html | 1 + 12 files changed, 1271 insertions(+) create mode 100644 scripts/import_baichuan_pricing.go create mode 100644 scripts/import_baichuan_pricing_test.go create mode 100644 scripts/import_lingyiwanwu_pricing.go create mode 100644 scripts/import_lingyiwanwu_pricing_test.go create mode 100644 scripts/import_sensenova_pricing.go create mode 100644 scripts/import_sensenova_pricing_test.go create mode 100644 scripts/import_xfyun_pricing.go create mode 100644 scripts/import_xfyun_pricing_test.go create mode 100644 scripts/testdata/baichuan_pricing_sample.txt create mode 100644 scripts/testdata/lingyiwanwu_pricing_sample.txt create mode 100644 scripts/testdata/sensenova_pricing_sample.txt create mode 100644 scripts/testdata/xfyun_pricing_sample.html diff --git a/scripts/import_baichuan_pricing.go b/scripts/import_baichuan_pricing.go new file mode 100644 index 0000000..ec17452 --- /dev/null +++ b/scripts/import_baichuan_pricing.go @@ -0,0 +1,188 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "regexp" + "sort" + "strings" + "time" +) + +const 
defaultBaichuanPricingURL = "https://platform.baichuan-ai.com/prices" + +type baichuanPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +type baichuanPricingRow struct { + Index int + ModelName string + ContextLength int + InputPrice float64 + OutputPrice float64 +} + +var baichuanModelContextPattern = regexp.MustCompile(`模型调用\s+(Baichuan[-A-Za-z0-9]+)\s+([0-9]+k)`) +var baichuanPairPricePattern = regexp.MustCompile(`输入:([0-9.]+)元/千tokens\s+输出:([0-9.]+)元/千tokens`) +var baichuanFlatPricePattern = regexp.MustCompile(`(?:00:00\s*~\s*24:00|00:00\s*~\s*8:00)\s+([0-9.]+)元/千tokens`) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultBaichuanPricingURL, "百川官方价格页") + flag.StringVar(&fixture, "fixture", "", "百川价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := baichuanPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runBaichuanPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_baichuan_pricing: %v\n", err) + os.Exit(1) + } +} + +func runBaichuanPricingImport(cfg baichuanPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseBaichuanPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=baichuan-pricing-import models=%d operator=%s 
dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "baichuan-pricing-import"); err != nil { + return err + } + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=baichuan-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} + +func parseBaichuanPricingCatalog(raw string) ([]officialPricingRecord, error) { + text := cleanHTMLText(raw) + text = strings.ReplaceAll(text, "\n", " ") + text = regexp.MustCompile(`\s+`).ReplaceAllString(text, " ") + text = strings.TrimSpace(text) + + sectionStart := strings.Index(text, "通用大模型") + if sectionStart == -1 { + return nil, fmt.Errorf("unexpected baichuan pricing content: missing 通用大模型") + } + text = text[sectionStart:] + sectionEnd := strings.Index(text, "搜索增强服务") + if sectionEnd == -1 { + return nil, fmt.Errorf("unexpected baichuan pricing content: missing 搜索增强服务") + } + section := text[:sectionEnd] + + chunks := strings.Split(section, "模型调用 ") + rows := make([]baichuanPricingRow, 0, len(chunks)) + for idx, chunk := range chunks { + chunk = strings.TrimSpace(chunk) + if chunk == "" { + continue + } + chunk = "模型调用 " + chunk + if strings.Contains(chunk, "Baichuan-Text-Embedding") { + continue + } + meta := baichuanModelContextPattern.FindStringSubmatch(chunk) + if len(meta) != 3 { + continue + } + modelName := strings.TrimSpace(meta[1]) + contextLength := parseContextLengthCommon(meta[2]) + if contextLength == 0 { + continue + } + row := baichuanPricingRow{Index: idx, ModelName: modelName, ContextLength: contextLength} + if pair := baichuanPairPricePattern.FindStringSubmatch(chunk); len(pair) == 3 { + row.InputPrice = 
baichuanPerKTokenToPerMToken(pair[1]) + row.OutputPrice = baichuanPerKTokenToPerMToken(pair[2]) + } else if flat := baichuanFlatPricePattern.FindStringSubmatch(chunk); len(flat) == 2 { + price := baichuanPerKTokenToPerMToken(flat[1]) + row.InputPrice = price + row.OutputPrice = price + } else { + continue + } + rows = append(rows, row) + } + if len(rows) == 0 { + return nil, fmt.Errorf("unexpected baichuan pricing content: no model rows parsed") + } + sort.Slice(rows, func(i, j int) bool { return rows[i].Index < rows[j].Index }) + + providerNameCn, providerCountry, providerWebsite := providerMetadata("Baichuan") + records := make([]officialPricingRecord, 0, len(rows)) + for _, row := range rows { + records = append(records, officialPricingRecord{ + ModelID: normalizeExternalID("baichuan", row.ModelName), + ModelName: row.ModelName, + ProviderName: "Baichuan", + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Baichuan API", + OperatorNameCn: "百川开放平台", + OperatorCountry: "CN", + OperatorWebsite: "https://platform.baichuan-ai.com/docs", + OperatorType: "official", + Region: "CN", + Currency: "CNY", + InputPrice: row.InputPrice, + OutputPrice: row.OutputPrice, + ContextLength: row.ContextLength, + SourceURL: defaultBaichuanPricingURL, + ModelSourceURL: defaultBaichuanPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(row.ModelName), + }) + } + return records, nil +} + +func baichuanPerKTokenToPerMToken(raw string) float64 { + return mustParseSubscriptionPrice(raw) * 1000 +} diff --git a/scripts/import_baichuan_pricing_test.go b/scripts/import_baichuan_pricing_test.go new file mode 100644 index 0000000..6e1cc83 --- /dev/null +++ b/scripts/import_baichuan_pricing_test.go @@ -0,0 +1,64 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func 
TestParseBaichuanPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "baichuan_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseBaichuanPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseBaichuanPricingCatalog 返回错误: %v", err) + } + if len(records) != 11 { + t.Fatalf("期望 11 条百川价格记录,实际 %d", len(records)) + } + if records[0].ModelID != "baichuan-baichuan-m3-plus" { + t.Fatalf("首条 modelID 错误: %q", records[0].ModelID) + } + if records[0].InputPrice != 5 || records[0].OutputPrice != 9 { + t.Fatalf("Baichuan-M3-Plus 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice) + } + if records[4].InputPrice != 15 || records[4].OutputPrice != 15 { + t.Fatalf("Baichuan4-Turbo blended 定价错误: %v / %v", records[4].InputPrice, records[4].OutputPrice) + } + if records[8].ContextLength != 128000 { + t.Fatalf("Baichuan3-Turbo-128k context 错误: %d", records[8].ContextLength) + } + if records[10].InputPrice != 10 || records[10].OutputPrice != 10 { + t.Fatalf("Baichuan2-53B 基线定价错误: %v / %v", records[10].InputPrice, records[10].OutputPrice) + } +} + +func TestRunBaichuanPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runBaichuanPricingImport(baichuanPricingImportConfig{ + URL: defaultBaichuanPricingURL, + Fixture: filepath.Join("testdata", "baichuan_pricing_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runBaichuanPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=baichuan-pricing-import", + "models=11", + "operator=Baichuan API", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_lingyiwanwu_pricing.go b/scripts/import_lingyiwanwu_pricing.go new file mode 100644 index 0000000..23425b9 --- /dev/null +++ b/scripts/import_lingyiwanwu_pricing.go @@ -0,0 +1,160 @@ +//go:build 
llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "html" + "io" + "net/http" + "os" + "regexp" + "strings" + "time" +) + +const defaultLingyiwanwuPricingURL = "https://platform.lingyiwanwu.com/docs" + +type lingyiwanwuPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +var lingyiwanwuPricingRowPattern = regexp.MustCompile(`(?s)"children":"(yi-[a-z0-9-]+)"\}\],\["\$","td",null,\{"children":"([0-9]+K)"\}.*?"children":"¥([0-9]+(?:\.[0-9]+)?)"`) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultLingyiwanwuPricingURL, "零一万物官方价格页") + flag.StringVar(&fixture, "fixture", "", "零一万物价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := lingyiwanwuPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runLingyiwanwuPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_lingyiwanwu_pricing: %v\n", err) + os.Exit(1) + } +} + +func runLingyiwanwuPricingImport(cfg lingyiwanwuPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseLingyiwanwuPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if len(records) == 0 { + return fmt.Errorf("unexpected lingyiwanwu pricing content: no records") + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d 
operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "lingyiwanwu-pricing-import"); err != nil { + return err + } + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} + +func parseLingyiwanwuPricingCatalog(raw string) ([]officialPricingRecord, error) { + payload := lingyiwanwuPricingPayload(raw) + sectionStart := strings.Index(payload, "模型与计费") + if sectionStart == -1 { + return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 模型与计费") + } + payload = payload[sectionStart:] + sectionEnd := strings.Index(payload, "关于计费") + if sectionEnd == -1 { + return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 关于计费") + } + section := payload[:sectionEnd] + matches := lingyiwanwuPricingRowPattern.FindAllStringSubmatch(section, -1) + if len(matches) == 0 { + return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: no model rows parsed") + } + + providerNameCn, providerCountry, providerWebsite := providerMetadata("Yi") + records := make([]officialPricingRecord, 0, len(matches)) + for _, match := range matches { + if len(match) != 4 { + continue + } + modelName := strings.TrimSpace(match[1]) + contextLength := parseContextLengthCommon(match[2]) + price := mustParseSubscriptionPrice(match[3]) + records = append(records, officialPricingRecord{ + ModelID: normalizeExternalID("yi", modelName), + ModelName: modelName, + ProviderName: "Yi", + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "01.AI API", + OperatorNameCn: 
"零一万物开放平台", + OperatorCountry: "CN", + OperatorWebsite: defaultLingyiwanwuPricingURL, + OperatorType: "official", + Region: "CN", + Currency: "CNY", + InputPrice: price, + OutputPrice: price, + ContextLength: contextLength, + SourceURL: defaultLingyiwanwuPricingURL, + ModelSourceURL: defaultLingyiwanwuPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(modelName), + }) + } + if len(records) == 0 { + return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: empty records after parse") + } + return records, nil +} + +func lingyiwanwuPricingPayload(raw string) string { + text := html.UnescapeString(raw) + text = strings.ReplaceAll(text, `\u003c`, "<") + text = strings.ReplaceAll(text, `\u003e`, ">") + text = strings.ReplaceAll(text, `\n`, "\n") + text = strings.ReplaceAll(text, `\t`, " ") + text = strings.ReplaceAll(text, `\"`, `"`) + text = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(text, " ") + text = regexp.MustCompile(`[ \t]+`).ReplaceAllString(text, " ") + return strings.TrimSpace(text) +} diff --git a/scripts/import_lingyiwanwu_pricing_test.go b/scripts/import_lingyiwanwu_pricing_test.go new file mode 100644 index 0000000..7168c53 --- /dev/null +++ b/scripts/import_lingyiwanwu_pricing_test.go @@ -0,0 +1,64 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseLingyiwanwuPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "lingyiwanwu_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseLingyiwanwuPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseLingyiwanwuPricingCatalog 返回错误: %v", err) + } + if len(records) != 2 { + t.Fatalf("期望 2 条零一万物价格记录,实际 %d", len(records)) + } + if records[0].ModelID != "yi-yi-lightning" { + t.Fatalf("首条 modelID 错误: %q", records[0].ModelID) + } + if records[0].InputPrice != 0.99 || 
records[0].OutputPrice != 0.99 { + t.Fatalf("yi-lightning 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice) + } + if records[1].ContextLength != 16000 { + t.Fatalf("yi-vision-v2 context 错误: %d", records[1].ContextLength) + } + if records[1].Modality != "multimodal" { + t.Fatalf("yi-vision-v2 modality 错误: %q", records[1].Modality) + } + if records[1].InputPrice != 6 || records[1].OutputPrice != 6 { + t.Fatalf("yi-vision-v2 定价错误: %v / %v", records[1].InputPrice, records[1].OutputPrice) + } +} + +func TestRunLingyiwanwuPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runLingyiwanwuPricingImport(lingyiwanwuPricingImportConfig{ + URL: defaultLingyiwanwuPricingURL, + Fixture: filepath.Join("testdata", "lingyiwanwu_pricing_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runLingyiwanwuPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=lingyiwanwu-pricing-import", + "models=2", + "operator=01.AI API", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_sensenova_pricing.go b/scripts/import_sensenova_pricing.go new file mode 100644 index 0000000..b14268a --- /dev/null +++ b/scripts/import_sensenova_pricing.go @@ -0,0 +1,378 @@ +//go:build llm_script + +package main + +import ( + "context" + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/exec" + "regexp" + "strings" + "time" +) + +const ( + defaultSensenovaDocsURL = "https://platform.sensenova.cn/docs" + defaultSensenovaModelsURL = "https://www.sensenova.cn/models" +) + +type sensenovaPricingImportConfig struct { + DocsURL string + ModelsURL string + Fixture string + DryRun bool + Timeout time.Duration +} + +type sensenovaPricingFixture struct { + DocsHTML string + ModelsText string +} + +type sensenovaPricingDocModel struct { + ModelName string + ModelID string + QuotaPer5Hour int 
+} + +var ( + sensenovaFixtureSplitMarker = "\n===SENSENOVA_MODELS_BUNDLE===\n" + sensenovaOverviewCardPattern = regexp.MustCompile(`(?s)]*>([^<]+).*?调用次数限制

]*>每5小时([0-9]+)次

.*?MODEL ID

]*>([^<]+)`) + sensenovaModelsScriptPattern = regexp.MustCompile(`src="([^"]+/_next/static/chunks/[^"]+\.js|/_next/static/chunks/[^"]+\.js)"`) + sensenovaPricingZeroPattern = regexp.MustCompile(`(?s)"pricing"\s*:\s*\{\s*"prompt"\s*:\s*"0"\s*,\s*"completion"\s*:\s*"0"\s*,\s*"image"\s*:\s*"0"\s*,\s*"request"\s*:\s*"0"`) +) + +func main() { + loadSubscriptionImportEnv() + + var docsURL string + var modelsURL string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&docsURL, "docs-url", defaultSensenovaDocsURL, "商汤 SenseNova API 文档页") + flag.StringVar(&modelsURL, "models-url", defaultSensenovaModelsURL, "商汤 SenseNova 模型页") + flag.StringVar(&fixture, "fixture", "", "商汤 SenseNova 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 45, "请求超时(秒)") + flag.Parse() + + cfg := sensenovaPricingImportConfig{ + DocsURL: docsURL, + ModelsURL: modelsURL, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runSensenovaPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_sensenova_pricing: %v\n", err) + os.Exit(1) + } +} + +func runSensenovaPricingImport(cfg sensenovaPricingImportConfig, db *sql.DB, out io.Writer) error { + fixture, err := fetchSensenovaPricingFixture(cfg) + if err != nil { + return err + } + records, err := parseSensenovaPricingCatalog(fixture) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if len(records) == 0 { + return fmt.Errorf("unexpected sensenova pricing content: no records") + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } 
+ if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "sensenova-pricing-import"); err != nil { + return err + } + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} + +func fetchSensenovaPricingFixture(cfg sensenovaPricingImportConfig) (sensenovaPricingFixture, error) { + if strings.TrimSpace(cfg.Fixture) != "" { + data, err := os.ReadFile(cfg.Fixture) + if err != nil { + return sensenovaPricingFixture{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err) + } + return splitSensenovaFixture(string(data)) + } + + docsHTML, err := fetchRenderedPricingPageWithChromium(cfg.DocsURL, cfg.Timeout) + if err != nil { + return sensenovaPricingFixture{}, fmt.Errorf("fetch docs render: %w", err) + } + modelsText, err := fetchSensenovaModelsBundle(cfg.ModelsURL, cfg.Timeout) + if err != nil { + return sensenovaPricingFixture{}, err + } + return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil +} + +func splitSensenovaFixture(raw string) (sensenovaPricingFixture, error) { + parts := strings.SplitN(raw, sensenovaFixtureSplitMarker, 2) + if len(parts) != 2 { + return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: missing models bundle marker") + } + docsHTML := strings.TrimSpace(parts[0]) + modelsText := strings.TrimSpace(parts[1]) + if docsHTML == "" || modelsText == "" { + return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: empty docs or models segment") + } + return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil +} + +func fetchSensenovaModelsBundle(modelsURL string, timeout time.Duration) (string, error) { + 
client := &http.Client{Timeout: timeout} + html, err := fetchRawPricingPage(modelsURL, "", client) + if err != nil { + return "", fmt.Errorf("fetch models page shell: %w", err) + } + scripts := sensenovaModelsScriptPattern.FindAllStringSubmatch(html, -1) + if len(scripts) == 0 { + return "", fmt.Errorf("unexpected sensenova models page: no chunk scripts found") + } + seen := make(map[string]struct{}, len(scripts)) + for _, match := range scripts { + if len(match) != 2 { + continue + } + scriptURL, err := resolveSensenovaAssetURL(modelsURL, match[1]) + if err != nil { + continue + } + if _, ok := seen[scriptURL]; ok { + continue + } + seen[scriptURL] = struct{}{} + bundle, err := fetchRawPricingPage(scriptURL, "", client) + if err != nil { + continue + } + if sensenovaBundleConfirmsFreeBeta(bundle) { + return bundle, nil + } + } + return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found") +} + +func resolveSensenovaAssetURL(baseURL string, assetPath string) (string, error) { + parsedBase, err := url.Parse(baseURL) + if err != nil { + return "", err + } + asset, err := url.Parse(assetPath) + if err != nil { + return "", err + } + return parsedBase.ResolveReference(asset).String(), nil +} + +func sensenovaBundleConfirmsFreeBeta(raw string) bool { + hasFree := strings.Contains(raw, "公测期完全免费开放") || strings.Contains(raw, "free during public beta") + hasAllModels := strings.Contains(raw, "所有模型完全开放") || strings.Contains(raw, "all models included") + return hasFree && hasAllModels +} + +func fetchRenderedPricingPageWithChromium(pageURL string, timeout time.Duration) (string, error) { + browserPath, err := lookupChromiumBinaryForSensenova() + if err != nil { + return "", err + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + cmd := exec.CommandContext(ctx, browserPath, + "--headless", + "--no-sandbox", + "--disable-gpu", + "--virtual-time-budget=8000", + "--dump-dom", + pageURL, + ) + cmd.Stderr = io.Discard 
+ out, err := cmd.Output() + if ctx.Err() == context.DeadlineExceeded { + return "", fmt.Errorf("chromium render timeout after %s", timeout) + } + if err != nil { + return "", fmt.Errorf("chromium dump-dom: %w", err) + } + if len(out) == 0 { + return "", fmt.Errorf("chromium dump-dom returned empty output") + } + return string(out), nil +} + +func lookupChromiumBinaryForSensenova() (string, error) { + for _, name := range []string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"} { + if path, err := exec.LookPath(name); err == nil { + return path, nil + } + } + return "", fmt.Errorf("no chromium-compatible browser found in PATH") +} + +func parseSensenovaPricingCatalog(fixture sensenovaPricingFixture) ([]officialPricingRecord, error) { + if !sensenovaBundleConfirmsFreeBeta(fixture.ModelsText) { + return nil, fmt.Errorf("unexpected sensenova models bundle: missing public-beta free signal") + } + if !strings.Contains(fixture.DocsHTML, "GET https://token.sensenova.cn/v1/models") { + return nil, fmt.Errorf("unexpected sensenova docs content: missing list models endpoint") + } + if !sensenovaPricingZeroPattern.MatchString(fixture.DocsHTML) { + return nil, fmt.Errorf("unexpected sensenova docs content: missing zero pricing object example") + } + + matches := sensenovaOverviewCardPattern.FindAllStringSubmatch(fixture.DocsHTML, -1) + if len(matches) == 0 { + return nil, fmt.Errorf("unexpected sensenova docs content: no model overview cards parsed") + } + + providerNameCn, providerCountry, providerWebsite := providerMetadata("SenseTime") + records := make([]officialPricingRecord, 0, len(matches)) + seenModelIDs := make(map[string]struct{}, len(matches)) + for _, match := range matches { + if len(match) != 4 { + continue + } + modelName := strings.TrimSpace(match[1]) + modelID := strings.TrimSpace(match[3]) + if modelName == "" || modelID == "" { + continue + } + if _, ok := seenModelIDs[modelID]; ok { + continue + } + seenModelIDs[modelID] = 
struct{}{} + sectionID := sensenovaSectionIDForModel(modelID) + section, err := extractHTMLSectionByID(fixture.DocsHTML, sectionID) + if err != nil { + return nil, err + } + providerName := sensenovaProviderName(modelID) + providerCn, providerCountryCode, providerSite := providerNameCn, providerCountry, providerWebsite + if providerName != "SenseTime" { + providerCn, providerCountryCode, providerSite = providerMetadata(providerName) + } + records = append(records, officialPricingRecord{ + ModelID: normalizeExternalID("sensenova", modelID), + ModelName: modelName, + ProviderName: providerName, + ProviderNameCn: providerCn, + ProviderCountry: providerCountryCode, + ProviderWebsite: providerSite, + OperatorName: "SenseNova API", + OperatorNameCn: "日日新开放平台", + OperatorCountry: "CN", + OperatorWebsite: defaultSensenovaDocsURL, + OperatorType: "official", + Region: "CN", + Currency: "CNY", + InputPrice: 0, + OutputPrice: 0, + IsFree: true, + ContextLength: sensenovaContextLength(modelID, section), + SourceURL: defaultSensenovaDocsURL, + ModelSourceURL: firstNonEmptyText(defaultSensenovaDocsURL+"#"+sectionID, defaultSensenovaDocsURL), + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: sensenovaModality(modelID, section), + }) + } + if len(records) == 0 { + return nil, fmt.Errorf("unexpected sensenova pricing content: empty records after parse") + } + return records, nil +} + +func extractHTMLSectionByID(raw string, sectionID string) (string, error) { + marker := fmt.Sprintf(`
([^<]+)
([0-9]+(?:\.[0-9]+)?)元/百万tokens`) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultXfyunPricingURL, "讯飞官方价格页") + flag.StringVar(&fixture, "fixture", "", "讯飞价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 30, "请求超时(秒)") + flag.Parse() + + cfg := xfyunPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runXfyunPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_xfyun_pricing: %v\n", err) + os.Exit(1) + } +} + +func runXfyunPricingImport(cfg xfyunPricingImportConfig, db *sql.DB, out io.Writer) error { + raw, err := fetchXfyunPricingPage(cfg) + if err != nil { + return err + } + records, err := parseXfyunPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if len(records) == 0 { + return fmt.Errorf("unexpected xfyun pricing content: no records") + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "xfyun-pricing-import"); err != nil { + return err + } + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err 
+}
+
+// fetchXfyunPricingPage returns the raw markup of the iFlytek Spark pricing
+// page described by cfg.
+//
+// Resolution order:
+//  1. If cfg.Fixture is non-blank, that file is read and returned as-is; no
+//     network access or browser is involved.
+//  2. Otherwise cfg.URL is fetched via fetchRawPricingPage. The response is
+//     accepted only when it contains the "apiprice_cardTitle__" marker
+//     (presumably the class prefix of the rendered pricing cards - confirm
+//     against xfyunPricingCardPattern).
+//  3. If the fetch failed or the marker is absent, the page is rendered with a
+//     headless Chromium-compatible browser and the dumped DOM is returned.
+//
+// When both the plain fetch and the render fail, the error mentions both and
+// wraps the render error. When only the render fails, its error is returned
+// unchanged.
+func fetchXfyunPricingPage(cfg xfyunPricingImportConfig) (string, error) {
+	if strings.TrimSpace(cfg.Fixture) != "" {
+		data, err := os.ReadFile(cfg.Fixture)
+		if err != nil {
+			return "", fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
+		}
+		return string(data), nil
+	}
+
+	client := &http.Client{Timeout: cfg.Timeout}
+	raw, err := fetchRawPricingPage(cfg.URL, "", client)
+	if err == nil && strings.Contains(raw, "apiprice_cardTitle__") {
+		return raw, nil
+	}
+
+	// Either the request failed or the response lacks the pricing cards; fall
+	// back to a real browser so client-side rendering can run.
+	rendered, renderErr := fetchXfyunPricingPageWithChromium(cfg.URL, cfg.Timeout)
+	if renderErr != nil {
+		if err != nil {
+			return "", fmt.Errorf("fetch shell failed: %v; chromium render failed: %w", err, renderErr)
+		}
+		return "", renderErr
+	}
+	return rendered, nil
+}
+
+// fetchXfyunPricingPageWithChromium renders url in a headless
+// Chromium-compatible browser (located by lookupChromiumBinary) and returns
+// the DOM printed by --dump-dom.
+//
+// A positive timeout bounds the whole browser run. A zero or negative timeout
+// means "no deadline", which matches how http.Client interprets the same
+// configuration value.
+//
+// It returns an error when no browser is found, when the deadline expires,
+// when the browser exits unsuccessfully, or when it prints nothing.
+func fetchXfyunPricingPageWithChromium(url string, timeout time.Duration) (string, error) {
+	browserPath, err := lookupChromiumBinary()
+	if err != nil {
+		return "", err
+	}
+
+	// context.WithTimeout with a non-positive duration yields an already
+	// expired context, so the browser would never get a chance to run. Only
+	// attach a deadline when the caller actually asked for one.
+	ctx := context.Background()
+	if timeout > 0 {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, timeout)
+		defer cancel()
+	}
+
+	// NOTE(review): --no-sandbox disables Chromium's process sandbox. It is
+	// commonly needed when running as root or inside containers, but it means
+	// the fetched page is rendered without that protection; only point this at
+	// trusted URLs.
+	cmd := exec.CommandContext(ctx, browserPath,
+		"--headless",
+		"--no-sandbox",
+		"--disable-gpu",
+		"--dump-dom",
+		url,
+	)
+	// Browser diagnostics are intentionally dropped; only stdout (the DOM) is
+	// of interest.
+	cmd.Stderr = io.Discard
+	out, err := cmd.Output()
+	// Check the deadline first so a kill caused by the timeout is reported as
+	// a timeout rather than as a generic process failure.
+	if ctx.Err() == context.DeadlineExceeded {
+		return "", fmt.Errorf("chromium render timeout after %s", timeout)
+	}
+	if err != nil {
+		return "", fmt.Errorf("chromium dump-dom: %w", err)
+	}
+	if len(out) == 0 {
+		return "", fmt.Errorf("chromium dump-dom returned empty output")
+	}
+	return string(out), nil
+}
+
+// lookupChromiumBinary returns the path of the first Chromium-compatible
+// browser found in PATH, trying "chromium", "chromium-browser",
+// "google-chrome" and "google-chrome-stable" in that order. It returns an
+// error when none of them can be located.
+func lookupChromiumBinary() (string, error) {
+	for _, name := range []string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"} {
+		if path, err := exec.LookPath(name); err == nil {
+			return path, nil
+		}
+	}
+	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
+}
+
+// parseXfyunPricingCatalog extracts one officialPricingRecord per recognised
+// pricing card found in raw.
+//
+// Cards are located with xfyunPricingCardPattern; each match is expected to
+// carry two capture groups, the card title and the price. Titles that
+// xfyunCanonicalModelName does not recognise are skipped. The single price on
+// a card is used for both InputPrice and OutputPrice, and a price of 0 marks
+// the record as free.
+//
+// An error is returned when no card matches at all, or when every matched card
+// was skipped.
+func parseXfyunPricingCatalog(raw string) ([]officialPricingRecord, error) {
+	matches := xfyunPricingCardPattern.FindAllStringSubmatch(raw, -1)
+	if len(matches) == 0 {
+		return nil, fmt.Errorf("unexpected xfyun pricing content: no pricing cards found")
+	}
+
+	providerNameCn, providerCountry, providerWebsite := providerMetadata("iFlytek")
+	records := make([]officialPricingRecord, 0, len(matches))
+	for _, match := range matches {
+		// Full match plus title and price groups.
+		if len(match) != 3 {
+			continue
+		}
+		title := strings.TrimSpace(match[1])
+		modelName := xfyunCanonicalModelName(title)
+		if modelName == "" {
+			continue
+		}
+		// NOTE(review): the "must" prefix suggests this helper panics on
+		// malformed input - confirm that the price group of
+		// xfyunPricingCardPattern can only capture parseable numbers.
+		price := mustParseSubscriptionPrice(match[2])
+		records = append(records, officialPricingRecord{
+			ModelID:         normalizeExternalID("xfyun", modelName),
+			ModelName:       modelName,
+			ProviderName:    "iFlytek",
+			ProviderNameCn:  providerNameCn,
+			ProviderCountry: providerCountry,
+			ProviderWebsite: providerWebsite,
+			OperatorName:    "Spark API",
+			OperatorNameCn:  "讯飞星火 API",
+			OperatorCountry: "CN",
+			OperatorWebsite: defaultXfyunPricingURL,
+			OperatorType:    "official",
+			Region:          "CN",
+			Currency:        "CNY",
+			InputPrice:      price,
+			OutputPrice:     price,
+			IsFree:          price == 0,
+			SourceURL:       defaultXfyunPricingURL,
+			ModelSourceURL:  defaultXfyunPricingURL,
+			DateConfidence:  "unknown",
+			DateSourceKind:  "official_pricing",
+			Modality:        "text",
+		})
+	}
+	if len(records) == 0 {
+		return nil, fmt.Errorf("unexpected xfyun pricing content: empty records after canonical mapping")
+	}
+	return records, nil
+}
+
+// xfyunCanonicalModelName maps a pricing-card title (surrounding whitespace
+// ignored) to the canonical Spark model name used in records. It returns ""
+// for titles it does not know, which callers treat as "skip this card".
+func xfyunCanonicalModelName(title string) string {
+	switch strings.TrimSpace(title) {
+	case "X2/X1.5模型":
+		return "Spark X2/X1.5"
+	case "Ultra模型":
+		return "Spark Ultra"
+	case "Pro模型":
+		return "Spark Pro"
+	case "Lite模型":
+		return "Spark Lite"
+	default:
+		return ""
+	}
+}
diff --git a/scripts/import_xfyun_pricing_test.go b/scripts/import_xfyun_pricing_test.go
new file mode 100644
index 0000000..157b88b
--- /dev/null
+++ b/scripts/import_xfyun_pricing_test.go
@@ -0,0 +1,61 @@
+//go:build llm_script
+
+package main
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestParseXfyunPricingCatalogBuildsRecords(t *testing.T) {
+	raw, err := os.ReadFile(filepath.Join("testdata", "xfyun_pricing_sample.html"))
+	if err != nil {
+		t.Fatalf("读取 fixture 失败: %v", err)
+	}
+
+	records, err := parseXfyunPricingCatalog(string(raw))
+	if err != nil {
+		t.Fatalf("parseXfyunPricingCatalog 返回错误: %v", err)
+	}
+	if len(records) != 4 {
+		t.Fatalf("期望 4 条讯飞价格记录,实际 %d", len(records))
+	}
+	if records[0].ModelID != "xfyun-spark-x2-x1-5" {
+		t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
+	}
+	if records[0].InputPrice != 2 || records[0].OutputPrice != 2 {
+		t.Fatalf("Spark X2/X1.5 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice)
+	}
+	if records[1].ModelName != "Spark Ultra" || records[1].InputPrice != 0.8 {
+		t.Fatalf("Spark Ultra 解析错误: %+v", records[1])
+	}
+	if !records[3].IsFree || records[3].InputPrice != 0 || records[3].OutputPrice != 0 {
+		t.Fatalf("Spark Lite 免费定价错误: %+v", records[3])
+	}
+}
+
+func TestRunXfyunPricingImportDryRunPrintsSummary(t *testing.T) {
+	var out bytes.Buffer
+	err := runXfyunPricingImport(xfyunPricingImportConfig{
+		URL:     defaultXfyunPricingURL,
+		Fixture: filepath.Join("testdata", "xfyun_pricing_sample.html"),
+		DryRun:  true,
+	}, nil, &out)
+	if err != nil {
+		t.Fatalf("runXfyunPricingImport 返回错误: %v", err)
+	}
+	output := out.String()
+	for _, want := range []string{
+		"source=xfyun-pricing-import",
+		"models=4",
+		"operator=Spark API",
+		"dry_run=true",
+	} {
+		if !strings.Contains(output, want) {
+			t.Fatalf("输出缺少 %q,实际: %q", want, output)
+		}
+	}
+}
diff --git a/scripts/testdata/baichuan_pricing_sample.txt b/scripts/testdata/baichuan_pricing_sample.txt
new file mode 100644
index 0000000..e8fb88b
--- /dev/null
+++ b/scripts/testdata/baichuan_pricing_sample.txt
@@ -0,0 +1,24 @@
+价格说明
+计费模式
+按照实际使用的数据量(千tokens)收费。
+通用大模型
+模型调用 Baichuan-M3-Plus 32k 00:00 ~ 24:00 输入:0.005元/千tokens
+输出:0.009元/千tokens 包括对话全流程节点产生的Token总数
+模型调用 Baichuan-M3 32k 00:00 ~ 24:00 输入:0.01元/千tokens
+输出:0.03元/千tokens
+模型调用 Baichuan-M2-Plus 32k 00:00 ~ 24:00 输入:0.01元/千tokens
+输出:0.03元/千tokens 
包括对话全流程节点产生的Token总数 +模型调用 Baichuan-M2 32k 00:00 ~ 24:00 输入:0.002元/千tokens +输出:0.02元/千tokens +模型调用 Baichuan4-Turbo 32k 00:00 ~ 24:00 0.015元/千tokens 包含输入和输出 +模型调用 Baichuan4-Air 32k 00:00 ~ 24:00 0.00098元/千tokens 包含输入和输出 +模型调用 Baichuan4 32k 00:00 ~ 24:00 0.1元/千tokens 包含输入和输出 +模型调用 Baichuan3-Turbo 32k 00:00 ~ 24:00 0.012元/千tokens 包含输入和输出 +模型调用 Baichuan3-Turbo-128k 128k 00:00 ~ 24:00 0.024元/千tokens 包含输入和输出 +模型调用 Baichuan2-Turbo 32k 00:00 ~ 24:00 0.008元/千tokens 包含输入和输出 +模型调用 Baichuan2-53B 32k 00:00 ~ 8:00 0.01元/千tokens 包含输入和输出 +8:00 ~ 24:00 0.02元/千tokens +搜索增强服务 - 00:00 ~ 24:00 0.03元/次 开启 web_search 后,接口自动判断调用搜索增强服务的次数 +医疗搜索 - 00:00 ~ 24:00 0.03元/次 调用Baichuan-M2-Plus对话会自动触发医疗搜索 +知识库 +模型调用 Baichuan-Text-Embedding 00:00 ~ 24:00 0.0005元/千tokens diff --git a/scripts/testdata/lingyiwanwu_pricing_sample.txt b/scripts/testdata/lingyiwanwu_pricing_sample.txt new file mode 100644 index 0000000..a01e552 --- /dev/null +++ b/scripts/testdata/lingyiwanwu_pricing_sample.txt @@ -0,0 +1 @@ +模型与计费","children":"模型与计费"}],"\n",["$","p",null,{"children":"零一万物 API 开放平台提供一系列具有不同功能和定价的 Yi 系列大模型。"}],"\n",["$","table",null,{"children":[["$","thead",null,{"children":["$","tr",null,{"children":[["$","th",null,{"children":"模型"}],["$","th",null,{"children":"上下文长度"}],["$","th",null,{"children":"特性"}],["$","th",null,{"children":"场景"}],["$","th",null,{"children":"价格/1M token"}]]}]}],["$","tbody",null,{"children":[["$","tr",null,{"children":[["$","td",null,{"children":"yi-lightning"}],["$","td",null,{"children":"16K"}],["$","td",null,{"children":"最新高性能模型,保证高质量输出同时,推理速度大幅提升。"}],["$","td",null,{"children":"适用于实时交互,高复杂推理场景。"}],["$","td",null,{"children":"¥0.99"}]]}],["$","tr",null,{"children":[["$","td",null,{"children":"yi-vision-v2"}],["$","td",null,{"children":"16K"}],["$","td",null,{"children":"复杂视觉任务模型,提供基于多张图片的高性能理解、分析能力。"}],["$","td",null,{"children":"适合图片问答、OCR、视觉推理。"}],["$","td",null,{"children":"¥6"}]]}]]}]]}],"\n",["$","p",null,{"children":["$","strong",null,{"children":"关于计费"}]}] \ No newline at 
end of file diff --git a/scripts/testdata/sensenova_pricing_sample.txt b/scripts/testdata/sensenova_pricing_sample.txt new file mode 100644 index 0000000..e042ca1 --- /dev/null +++ b/scripts/testdata/sensenova_pricing_sample.txt @@ -0,0 +1,44 @@ +
+
+

SenseNova 6.7 Flash-Lite

+

面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解

调用次数限制

每5小时1500次

+

MODEL ID

sensenova-6.7-flash-lite
+
+
+

SenseNova U1 Fast

+

基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成

调用次数限制

每5小时1500次

+

MODEL ID

sensenova-u1-fast
+
+
+

DeepSeek V4 Flash

+

DeepSeek 高性能对话模型,支持思考/非思考模式、256K 上下文、工具调用

调用次数限制

每5小时150次

+

MODEL ID

deepseek-v4-flash
+
+
+

SenseNova 6.7 Flash-Lite

面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解。

  • 轻量高效,兼顾效果、成本与落地性
  • 原生多模态架构,支持图像输入理解(OCR、图表解读等)
  • 上下文长度 256K tokens(最大输入 252K,最大输出 64K)

MODEL ID: sensenova-6.7-flash-lite

+

SenseNova U1 Fast

SenseNova U1 Fast 基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成场景。

MODEL ID: sensenova-u1-fast

注意: U1 Fast 使用独立的图像生成接口 POST /v1/images/generations,不是 Chat Completions;不支持图像输入。

+

DeepSeek V4 Flash

DeepSeek 高性能对话模型,支持思考模式与非思考模式,上下文长度 256K tokens,最大输出 64K tokens,内置 JSON Output、Tool Calls等功能。

MODEL ID: deepseek-v4-flash

+
GET https://token.sensenova.cn/v1/models
+
{
+  "data": [
+    {
+      "id": "sensenova-6.7-flash-lite",
+      "name": "sensenova-6.7-flash-lite",
+      "created": 1777392000,
+      "input_modalities": ["text", "image"],
+      "output_modalities": ["text"],
+      "quantization": "fp8",
+      "context_length": 262144,
+      "max_output_length": 65536,
+      "pricing": {
+        "prompt": "0",
+        "completion": "0",
+        "image": "0",
+        "request": "0",
+        "input_cache_read": "0"
+      }
+    }
+  ]
+}
+===SENSENOVA_MODELS_BUNDLE=== +{"subtitle":"兼容 OpenAI 接口,按量透明计费,公测期内免费开放","freeDesc":"公测期完全免费开放","promoLine2":",所有模型完全开放"} diff --git a/scripts/testdata/xfyun_pricing_sample.html b/scripts/testdata/xfyun_pricing_sample.html new file mode 100644 index 0000000..4eeb8b7 --- /dev/null +++ b/scripts/testdata/xfyun_pricing_sample.html @@ -0,0 +1 @@ +
X2/X1.5模型
2元/百万tokens
Ultra模型
0.8元/百万tokens
Pro模型
5元/百万tokens
Lite模型
0元/百万tokens
\ No newline at end of file