From e757cd2dd78da9084f0ee4315a100c3c5c1ce7e9 Mon Sep 17 00:00:00 2001
From: phamnazage-jpg ]*>([^<]+)
.*?调用次数限制
]*>每5小时([0-9]+)次
.*?MODEL ID]*>([^<]+)`)
+ sensenovaModelsScriptPattern = regexp.MustCompile(`src="([^"]+/_next/static/chunks/[^"]+\.js|/_next/static/chunks/[^"]+\.js)"`)
+ sensenovaPricingZeroPattern = regexp.MustCompile(`(?s)"pricing"\s*:\s*\{\s*"prompt"\s*:\s*"0"\s*,\s*"completion"\s*:\s*"0"\s*,\s*"image"\s*:\s*"0"\s*,\s*"request"\s*:\s*"0"`)
+)
+
+// main is the command-line entry point for the SenseNova pricing import.
+//
+// It calls loadSubscriptionImportEnv, parses the flags into a
+// sensenovaPricingImportConfig, opens the database unless -dry-run is set, and
+// hands everything to runSensenovaPricingImport. Any failure is reported on
+// stderr and the process exits with status 1.
+func main() {
+	loadSubscriptionImportEnv()
+
+	var docsURL string
+	var modelsURL string
+	var fixture string
+	var dryRun bool
+	var timeoutSeconds int
+
+	flag.StringVar(&docsURL, "docs-url", defaultSensenovaDocsURL, "商汤 SenseNova API 文档页")
+	flag.StringVar(&modelsURL, "models-url", defaultSensenovaModelsURL, "商汤 SenseNova 模型页")
+	flag.StringVar(&fixture, "fixture", "", "商汤 SenseNova 价格样例文件")
+	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
+	flag.IntVar(&timeoutSeconds, "timeout", 45, "请求超时(秒)")
+	flag.Parse()
+
+	cfg := sensenovaPricingImportConfig{
+		DocsURL:   docsURL,
+		ModelsURL: modelsURL,
+		Fixture:   fixture,
+		DryRun:    dryRun,
+		Timeout:   time.Duration(timeoutSeconds) * time.Second,
+	}
+
+	// The database is only needed when records are actually written.
+	var db *sql.DB
+	if !cfg.DryRun {
+		opened, err := subscriptionImportDB()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
+			os.Exit(1)
+		}
+		db = opened
+	}
+
+	runErr := runSensenovaPricingImport(cfg, db, os.Stdout)
+	if db != nil {
+		// os.Exit does not run deferred calls, so the handle is closed
+		// explicitly before either exit path. The close error is ignored on
+		// purpose: the import result has already been decided by runErr.
+		_ = db.Close()
+	}
+	if runErr != nil {
+		fmt.Fprintf(os.Stderr, "import_sensenova_pricing: %v\n", runErr)
+		os.Exit(1)
+	}
+}
+
+// runSensenovaPricingImport loads the SenseNova fixture described by cfg,
+// parses and de-duplicates the pricing records, and then either prints a
+// dry-run summary to out or upserts the records through db and prints a
+// summary that includes the current row count of region_pricing.
+//
+// It returns an error when loading or parsing fails, when no records remain,
+// when db is nil outside dry-run mode, or when a database call or the write to
+// out fails.
+func runSensenovaPricingImport(cfg sensenovaPricingImportConfig, db *sql.DB, out io.Writer) error {
+	source, err := fetchSensenovaPricingFixture(cfg)
+	if err != nil {
+		return err
+	}
+
+	parsed, err := parseSensenovaPricingCatalog(source)
+	if err != nil {
+		return err
+	}
+
+	records := dedupeOfficialPricingRecords(parsed)
+	if len(records) == 0 {
+		return fmt.Errorf("unexpected sensenova pricing content: no records")
+	}
+
+	switch {
+	case cfg.DryRun:
+		// Dry run: report what would be written and stop before touching db.
+		_, writeErr := fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
+		return writeErr
+	case db == nil:
+		return fmt.Errorf("db is required when dry-run=false")
+	}
+
+	if upsertErr := upsertOfficialPricingRecords(db, records, "sensenova-pricing-import"); upsertErr != nil {
+		return upsertErr
+	}
+
+	var rowCount int
+	if scanErr := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&rowCount); scanErr != nil {
+		return fmt.Errorf("count region_pricing: %w", scanErr)
+	}
+
+	_, writeErr := fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, rowCount)
+	return writeErr
+}
+
+func fetchSensenovaPricingFixture(cfg sensenovaPricingImportConfig) (sensenovaPricingFixture, error) {
+ if strings.TrimSpace(cfg.Fixture) != "" {
+ data, err := os.ReadFile(cfg.Fixture)
+ if err != nil {
+ return sensenovaPricingFixture{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
+ }
+ return splitSensenovaFixture(string(data))
+ }
+
+ docsHTML, err := fetchRenderedPricingPageWithChromium(cfg.DocsURL, cfg.Timeout)
+ if err != nil {
+ return sensenovaPricingFixture{}, fmt.Errorf("fetch docs render: %w", err)
+ }
+ modelsText, err := fetchSensenovaModelsBundle(cfg.ModelsURL, cfg.Timeout)
+ if err != nil {
+ return sensenovaPricingFixture{}, err
+ }
+ return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil
+}
+
+// splitSensenovaFixture cuts a combined fixture file into its docs HTML and
+// models bundle halves at the first occurrence of sensenovaFixtureSplitMarker.
+//
+// Both halves are whitespace-trimmed. An error is returned when the marker is
+// absent or when either half is empty after trimming.
+func splitSensenovaFixture(raw string) (sensenovaPricingFixture, error) {
+	segments := strings.SplitN(raw, sensenovaFixtureSplitMarker, 2)
+	if len(segments) != 2 {
+		return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: missing models bundle marker")
+	}
+
+	fixture := sensenovaPricingFixture{
+		DocsHTML:   strings.TrimSpace(segments[0]),
+		ModelsText: strings.TrimSpace(segments[1]),
+	}
+	if fixture.DocsHTML != "" && fixture.ModelsText != "" {
+		return fixture, nil
+	}
+	return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: empty docs or models segment")
+}
+
+func fetchSensenovaModelsBundle(modelsURL string, timeout time.Duration) (string, error) {
+ client := &http.Client{Timeout: timeout}
+ html, err := fetchRawPricingPage(modelsURL, "", client)
+ if err != nil {
+ return "", fmt.Errorf("fetch models page shell: %w", err)
+ }
+ scripts := sensenovaModelsScriptPattern.FindAllStringSubmatch(html, -1)
+ if len(scripts) == 0 {
+ return "", fmt.Errorf("unexpected sensenova models page: no chunk scripts found")
+ }
+ seen := make(map[string]struct{}, len(scripts))
+ for _, match := range scripts {
+ if len(match) != 2 {
+ continue
+ }
+ scriptURL, err := resolveSensenovaAssetURL(modelsURL, match[1])
+ if err != nil {
+ continue
+ }
+ if _, ok := seen[scriptURL]; ok {
+ continue
+ }
+ seen[scriptURL] = struct{}{}
+ bundle, err := fetchRawPricingPage(scriptURL, "", client)
+ if err != nil {
+ continue
+ }
+ if sensenovaBundleConfirmsFreeBeta(bundle) {
+ return bundle, nil
+ }
+ }
+ return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found")
+}
+
+// resolveSensenovaAssetURL resolves assetPath, which may be relative or
+// absolute, against baseURL and returns the resulting URL as a string. A parse
+// failure of either argument is returned unchanged.
+func resolveSensenovaAssetURL(baseURL string, assetPath string) (string, error) {
+	base, err := url.Parse(baseURL)
+	if err != nil {
+		return "", err
+	}
+	// (*url.URL).Parse parses the reference and resolves it against the
+	// receiver in one step.
+	resolved, err := base.Parse(assetPath)
+	if err != nil {
+		return "", err
+	}
+	return resolved.String(), nil
+}
+
+// sensenovaBundleConfirmsFreeBeta reports whether raw contains both a
+// "free during public beta" phrase and an "all models included" phrase, each
+// accepted in either its Chinese or its English wording.
+func sensenovaBundleConfirmsFreeBeta(raw string) bool {
+	// Without the free-beta phrase the second check is irrelevant.
+	if !strings.Contains(raw, "公测期完全免费开放") && !strings.Contains(raw, "free during public beta") {
+		return false
+	}
+	return strings.Contains(raw, "所有模型完全开放") || strings.Contains(raw, "all models included")
+}
+
+func fetchRenderedPricingPageWithChromium(pageURL string, timeout time.Duration) (string, error) {
+ browserPath, err := lookupChromiumBinaryForSensenova()
+ if err != nil {
+ return "", err
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), timeout)
+ defer cancel()
+ cmd := exec.CommandContext(ctx, browserPath,
+ "--headless",
+ "--no-sandbox",
+ "--disable-gpu",
+ "--virtual-time-budget=8000",
+ "--dump-dom",
+ pageURL,
+ )
+ cmd.Stderr = io.Discard
+ out, err := cmd.Output()
+ if ctx.Err() == context.DeadlineExceeded {
+ return "", fmt.Errorf("chromium render timeout after %s", timeout)
+ }
+ if err != nil {
+ return "", fmt.Errorf("chromium dump-dom: %w", err)
+ }
+ if len(out) == 0 {
+ return "", fmt.Errorf("chromium dump-dom returned empty output")
+ }
+ return string(out), nil
+}
+
+// lookupChromiumBinaryForSensenova returns the path of the first
+// Chromium-compatible executable that exec.LookPath finds, trying the
+// candidate names in a fixed order. It returns an error when none of them
+// resolves.
+func lookupChromiumBinaryForSensenova() (string, error) {
+	candidates := [...]string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"}
+	for i := range candidates {
+		found, err := exec.LookPath(candidates[i])
+		if err != nil {
+			continue
+		}
+		return found, nil
+	}
+	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
+}
+
+func parseSensenovaPricingCatalog(fixture sensenovaPricingFixture) ([]officialPricingRecord, error) {
+ if !sensenovaBundleConfirmsFreeBeta(fixture.ModelsText) {
+ return nil, fmt.Errorf("unexpected sensenova models bundle: missing public-beta free signal")
+ }
+ if !strings.Contains(fixture.DocsHTML, "GET https://token.sensenova.cn/v1/models") {
+ return nil, fmt.Errorf("unexpected sensenova docs content: missing list models endpoint")
+ }
+ if !sensenovaPricingZeroPattern.MatchString(fixture.DocsHTML) {
+ return nil, fmt.Errorf("unexpected sensenova docs content: missing zero pricing object example")
+ }
+
+ matches := sensenovaOverviewCardPattern.FindAllStringSubmatch(fixture.DocsHTML, -1)
+ if len(matches) == 0 {
+ return nil, fmt.Errorf("unexpected sensenova docs content: no model overview cards parsed")
+ }
+
+ providerNameCn, providerCountry, providerWebsite := providerMetadata("SenseTime")
+ records := make([]officialPricingRecord, 0, len(matches))
+ seenModelIDs := make(map[string]struct{}, len(matches))
+ for _, match := range matches {
+ if len(match) != 4 {
+ continue
+ }
+ modelName := strings.TrimSpace(match[1])
+ modelID := strings.TrimSpace(match[3])
+ if modelName == "" || modelID == "" {
+ continue
+ }
+ if _, ok := seenModelIDs[modelID]; ok {
+ continue
+ }
+ seenModelIDs[modelID] = struct{}{}
+ sectionID := sensenovaSectionIDForModel(modelID)
+ section, err := extractHTMLSectionByID(fixture.DocsHTML, sectionID)
+ if err != nil {
+ return nil, err
+ }
+ providerName := sensenovaProviderName(modelID)
+ providerCn, providerCountryCode, providerSite := providerNameCn, providerCountry, providerWebsite
+ if providerName != "SenseTime" {
+ providerCn, providerCountryCode, providerSite = providerMetadata(providerName)
+ }
+ records = append(records, officialPricingRecord{
+ ModelID: normalizeExternalID("sensenova", modelID),
+ ModelName: modelName,
+ ProviderName: providerName,
+ ProviderNameCn: providerCn,
+ ProviderCountry: providerCountryCode,
+ ProviderWebsite: providerSite,
+ OperatorName: "SenseNova API",
+ OperatorNameCn: "日日新开放平台",
+ OperatorCountry: "CN",
+ OperatorWebsite: defaultSensenovaDocsURL,
+ OperatorType: "official",
+ Region: "CN",
+ Currency: "CNY",
+ InputPrice: 0,
+ OutputPrice: 0,
+ IsFree: true,
+ ContextLength: sensenovaContextLength(modelID, section),
+ SourceURL: defaultSensenovaDocsURL,
+ ModelSourceURL: firstNonEmptyText(defaultSensenovaDocsURL+"#"+sectionID, defaultSensenovaDocsURL),
+ DateConfidence: "unknown",
+ DateSourceKind: "official_pricing",
+ Modality: sensenovaModality(modelID, section),
+ })
+ }
+ if len(records) == 0 {
+ return nil, fmt.Errorf("unexpected sensenova pricing content: empty records after parse")
+ }
+ return records, nil
+}
+
+func extractHTMLSectionByID(raw string, sectionID string) (string, error) {
+ marker := fmt.Sprintf(`面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解
调用次数限制
每5小时1500次
MODEL ID
sensenova-6.7-flash-lite基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成
调用次数限制
每5小时1500次
MODEL ID
sensenova-u1-fastDeepSeek 高性能对话模型,支持思考/非思考模式、256K 上下文、工具调用
调用次数限制
每5小时150次
MODEL ID
deepseek-v4-flash面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解。
MODEL ID: sensenova-6.7-flash-lite
SenseNova U1 Fast 基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成场景。
MODEL ID: sensenova-u1-fast
注意: U1 Fast 使用独立的图像生成接口 POST /v1/images/generations,不是 Chat Completions;不支持图像输入。
DeepSeek 高性能对话模型,支持思考模式与非思考模式,上下文长度 256K tokens,最大输出 64K tokens,内置 JSON Output、Tool Calls等功能。
MODEL ID: deepseek-v4-flash
GET https://token.sensenova.cn/v1/models{
+ "data": [
+ {
+ "id": "sensenova-6.7-flash-lite",
+ "name": "sensenova-6.7-flash-lite",
+ "created": 1777392000,
+ "input_modalities": ["text", "image"],
+ "output_modalities": ["text"],
+ "quantization": "fp8",
+ "context_length": 262144,
+ "max_output_length": 65536,
+ "pricing": {
+ "prompt": "0",
+ "completion": "0",
+ "image": "0",
+ "request": "0",
+ "input_cache_read": "0"
+ }
+ }
+ ]
+}
+===SENSENOVA_MODELS_BUNDLE===
+{"subtitle":"兼容 OpenAI 接口,按量透明计费,公测期内免费开放","freeDesc":"公测期完全免费开放","promoLine2":",所有模型完全开放"}
diff --git a/scripts/testdata/xfyun_pricing_sample.html b/scripts/testdata/xfyun_pricing_sample.html
new file mode 100644
index 0000000..4eeb8b7
--- /dev/null
+++ b/scripts/testdata/xfyun_pricing_sample.html
@@ -0,0 +1 @@
+