From 6fe3b484f1d6c040e6a1a1aa01e89854535ed60a Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Fri, 22 May 2026 15:28:13 +0800 Subject: [PATCH] feat(pricing): add cucloud and bytedance payg importers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add import_cucloud_pricing.go for 联通云 payg 公开价抓取 - Add import_bytedance_pricing.go for 火山引擎/ByteDance Ark 定价导入 - Include test files and sample testdata for both importers - Update plan catalog inventory docs and seeds - Add cucloud pricing importer implementation plan - Align pipeline scripts and smoke gate tests --- docs/PLAN_CATALOG_INVENTORY.md | 18 +- ...026-05-22-cucloud-pricing-importer-plan.md | 335 ++++++++++++++++++ scripts/import_bytedance_pricing.go | 284 +++++++++++++++ scripts/import_bytedance_pricing_test.go | 82 +++++ scripts/import_cucloud_pricing.go | 320 +++++++++++++++++ scripts/import_cucloud_pricing_test.go | 95 +++++ scripts/import_plan_catalog_test.go | 18 +- scripts/importer_smoke_gate_test.sh | 6 + scripts/pipeline_runtime_alignment_test.sh | 12 + scripts/run_daily.sh | 24 +- scripts/run_intel_pipeline.sh | 10 +- scripts/run_real_pipeline.sh | 18 +- .../testdata/bytedance_ark_pricing_sample.txt | 59 +++ scripts/testdata/cucloud_pricing_sample.html | 62 ++++ scripts/verify_importer_smoke.sh | 9 + ...og_inventory_seed_cn_relays_top20plus.json | 43 ++- ...talog_inventory_seed_cn_vendors_top20.json | 4 +- 17 files changed, 1362 insertions(+), 37 deletions(-) create mode 100644 docs/plans/2026-05-22-cucloud-pricing-importer-plan.md create mode 100644 scripts/import_bytedance_pricing.go create mode 100644 scripts/import_bytedance_pricing_test.go create mode 100644 scripts/import_cucloud_pricing.go create mode 100644 scripts/import_cucloud_pricing_test.go create mode 100644 scripts/testdata/bytedance_ark_pricing_sample.txt create mode 100644 scripts/testdata/cucloud_pricing_sample.html diff --git a/docs/PLAN_CATALOG_INVENTORY.md b/docs/PLAN_CATALOG_INVENTORY.md index 719a5d3..0bb54fe 100644 --- a/docs/PLAN_CATALOG_INVENTORY.md +++ b/docs/PLAN_CATALOG_INVENTORY.md @@ -1,6 +1,6 @@ # Token Plan / Coding Plan 基础目录 -更新时间:2026-05-15(Asia/Shanghai) +更新时间:2026-05-22(Asia/Shanghai) 配套矩阵见:[PLAN_CATALOG_COVERAGE_MATRIX.md](PLAN_CATALOG_COVERAGE_MATRIX.md)。该矩阵按平台/套餐族逐项标注“目录基线 / 目录核验 / 已有 importer / 已真实入库 / 仍缺细颗粒度价格”,用于快速回答覆盖边界问题。 @@ -18,7 +18,7 @@ - 国内官方模型厂家 Top 20 - 国内中转 / 聚合 / 云厂商平台 20+ - 全球官方模型平台与全球多模型中转平台参考集 -- `plan_catalog_inventory` 最终落库 70 条目录记录 +- `plan_catalog_inventory` 最终落库 71 条目录记录 - `subscription_plan` 新增一批手工核实套餐 seed,用于在真正抓取器到位前先支撑日报对比 ## 分类约定 @@ -72,7 +72,7 @@ 10. 天翼云息壤 11. 联通云 AICP 12. 联通云 AI 应用开发平台 -13. 移动云 AI 应用专区 +13. 移动云 MoMA 14. 有道智云 MaaS 15. 360 智脑开放平台 16. 硅基流动 SiliconCloud @@ -202,7 +202,8 @@ 3. `import_zhipu_coding_plan.go` 4. `import_minimax_subscription.go` 5. `import_cucloud_catalog.go` -6. `import_mobile_cloud_catalog.go` +6. `import_mobile_cloud_pricing.go` +7. `import_cucloud_pricing.go` 新增已完成: @@ -221,9 +222,12 @@ 13. `import_qwen_pricing.go` 14. `import_hunyuan_pricing.go` 15. `import_huawei_maas_pricing.go` +16. `import_bytedance_pricing.go` 这些平台统一按 `pay_as_you_go -> region_pricing` 处理,直接抓取官方公开模型价格,不再停留在 `future_official_pricing`。 其中 `SiliconFlow` 当前优先尝试官方价格入口;若入口返回站点落地页或临时不可用,则回退到仓库内最近核验的官方快照,避免日跑流水线因前端路由问题中断。 +其中 `火山方舟` 当前 importer 先落地 `在线推理(常规)` 官方 token 价格;`低延迟/批量/TPM 保障包` 多 service-class 定价,以及 `向量/图片/3D` 这类非对称或非 token 计费项,待 schema 扩展后再细化。 +其中 `联通云` 当前新增 `import_cucloud_pricing.go`:已真实导入 AISP Token Plan 公开披露的 `DeepSeek-V4-Pro / DeepSeek-V4-Flash / MiniMax-M2.5` 三模型 blended 单价与区域支持矩阵;并已在 `plan_catalog_inventory` seed 中补充 `cucloud-aisp-token-plan-pricing` 显式 pricing evidence 行。公开文档仅确认 `按量计费模式` 存在,尚未披露 payg per-model 销售价表,因此不能宣称联通云 payg 已完整打通。 对于暂时没有稳定公开结构化价格页、但官方平台入口已经确认的长尾平台,当前统一归到: @@ -233,6 +237,6 @@ 下一步建议优先级: -1. `火山方舟按量模型价格官方页` -2. `移动云更细颗粒度的模型 API 价格` -3. `联通云更细颗粒度的模型 API 价格` +1. `移动云语音按字符 / 按秒计费如何落 schema` +2. `联通云 payg per-model 价格公开表是否出现` +3. `百川 / 零一万物 / 商汤 / 讯飞 任一官方 payg importer` diff --git a/docs/plans/2026-05-22-cucloud-pricing-importer-plan.md b/docs/plans/2026-05-22-cucloud-pricing-importer-plan.md new file mode 100644 index 0000000..b91cba1 --- /dev/null +++ b/docs/plans/2026-05-22-cucloud-pricing-importer-plan.md @@ -0,0 +1,335 @@ +# 联通云细颗粒度 Pricing Importer 设计计划 + +> For Hermes: Use subagent-driven-development skill to implement this plan task-by-task. + +**Goal:** 在不伪造价格事实的前提下,把联通云从目录级 `import_cucloud_catalog.go` 升级为可验证的细颗粒度 pricing importer。 + +**Architecture:** 采用“两层事实源”设计:第一层抓取帮助中心公开文档中已明确结构化披露的 Token Plan 模型价格与区域支持信息并真实落库;第二层对 AISP 按量计费公开文档仅做“计费模式已验证、具体 per-model payg 单价未公开”的 blocker 标注,不把未公开的销售价伪造成 `region_pricing`。实现上优先复用 `official_pricing_import_common.go`,新增 `import_cucloud_pricing.go`,并保留 `import_cucloud_catalog.go` 作为目录入口校验。 + +**Tech Stack:** Go llm_script importer、联通云帮助中心 SSR HTML、正则/HTML 表格解析、现有 `officialPricingRecord` / `catalogVerificationRecord`。 + +--- + +## 已验证事实 + +1. 目录入口仍有效: + - `https://www.cucloud.cn/act/CloudAI.html` + - 已由现有 `import_cucloud_catalog.go` 校验 AICP / AI 应用开发平台存在。 + +2. AISP 帮助中心公开页面可直接 `GET`,无需登录,且页面源码内内嵌完整文档内容: + - `https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=2973`(购买计费) + - `https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=3237`(Coding Plan) + - `https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=3236`(Token Plan) + +3. 已从公开源码中确认的结构化价格事实: + - Token Plan 个人版: + - Lite 15元/月,600万 tokens + - Pro 30元/月,1200万 tokens + - Max 45元/月,1800万 tokens + - Token Plan 团队版: + - Lite 198元/月,25,000 credits + - Pro 698元/月,100,000 credits + - Max 1398元/月,250,000 credits + - 团队版 credits 对三种模型公开披露了折算综合单价: + - `DeepSeek-V4-Pro`:9.30 元/百万tokens + - `DeepSeek-V4-Flash`:0.70 元/百万tokens + - `MiniMax-M2.5`:1.10 元/百万tokens + +4. 已从公开源码中确认的结构化区域支持事实: + - 区域列:`呼和浩特二区 / 上海二十二区 / 武汉四区 / 济南五区 / 贵阳基地二区` + - 可从 `Token Plan概述` 或 `AI服务平台API介绍` 表格中解析模型-区域支持矩阵。 + +5. 已确认但不能伪造的边界: + - AISP `购买计费/计费项及计费方式`、`按量计费模式` 文档明确说明“按量计费”为官方公开模式,单位为 `元/千 Tokens`,且按所选模型销售价实时累加。 + - 但当前公开帮助中心页面未披露具体每个模型的按量销售价表。 + - 因此现阶段不能把 AISP payg 的 per-model 单价写入 `region_pricing`。 + +## 设计决策 + +### 决策 A:拆成“可落库价格”与“已验证 blocker”两部分 + +1. 可直接落库到 `region_pricing` 的只有: + - Token Plan 团队版中公开给出的三个模型综合单价。 +2. 只能记录 blocker、不能写价格的部分: + - AISP 按量计费 per-model 销售价。 + +理由: +- 公开文档对 payg 的计费机制有描述,但没有模型价格表。 +- 用户明确要求“找不到就标 blocker,不伪造 importer”。 +- Token Plan 团队版三模型综合单价属于公开结构化价格,足够支撑一个“真实 importer v1”。 + +### 决策 B:新增 pricing importer,不覆盖 catalog importer + +保留: +- `scripts/import_cucloud_catalog.go`:继续负责 `cucloud-aicp-platform` / `cucloud-ai-app-platform` 目录存在性校验。 + +新增: +- `scripts/import_cucloud_pricing.go`:负责 AISP Token Plan 公开价格与模型区域支持的结构化导入。 + +理由: +- catalog importer 与 pricing importer 的事实层级不同。 +- 后续若官方公开 payg 模型价表,可在 `import_cucloud_pricing.go` 内扩展,不影响目录校验链路。 + +### 决策 C:v1 只导入三个模型,价格视为 blended price + +v1 导入模型: +- `DeepSeek-V4-Pro` +- `DeepSeek-V4-Flash` +- `MiniMax-M2.5` + +价格写法: +- 因公开文档给的是“综合单价 X 元/百万tokens”,不是 input/output 分拆价; +- v1 写入 `officialPricingRecord` 时采用: + - `InputPrice = blendedPrice` + - `OutputPrice = blendedPrice` +- 同时在 `SourceURL` / `notes` / 文档中明确这是 `Token Plan blended price`,不是 AISP payg 的 input/output 拆分价。 + +风险: +- 这不适合作为严格意义上的 OpenAI-style in/out token 定价比较。 +- 但比继续停留在目录级“无细颗粒度价格”更真实,且不会伪造不可得的 input/output 拆分。 + +### 决策 D:区域粒度以“支持矩阵交集”写入 + +建议落库策略: +- 仅对公开支持该模型的区域写入 region_pricing 记录。 +- 例如: + - `DeepSeek-V4-Pro` -> `贵阳基地二区` + - `DeepSeek-V4-Flash` -> `贵阳基地二区`(团队版表明确)以及个人版支持区域若文档已写明,可谨慎扩展,但 v1 优先用矩阵表而不是 prose。 + - `MiniMax-M2.5` -> 从矩阵表取支持区域。 + +理由: +- 同一模型在联通云并非全区域可用。 +- 使用支持矩阵可避免写出不存在的区域价格。 + +## 文件设计 + +### 1. 新增 importer + +**Create:** `scripts/import_cucloud_pricing.go` + +职责: +1. 获取公开帮助中心页面 HTML +2. 修复页面源码中的 UTF-8 / Latin1 混杂问题 +3. 从页面源码中定位目标文档 `content` 块: + - `Token Plan概述` + - `AI服务平台API介绍` 或 `各云区域模型支持情况` + - `计费项及计费方式` + - `按量计费模式` +4. 解析: + - Token Plan 团队版三模型综合单价表 + - 模型-区域支持矩阵表 +5. 生成 `officialPricingRecord` +6. dry-run 输出: + - 记录数 + - 模型数 + - 区域数 + - 是否检测到 `payg_mode_confirmed=true` + - `payg_price_table_public=false` + +建议 CLI 参数: +- `-url`:默认购买计费页或 Token Plan 页 +- `-fixture`:本地样例 HTML +- `-dry-run` +- `-timeout` + +### 2. 新增测试 + +**Create:** `scripts/import_cucloud_pricing_test.go` + +测试覆盖: +1. 能从 fixture 中解析三模型 blended 价格 +2. 能解析区域支持矩阵 +3. 仅为支持区域生成记录 +4. dry-run 摘要包含: + - `source=cucloud-pricing-import` + - `models=3` + - `payg_mode_confirmed=true` + - `payg_price_table_public=false` +5. 若 fixture 缺少三模型价格表,测试应 fail + +### 3. 新增 fixture + +**Create:** `scripts/testdata/cucloud_pricing_sample.html` + +内容最少应覆盖: +- Token Plan 团队版价格表 +- 三模型 `综合单价X元/百万tokens` +- 区域支持矩阵表 +- `按量计费` 文本(用于 blocker 语义断言) + +### 4. runtime 接入 + +**Modify:** +- `scripts/run_intel_pipeline.sh` +- `scripts/run_real_pipeline.sh` +- `scripts/run_daily.sh` +- `scripts/verify_importer_smoke.sh` +- `scripts/importer_smoke_gate_test.sh` +- `scripts/pipeline_runtime_alignment_test.sh` + +接入方式: +- 保留 `cucloud_catalog` +- 新增 `cucloud_pricing` +- 失败消息区分: + - 目录失败:`联通云目录校验失败` + - 价格失败:`联通云 Token Plan 价格导入失败` + +### 5. seed / docs 同步 + +**Modify:** +- `seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json` +- `docs/PLAN_CATALOG_COVERAGE_MATRIX.md` +- `docs/NEXT_IMPORTER_RUNTIME_PRIORITY.md` +- `docs/PLAN_CATALOG_INVENTORY.md` +- `scripts/import_plan_catalog_test.go` + +同步原则: +- `cucloud-aicp-platform` / `cucloud-ai-app-platform` 仍指向 `import_cucloud_catalog.go` +- 如新增联通云价格型 catalogCode,则新建 seed 项;否则仅在 docs 中注明: + - `目录入口已导入` + - `Token Plan 三模型 blended price 已导入` + - `AISP payg per-model price table 仍未公开` + +## 推荐实现顺序 + +### Task 1: 固化 discovery 结果到 fixture 与计划文档 + +**Objective:** 把已验证的公开证据固化为可重复测试输入。 + +**Files:** +- Create: `scripts/testdata/cucloud_pricing_sample.html` +- Create: `docs/plans/2026-05-22-cucloud-pricing-importer-plan.md` + +**Step 1: 写 fixture** +- 从公开页面中裁剪最小必要 HTML 片段: + - Token Plan 三模型价格表 + - 区域支持矩阵表 + - 按量计费说明段落 + +**Step 2: 验证 fixture 可读** +Run: +- `python3 - <<'PY' ...` 或 importer 单测读取 fixture +Expected: +- 能定位三张关键表 / 段落 + +### Task 2: 先写失败测试 + +**Objective:** 先锁定 importer 的真实合同。 + +**Files:** +- Create: `scripts/import_cucloud_pricing_test.go` + +**Step 1: 写 failing tests** +至少包括: +- `TestParseCUCloudPricingBuildsBlendedRecords` +- `TestParseCUCloudPricingBuildsRegionMatrix` +- `TestRunCUCloudPricingImportDryRunPrintsSummary` + +**Step 2: 运行测试确认失败** +Run: +- `go test -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go ./scripts/import_cucloud_pricing_test.go` +Expected: +- FAIL,因为 importer 尚不存在或解析逻辑未实现 + +### Task 3: 实现最小 importer + +**Objective:** 只实现三模型 blended price + 区域支持矩阵。 + +**Files:** +- Create: `scripts/import_cucloud_pricing.go` + +**实现要点:** +1. 获取 HTML +2. `latin1 -> utf8` 修正 +3. 通过最近的 `"content":"...","createBy"` 边界提取目标内容块,而不是依赖简单 title-first regex +4. 表格解析: + - Table A:团队版三模型综合单价 + - Table B:模型区域支持矩阵 +5. 产出 `officialPricingRecord` + - `OperatorName`: `Unicom AISP` + - `OperatorNameCn`: `联通云 AI服务平台AISP` + - `OperatorWebsite`: `https://www.cucloud.cn` + - `SourceURL`: 购买计费 / Token Plan 页面 + - `Currency`: `CNY` + - `InputPrice == OutputPrice == blendedPrice` + - `Region`: 匹配支持矩阵中的具体云区域 +6. dry-run 摘要要显式输出: + - `payg_mode_confirmed=true` + - `payg_price_table_public=false` + +### Task 4: 运行 focused tests + +**Objective:** 验证 importer 合同成立。 + +Run: +- `go test -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go ./scripts/import_cucloud_pricing_test.go` +Expected: +- PASS + +### Task 5: 接入 smoke / pipeline + +**Objective:** 让新 importer 进入日跑链路,但不移除 catalog importer。 + +**Files:** +- Modify: `scripts/verify_importer_smoke.sh` +- Modify: `scripts/importer_smoke_gate_test.sh` +- Modify: `scripts/pipeline_runtime_alignment_test.sh` +- Modify: `scripts/run_intel_pipeline.sh` +- Modify: `scripts/run_real_pipeline.sh` +- Modify: `scripts/run_daily.sh` + +**Step 1: 增加 `cucloud-pricing-fixture` / `cucloud-pricing-live` smoke** +**Step 2: 增加 runtime source key `cucloud_pricing`** +**Step 3: 保留 `cucloud_catalog`** + +### Task 6: 文档 truth-sync + +**Objective:** 把联通云状态从“只有目录级”升级为“目录+部分结构化价格”。 + +**Files:** +- Modify: `docs/PLAN_CATALOG_COVERAGE_MATRIX.md` +- Modify: `docs/NEXT_IMPORTER_RUNTIME_PRIORITY.md` +- Modify: `docs/PLAN_CATALOG_INVENTORY.md` +- Modify: `seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json`(如需要) + +**文案要求:** +- 明确写: + - Token Plan 三模型 blended price 已真实导入 + - AISP payg per-model 单价未公开,仍属 blocker +- 禁止写成“联通云 payg 已完整打通” + +## 验证命令 + +### Focused unit tests +- `go test -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go ./scripts/import_cucloud_pricing_test.go` + +### Plan catalog mapping tests +- `go test -tags llm_script ./scripts/subscription_import_common.go ./scripts/import_plan_catalog.go ./scripts/import_plan_catalog_test.go` + +### Shell gates +- `bash scripts/pipeline_runtime_alignment_test.sh` +- `bash scripts/importer_smoke_gate_test.sh` + +### Live dry-run +- `go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go -dry-run` + +**Expected live dry-run truth:** +- 只宣称 Token Plan 三模型 blended price 已导入 +- 同时输出 / 记录 `payg_mode_confirmed=true` +- 同时输出 / 记录 `payg_price_table_public=false` + +## 非目标 + +1. 不伪造 AISP payg per-model input/output 单价 +2. 不把 Token Plan blended price 冒充为 OpenAI 风格 input/output split price +3. 不删除现有 `import_cucloud_catalog.go` +4. 不在未发现公开价表前宣称“联通云细颗粒度价格已完整闭环” + +## 当前最短闭环路径 + +1. 先实现 `import_cucloud_pricing.go` v1 +2. 只导入三模型 Token Plan blended price + 区域支持矩阵 +3. runtime/smoke 接入 +4. docs 标明 payg per-model price 仍是 verified blocker + +这条路径能把联通云从“纯目录级”提升到“部分结构化价格已真实落库”,同时保持事实边界清晰。 \ No newline at end of file diff --git a/scripts/import_bytedance_pricing.go b/scripts/import_bytedance_pricing.go new file mode 100644 index 0000000..24cd743 --- /dev/null +++ b/scripts/import_bytedance_pricing.go @@ -0,0 +1,284 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "encoding/json" + "flag" + "fmt" + "html" + "io" + "net/http" + "os" + "regexp" + "strings" + "time" +) + +const defaultBytedanceArkPricingURL = "https://www.volcengine.com/docs/82379/1544106" + +type bytedanceArkPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultBytedanceArkPricingURL, "火山方舟官方模型价格页") + flag.StringVar(&fixture, "fixture", "", "火山方舟价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := bytedanceArkPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runBytedanceArkPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_bytedance_pricing: %v\n", err) + os.Exit(1) + } +} + +func runBytedanceArkPricingImport(cfg bytedanceArkPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseBytedanceArkPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=bytedance-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "bytedance-pricing-import"); err != nil { + return err + } + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=bytedance-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} + +func parseBytedanceArkPricingCatalog(raw string) ([]officialPricingRecord, error) { + markdown, err := extractBytedanceArkPricingMarkdown(raw) + if err != nil { + return nil, err + } + rows, err := extractMarkdownTableRowsForHeading(markdown, "## 在线推理(常规)") + if err != nil { + return nil, err + } + if len(rows) < 2 { + return nil, fmt.Errorf("unexpected bytedance ark pricing table") + } + + records := make([]officialPricingRecord, 0, len(rows)-1) + for _, row := range rows[1:] { + if len(row) < 6 { + continue + } + modelName := cleanBytedanceArkCell(row[0]) + if modelName == "" || isBytedanceArkConditionRow(modelName) { + continue + } + inputPrice := bytedanceArkPriceValue(row[2]) + outputPrice := bytedanceArkPriceValue(row[5]) + if inputPrice <= 0 || outputPrice <= 0 { + continue + } + providerName := bytedanceArkProviderName(modelName) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + records = append(records, officialPricingRecord{ + ModelID: normalizeExternalID("bytedance", modelName), + ModelName: modelName, + ProviderName: providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "ByteDance Volcano", + OperatorNameCn: "火山引擎", + OperatorCountry: "CN", + OperatorWebsite: "https://www.volcengine.com/product/ark", + OperatorType: "official", + Region: "CN", + Currency: "CNY", + InputPrice: inputPrice, + OutputPrice: outputPrice, + SourceURL: defaultBytedanceArkPricingURL, + ModelSourceURL: defaultBytedanceArkPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(modelName), + }) + } + if len(records) == 0 { + return nil, fmt.Errorf("no bytedance ark input/output pricing rows found") + } + return records, nil +} + +func extractBytedanceArkPricingMarkdown(raw string) (string, error) { + if !strings.Contains(raw, "window._ROUTER_DATA = ") { + return raw, nil + } + jsonText, err := extractJSONAfterMarker(raw, "window._ROUTER_DATA = ") + if err != nil { + return "", err + } + var envelope map[string]any + if err := json.Unmarshal([]byte(jsonText), &envelope); err != nil { + return "", fmt.Errorf("parse bytedance router json: %w", err) + } + loaderData, _ := envelope["loaderData"].(map[string]any) + page, _ := loaderData["docs/(libid)/(docid$)/page"].(map[string]any) + curDoc, _ := page["curDoc"].(map[string]any) + markdown, _ := curDoc["MDContent"].(string) + if strings.TrimSpace(markdown) == "" { + return "", fmt.Errorf("missing bytedance pricing markdown content") + } + return markdown, nil +} + +func extractJSONAfterMarker(raw string, marker string) (string, error) { + start := strings.Index(raw, marker) + if start < 0 { + return "", fmt.Errorf("marker %q not found", marker) + } + start += len(marker) + braceDepth := 0 + inString := false + escaped := false + end := -1 + for i := start; i < len(raw); i++ { + ch := raw[i] + if inString { + if escaped { + escaped = false + continue + } + switch ch { + case '\\': + escaped = true + case '"': + inString = false + } + continue + } + switch ch { + case '"': + inString = true + case '{': + braceDepth++ + case '}': + braceDepth-- + if braceDepth == 0 { + end = i + 1 + i = len(raw) + } + } + } + if end <= start { + return "", fmt.Errorf("unable to locate router json boundary") + } + return raw[start:end], nil +} + +func extractMarkdownTableRowsForHeading(markdown string, heading string) ([][]string, error) { + lines := strings.Split(markdown, "\n") + capturing := false + rows := make([][]string, 0) + for _, line := range lines { + trimmed := strings.TrimSpace(line) + switch { + case trimmed == heading: + capturing = true + case capturing && strings.HasPrefix(trimmed, "#") && trimmed != heading: + if len(rows) > 0 { + return rows, nil + } + capturing = false + } + if !capturing || !strings.HasPrefix(trimmed, "|") || strings.Contains(trimmed, "|---") { + continue + } + cells := strings.Split(strings.Trim(trimmed, "|"), "|") + for i := range cells { + cells[i] = strings.TrimSpace(cells[i]) + } + rows = append(rows, cells) + } + if len(rows) == 0 { + return nil, fmt.Errorf("missing markdown table for heading %s", heading) + } + return rows, nil +} + +func cleanBytedanceArkCell(raw string) string { + cleaned := html.UnescapeString(strings.TrimSpace(raw)) + cleaned = strings.ReplaceAll(cleaned, `\-`, "-") + cleaned = strings.ReplaceAll(cleaned, `\`, "") + cleaned = strings.ReplaceAll(cleaned, "

", " ") + cleaned = strings.ReplaceAll(cleaned, "
", " ") + cleaned = strings.ReplaceAll(cleaned, "
", " ") + cleaned = strings.ReplaceAll(cleaned, "
", " ") + cleaned = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(cleaned, " ") + cleaned = regexp.MustCompile(`\s+`).ReplaceAllString(cleaned, " ") + return strings.TrimSpace(cleaned) +} + +func bytedanceArkPriceValue(raw string) float64 { + cleaned := cleanBytedanceArkCell(raw) + if cleaned == "" || strings.Contains(cleaned, "不支持") { + return 0 + } + match := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?)`).FindStringSubmatch(cleaned) + if len(match) != 2 { + return 0 + } + return mustParseSubscriptionPrice(match[1]) +} + +func isBytedanceArkConditionRow(value string) bool { + lower := strings.ToLower(strings.TrimSpace(value)) + return lower == "" || strings.HasPrefix(lower, "输入长度") +} + +func bytedanceArkProviderName(modelName string) string { + lower := strings.ToLower(strings.TrimSpace(modelName)) + switch { + case strings.HasPrefix(lower, "deepseek"): + return "DeepSeek" + case strings.HasPrefix(lower, "glm"): + return "Zhipu AI" + default: + return "ByteDance" + } +} diff --git a/scripts/import_bytedance_pricing_test.go b/scripts/import_bytedance_pricing_test.go new file mode 100644 index 0000000..4bb0ecf --- /dev/null +++ b/scripts/import_bytedance_pricing_test.go @@ -0,0 +1,82 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseBytedanceArkPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "bytedance_ark_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseBytedanceArkPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseBytedanceArkPricingCatalog 返回错误: %v", err) + } + if len(records) != 20 { + t.Fatalf("期望 20 条火山方舟价格记录,实际 %d", len(records)) + } + recordMap := make(map[string]officialPricingRecord, len(records)) + for _, record := range records { + recordMap[record.ModelID] = record + } + if recordMap["bytedance-doubao-seed-2-0-pro"].InputPrice != 3.2 || recordMap["bytedance-doubao-seed-2-0-pro"].OutputPrice != 16.0 { + t.Fatalf("doubao-seed-2.0-pro 基线价格错误: %+v", recordMap["bytedance-doubao-seed-2-0-pro"]) + } + if recordMap["bytedance-doubao-seed-1-8"].OutputPrice != 2.0 { + t.Fatalf("doubao-seed-1.8 应取首个阶梯输出价 2.0,实际 %v", recordMap["bytedance-doubao-seed-1-8"].OutputPrice) + } + if recordMap["bytedance-glm-4-7"].ProviderName != "Zhipu AI" || recordMap["bytedance-glm-4-7"].InputPrice != 2.0 || recordMap["bytedance-glm-4-7"].OutputPrice != 8.0 { + t.Fatalf("glm-4.7 解析错误: %+v", recordMap["bytedance-glm-4-7"]) + } + if recordMap["bytedance-deepseek-v3-2"].ProviderName != "DeepSeek" { + t.Fatalf("deepseek-v3.2 provider 归一化错误: %+v", recordMap["bytedance-deepseek-v3-2"]) + } + if recordMap["bytedance-doubao-1-5-vision-pro"].Modality != "multimodal" { + t.Fatalf("doubao-1.5-vision-pro modality 错误: %+v", recordMap["bytedance-doubao-1-5-vision-pro"]) + } + if _, ok := recordMap["bytedance-doubao-embedding-vision"]; ok { + t.Fatalf("当前 schema 不支持仅输入计费的向量模型,不应误入库") + } +} + +func TestExtractBytedanceArkPricingMarkdownFromRouterData(t *testing.T) { + html := `` + markdown, err := extractBytedanceArkPricingMarkdown(html) + if err != nil { + t.Fatalf("extractBytedanceArkPricingMarkdown 返回错误: %v", err) + } + if !strings.Contains(markdown, "doubao\\-seed\\-2.0\\-mini") { + t.Fatalf("提取后的 markdown 缺少模型行: %q", markdown) + } +} + +func TestRunBytedanceArkPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runBytedanceArkPricingImport(bytedanceArkPricingImportConfig{ + URL: defaultBytedanceArkPricingURL, + Fixture: filepath.Join("testdata", "bytedance_ark_pricing_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runBytedanceArkPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=bytedance-pricing-import", + "models=20", + "operator=ByteDance Volcano", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_cucloud_pricing.go b/scripts/import_cucloud_pricing.go new file mode 100644 index 0000000..15fa015 --- /dev/null +++ b/scripts/import_cucloud_pricing.go @@ -0,0 +1,320 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "html" + "io" + "net/http" + "os" + "regexp" + "strings" + "time" +) + +const defaultCUCloudPricingURL = "https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=3236" + +type cucloudPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +type cucloudPricingSummary struct { + Models int + Records int + Regions int + PaygModeConfirmed bool + PaygPriceTablePublic bool +} + +var cucloudRequiredModels = []string{"DeepSeek-V4-Pro", "DeepSeek-V4-Flash", "MiniMax-M2.5"} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultCUCloudPricingURL, "联通云 AISP Token Plan 页面") + flag.StringVar(&fixture, "fixture", "", "联通云价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := cucloudPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runCUCloudPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_cucloud_pricing: %v\n", err) + os.Exit(1) + } +} + +func runCUCloudPricingImport(cfg cucloudPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, summary, err := parseCUCloudPricingCatalog(raw, cfg.URL) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=cucloud-pricing-import models=%d records=%d regions=%d operator=%s payg_mode_confirmed=%t payg_price_table_public=%t dry_run=true\n", + summary.Models, summary.Records, summary.Regions, records[0].OperatorName, summary.PaygModeConfirmed, summary.PaygPriceTablePublic) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "cucloud-pricing-import"); err != nil { + return err + } + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=cucloud-pricing-import models=%d records=%d regions=%d operator=%s table_rows=%d payg_mode_confirmed=%t payg_price_table_public=%t dry_run=false\n", + summary.Models, summary.Records, summary.Regions, records[0].OperatorName, tableRows, summary.PaygModeConfirmed, summary.PaygPriceTablePublic) + return err +} + +func parseCUCloudPricingCatalog(raw string, sourceURL string) ([]officialPricingRecord, cucloudPricingSummary, error) { + normalized := normalizeCUCloudRaw(raw) + priceMap, err := extractCUCloudBlendedPrices(normalized) + if err != nil { + return nil, cucloudPricingSummary{}, err + } + regionMap, err := extractCUCloudRegionSupport(normalized) + if err != nil { + return nil, cucloudPricingSummary{}, err + } + + records := make([]officialPricingRecord, 0) + modelSet := make(map[string]struct{}) + regionSet := make(map[string]struct{}) + for _, modelName := range cucloudRequiredModels { + price, ok := priceMap[modelName] + if !ok { + return nil, cucloudPricingSummary{}, fmt.Errorf("missing blended price for %s", modelName) + } + regions := regionMap[modelName] + if len(regions) == 0 { + return nil, cucloudPricingSummary{}, fmt.Errorf("missing supported regions for %s", modelName) + } + providerName := cucloudProviderName(modelName) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + for _, region := range regions { + records = append(records, officialPricingRecord{ + ModelID: normalizeExternalID("cucloud", "aisp", modelName), + ModelName: modelName, + ProviderName: providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Unicom AISP", + OperatorNameCn: "联通云 AI服务平台AISP", + OperatorCountry: "CN", + OperatorWebsite: "https://www.cucloud.cn", + OperatorType: "official", + Region: region, + Currency: "CNY", + InputPrice: price, + OutputPrice: price, + SourceURL: sourceURL, + ModelSourceURL: sourceURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(modelName), + }) + regionSet[region] = struct{}{} + } + modelSet[modelName] = struct{}{} + } + if len(records) == 0 { + return nil, cucloudPricingSummary{}, fmt.Errorf("no cucloud pricing records found") + } + summary := cucloudPricingSummary{ + Models: len(modelSet), + Records: len(records), + Regions: len(regionSet), + PaygModeConfirmed: cucloudPaygModeConfirmed(normalized), + PaygPriceTablePublic: cucloudHasPublicPaygPriceTable(normalized), + } + return records, summary, nil +} + +func normalizeCUCloudRaw(raw string) string { + raw = strings.ReplaceAll(raw, `\u003c`, "<") + raw = strings.ReplaceAll(raw, `\u003e`, ">") + raw = strings.ReplaceAll(raw, `\u0026nbsp;`, " ") + raw = strings.ReplaceAll(raw, `\n`, "\n") + raw = strings.ReplaceAll(raw, `\t`, " ") + raw = strings.ReplaceAll(raw, `\r`, "\n") + raw = html.UnescapeString(raw) + return raw +} + +func extractCUCloudBlendedPrices(raw string) (map[string]float64, error) { + for _, table := range cucloudTableBlocks(raw) { + rows := cucloudTableRows(table) + if len(rows) == 0 { + continue + } + prices := make(map[string]float64) + for _, cell := range rows[0] { + modelName, price, ok := cucloudBlendedPriceCell(cell) + if ok { + prices[modelName] = price + } + } + if cucloudHasAllRequiredModels(prices) { + return prices, nil + } + } + return nil, fmt.Errorf("unexpected cucloud blended price table") +} + +func cucloudBlendedPriceCell(raw string) (string, float64, bool) { + cleaned := strings.TrimSpace(cleanHTMLText(raw)) + match := regexp.MustCompile(`^(.*?)\s*综合单价\s*([0-9]+(?:\.[0-9]+)?)元/百万tokens$`).FindStringSubmatch(cleaned) + if len(match) != 3 { + return "", 0, false + } + modelName := strings.TrimSpace(match[1]) + if modelName == "" { + return "", 0, false + } + return modelName, mustParseSubscriptionPrice(match[2]), true +} + +func extractCUCloudRegionSupport(raw string) (map[string][]string, error) { + for _, table := range cucloudTableBlocks(raw) { + rows := cucloudTableRows(table) + if len(rows) < 2 { + continue + } + headers := rows[0] + if len(headers) < 2 || strings.TrimSpace(headers[0]) != "模型" { + continue + } + if !strings.Contains(strings.Join(headers, "|"), "贵阳基地二区") { + continue + } + regionMap := make(map[string][]string) + regions := headers[1:] + for _, row := range rows[1:] { + if len(row) < len(regions)+1 { + continue + } + modelName := strings.TrimSpace(row[0]) + if modelName == "" { + continue + } + supported := make([]string, 0) + for idx, region := range regions { + if strings.Contains(strings.TrimSpace(row[idx+1]), "支持") { + supported = append(supported, strings.TrimSpace(region)) + } + } + if len(supported) > 0 { + regionMap[modelName] = supported + } + } + if cucloudHasAllRequiredRegionRows(regionMap) { + return regionMap, nil + } + } + return nil, fmt.Errorf("unexpected cucloud region support table") +} + +func cucloudTableBlocks(raw string) []string { + pattern := regexp.MustCompile(`(?is)]*>.*?`) + return pattern.FindAllString(raw, -1) +} + +func cucloudTableRows(table string) [][]string { + rowPattern := regexp.MustCompile(`(?is)]*>(.*?)`) + cellPattern := regexp.MustCompile(`(?is)]*>(.*?)`) + matches := rowPattern.FindAllStringSubmatch(table, -1) + rows := make([][]string, 0, len(matches)) + for _, rowMatch := range matches { + cells := cellPattern.FindAllStringSubmatch(rowMatch[1], -1) + if len(cells) == 0 { + continue + } + row := make([]string, 0, len(cells)) + for _, cell := range cells { + row = append(row, strings.TrimSpace(cleanHTMLText(cell[1]))) + } + rows = append(rows, row) + } + return rows +} + +func cucloudPaygModeConfirmed(raw string) bool { + text := cleanHTMLText(raw) + return strings.Contains(text, "按量计费") && (strings.Contains(text, "元/千 Tokens") || strings.Contains(text, "元/千Tokens")) +} + +func cucloudHasPublicPaygPriceTable(raw string) bool { + for _, table := range cucloudTableBlocks(raw) { + text := cleanHTMLText(table) + if !(strings.Contains(text, "元/千 Tokens") || strings.Contains(text, "元/千Tokens")) { + continue + } + if strings.Contains(text, "DeepSeek-V4-Pro") || strings.Contains(text, "MiniMax-M2.5") || strings.Contains(text, "DeepSeek-V4-Flash") { + return true + } + } + return false +} + +func cucloudProviderName(modelName string) string { + lower := strings.ToLower(strings.TrimSpace(modelName)) + switch { + case strings.HasPrefix(lower, "deepseek"): + return "DeepSeek" + case strings.HasPrefix(lower, "minimax"): + return "MiniMax" + default: + return "unknown" + } +} + +func cucloudHasAllRequiredModels(prices map[string]float64) bool { + for _, modelName := range cucloudRequiredModels { + if _, ok := prices[modelName]; !ok { + return false + } + } + return true +} + +func cucloudHasAllRequiredRegionRows(regionMap map[string][]string) bool { + for _, modelName := range cucloudRequiredModels { + if len(regionMap[modelName]) == 0 { + return false + } + } + return true +} diff --git a/scripts/import_cucloud_pricing_test.go b/scripts/import_cucloud_pricing_test.go new file mode 100644 index 0000000..e945c6f --- /dev/null +++ b/scripts/import_cucloud_pricing_test.go @@ -0,0 +1,95 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseCUCloudPricingBuildsBlendedRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "cucloud_pricing_sample.html")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, summary, err := parseCUCloudPricingCatalog(string(raw), defaultCUCloudPricingURL) + if err != nil { + t.Fatalf("parseCUCloudPricingCatalog 返回错误: %v", err) + } + if len(records) != 4 { + t.Fatalf("期望 4 条联通云价格记录,实际 %d", len(records)) + } + if summary.Models != 3 || summary.Records != 4 || summary.Regions != 2 { + t.Fatalf("summary 计数错误: %+v", summary) + } + if !summary.PaygModeConfirmed || summary.PaygPriceTablePublic { + t.Fatalf("payg 摘要错误: %+v", summary) + } + + recordMap := make(map[string]officialPricingRecord, len(records)) + for _, record := range records { + recordMap[record.ModelName+"|"+record.Region] = record + } + + pro := recordMap["DeepSeek-V4-Pro|贵阳基地二区"] + if pro.InputPrice != 9.3 || pro.OutputPrice != 9.3 || pro.ProviderName != "DeepSeek" { + t.Fatalf("DeepSeek-V4-Pro 记录错误: %+v", pro) + } + flash := recordMap["DeepSeek-V4-Flash|贵阳基地二区"] + if flash.InputPrice != 0.7 || flash.OutputPrice != 0.7 || flash.ProviderName != "DeepSeek" { + t.Fatalf("DeepSeek-V4-Flash 记录错误: %+v", flash) + } + miniWuhan := recordMap["MiniMax-M2.5|武汉四区"] + if miniWuhan.InputPrice != 1.1 || miniWuhan.OutputPrice != 1.1 || miniWuhan.ProviderName != "MiniMax" { + t.Fatalf("MiniMax-M2.5 武汉记录错误: %+v", miniWuhan) + } + miniGuiyang := recordMap["MiniMax-M2.5|贵阳基地二区"] + if miniGuiyang.InputPrice != 1.1 || miniGuiyang.OutputPrice != 1.1 { + t.Fatalf("MiniMax-M2.5 贵阳记录错误: %+v", miniGuiyang) + } + if _, ok := recordMap["MiniMax-M2.5|上海二十二区"]; ok { + t.Fatalf("不支持的区域不应生成记录") + } +} + +func TestParseCUCloudPricingRequiresBlendedPriceTable(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "cucloud_pricing_sample.html")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + broken := strings.Replace(string(raw), "综合单价9.30元/百万tokens", "综合单价元/百万tokens", 1) + if _, _, err := parseCUCloudPricingCatalog(broken, defaultCUCloudPricingURL); err == nil { + t.Fatalf("缺少 blended 价格时应返回错误") + } +} + +func TestRunCUCloudPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runCUCloudPricingImport(cucloudPricingImportConfig{ + URL: defaultCUCloudPricingURL, + Fixture: filepath.Join("testdata", "cucloud_pricing_sample.html"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runCUCloudPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=cucloud-pricing-import", + "models=3", + "records=4", + "regions=2", + "operator=Unicom AISP", + "payg_mode_confirmed=true", + "payg_price_table_public=false", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_plan_catalog_test.go b/scripts/import_plan_catalog_test.go index 198d3d6..3d3c8ee 100644 --- a/scripts/import_plan_catalog_test.go +++ b/scripts/import_plan_catalog_test.go @@ -24,8 +24,8 @@ func TestBuildPlanCatalogRows(t *testing.T) { if err != nil { t.Fatalf("buildPlanCatalogRows 失败: %v", err) } - if len(rows) != 70 { - t.Fatalf("期望 70 条基础目录记录,实际 %d", len(rows)) + if len(rows) != 71 { + t.Fatalf("期望 71 条基础目录记录,实际 %d", len(rows)) } foundVendorTop20 := false @@ -38,6 +38,7 @@ func TestBuildPlanCatalogRows(t *testing.T) { "aliyun-bailian-coding-plan": "import_aliyun_subscription.go", "baidu-qianfan-token-benefit-pack": "import_baidu_subscription.go", "baidu-qianfan-coding-plan": "import_baidu_subscription.go", + "bytedance-doubao-api-payg": "import_bytedance_pricing.go", "zhipu-glm-coding-plan": "import_zhipu_coding_plan.go", "minimax-token-plan": "import_minimax_subscription.go", "volcengine-ark-coding-plan": "import_bytedance_subscription.go", @@ -46,7 +47,8 @@ func TestBuildPlanCatalogRows(t *testing.T) { "ctyun-coding-plan": "import_ctyun_subscription.go", "cucloud-aicp-platform": "import_cucloud_catalog.go", "cucloud-ai-app-platform": "import_cucloud_catalog.go", - "mobile-cloud-ai-market": "import_mobile_cloud_catalog.go", + "cucloud-aisp-token-plan-pricing": "import_cucloud_pricing.go", + "mobile-cloud-ai-market": "import_mobile_cloud_pricing.go", "aliyun-modelscope-api-inference": "import_catalog_seed_verification.go", "youdao-zhiyun-maas": "import_youdao_pricing.go", "ctyun-model-inference-payg": "import_catalog_seed_verification.go", @@ -135,13 +137,13 @@ func TestRunPlanCatalogImportDryRunPrintsSummary(t *testing.T) { output := out.String() for _, want := range []string{ "source=plan-catalog-import", - "rows=70", + "rows=71", "coding_plan:7", "package_plan:1", - "pay_as_you_go:51", - "token_plan:7", - "unknown:4", - "confirmed:70", + "pay_as_you_go:52", + "token_plan:8", + "unknown:3", + "confirmed:71", "dry_run=true", } { if !strings.Contains(output, want) { diff --git a/scripts/importer_smoke_gate_test.sh b/scripts/importer_smoke_gate_test.sh index e49b10a..5e11a54 100755 --- a/scripts/importer_smoke_gate_test.sh +++ b/scripts/importer_smoke_gate_test.sh @@ -30,7 +30,13 @@ printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=qwen-fixture' printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=qwen-live' printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=hunyuan-fixture' printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=hunyuan-live' +printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=mobile-cloud-fixture' +printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=mobile-cloud-live' +printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=cucloud-pricing-fixture' +printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=cucloud-pricing-live' printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=huawei-maas-fixture' printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=huawei-maas-live' +printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=bytedance-fixture' +printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=bytedance-live' echo "importer_smoke_gate_test: PASS" diff --git a/scripts/pipeline_runtime_alignment_test.sh b/scripts/pipeline_runtime_alignment_test.sh index 2ffc8fd..58ceda7 100644 --- a/scripts/pipeline_runtime_alignment_test.sh +++ b/scripts/pipeline_runtime_alignment_test.sh @@ -25,18 +25,30 @@ check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "tencent_subscri check_contains "scripts/run_daily.sh" 'error_exit "腾讯云套餐导入失败"' check_contains "scripts/run_intel_pipeline.sh" 'run_or_fail "qwen_pricing" "通义千问价格导入失败"' check_contains "scripts/run_intel_pipeline.sh" 'run_or_fail "hunyuan_pricing" "腾讯混元价格导入失败"' +check_contains "scripts/run_intel_pipeline.sh" 'run_or_fail "mobile_cloud_pricing" "移动云 MoMA 价格导入失败"' +check_contains "scripts/run_intel_pipeline.sh" 'run_or_fail "cucloud_pricing" "联通云 Token Plan 价格导入失败"' check_contains "scripts/run_intel_pipeline.sh" 'run_or_fail "huawei_maas_pricing" "华为云 MaaS 价格导入失败"' +check_contains "scripts/run_intel_pipeline.sh" 'run_or_fail "bytedance_pricing" "火山方舟价格导入失败"' check_contains "scripts/run_real_pipeline.sh" 'merge_failed_source_keys "qwen_pricing"' check_contains "scripts/run_real_pipeline.sh" 'merge_failed_source_keys "hunyuan_pricing"' +check_contains "scripts/run_real_pipeline.sh" 'merge_failed_source_keys "mobile_cloud_pricing"' +check_contains "scripts/run_real_pipeline.sh" 'merge_failed_source_keys "cucloud_pricing"' check_contains "scripts/run_real_pipeline.sh" 'merge_failed_source_keys "huawei_maas_pricing"' +check_contains "scripts/run_real_pipeline.sh" 'merge_failed_source_keys "bytedance_pricing"' check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "qwen_pricing"' check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "hunyuan_pricing"' +check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "mobile_cloud_pricing"' +check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "cucloud_pricing"' check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "huawei_maas_pricing"' +check_contains "scripts/run_daily.sh" 'merge_failed_source_keys "bytedance_pricing"' check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "tencent-live"' check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "qwen-fixture"' check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "hunyuan-fixture"' +check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "mobile-cloud-fixture"' +check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "cucloud-pricing-fixture"' check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "huawei-maas-fixture"' +check_contains "scripts/verify_importer_smoke.sh" 'run_smoke "bytedance-fixture"' echo "pipeline_runtime_alignment_test: PASS" diff --git a/scripts/run_daily.sh b/scripts/run_daily.sh index fce5aed..509f0bc 100755 --- a/scripts/run_daily.sh +++ b/scripts/run_daily.sh @@ -22,7 +22,7 @@ MODEL_COUNT="" FETCH_OUT="${PROJECT_DIR}/models.json" FETCH_TOTAL="0" PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot,daily_report" -PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,tencent_subscription,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,qwen_pricing,hunyuan_pricing,huawei_maas_pricing,catalog_seed_verification" +PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,tencent_subscription,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,cucloud_pricing,mobile_cloud_pricing,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,qwen_pricing,hunyuan_pricing,huawei_maas_pricing,bytedance_pricing,catalog_seed_verification" PIPELINE_FAILED_SOURCE_SET="none" MULTI_SOURCE_AUDIT="multi_source_audit=unavailable" PIPELINE_AUDIT_SUMMARY="" @@ -240,10 +240,17 @@ if ! go run -tags llm_script \ fi if ! go run -tags llm_script \ scripts/subscription_import_common.go \ - scripts/catalog_verification_common.go \ - scripts/import_mobile_cloud_catalog.go >> "$LOG_FILE" 2>&1; then - merge_failed_source_keys "mobile_cloud_catalog" - error_exit "移动云目录校验失败" + scripts/official_pricing_import_common.go \ + scripts/import_cucloud_pricing.go >> "$LOG_FILE" 2>&1; then + merge_failed_source_keys "cucloud_pricing" + error_exit "联通云 Token Plan 价格导入失败" +fi +if ! go run -tags llm_script \ + scripts/subscription_import_common.go \ + scripts/official_pricing_import_common.go \ + scripts/import_mobile_cloud_pricing.go >> "$LOG_FILE" 2>&1; then + merge_failed_source_keys "mobile_cloud_pricing" + error_exit "移动云 MoMA 价格导入失败" fi if ! go run -tags llm_script \ scripts/subscription_import_common.go \ @@ -412,6 +419,13 @@ if ! go run -tags llm_script \ merge_failed_source_keys "huawei_maas_pricing" error_exit "华为云 MaaS 价格导入失败" fi +if ! go run -tags llm_script \ + scripts/subscription_import_common.go \ + scripts/official_pricing_import_common.go \ + scripts/import_bytedance_pricing.go >> "$LOG_FILE" 2>&1; then + merge_failed_source_keys "bytedance_pricing" + error_exit "火山方舟价格导入失败" +fi if ! go run -tags llm_script \ scripts/subscription_import_common.go \ scripts/import_catalog_seed_verification.go >> "$LOG_FILE" 2>&1; then diff --git a/scripts/run_intel_pipeline.sh b/scripts/run_intel_pipeline.sh index d9b9b94..2f404c0 100755 --- a/scripts/run_intel_pipeline.sh +++ b/scripts/run_intel_pipeline.sh @@ -27,7 +27,7 @@ REPORT_DATE="${REPORT_DATE:-$(date +%F)}" FETCH_OUT="$ROOT_DIR/models.json" FETCH_TOTAL="0" PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot" -PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,tencent_subscription,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,qwen_pricing,hunyuan_pricing,huawei_maas_pricing,catalog_seed_verification" +PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,tencent_subscription,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,cucloud_pricing,mobile_cloud_pricing,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,qwen_pricing,hunyuan_pricing,huawei_maas_pricing,bytedance_pricing,catalog_seed_verification" PIPELINE_FAILED_SOURCE_SET="none" MULTI_SOURCE_AUDIT="multi_source_audit=unavailable" PIPELINE_AUDIT_SUMMARY="" @@ -140,8 +140,10 @@ run_or_fail "minimax_subscription" "MiniMax Token Plan 导入失败" \ go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/minimax_subscription_lib.go ./scripts/import_minimax_subscription.go run_or_fail "cucloud_catalog" "联通云目录校验失败" \ go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/catalog_verification_common.go ./scripts/import_cucloud_catalog.go -run_or_fail "mobile_cloud_catalog" "移动云目录校验失败" \ - go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/catalog_verification_common.go ./scripts/import_mobile_cloud_catalog.go +run_or_fail "cucloud_pricing" "联通云 Token Plan 价格导入失败" \ + go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go +run_or_fail "mobile_cloud_pricing" "移动云 MoMA 价格导入失败" \ + go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_mobile_cloud_pricing.go run_or_fail "tencent_subscription" "腾讯云套餐导入失败" \ go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/tencent_catalog_lib.go ./scripts/import_tencent_subscription.go run_or_fail "youdao_pricing" "网易有道价格导入失败" \ @@ -179,6 +181,8 @@ run_or_fail "hunyuan_pricing" "腾讯混元价格导入失败" \ go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_hunyuan_pricing.go run_or_fail "huawei_maas_pricing" "华为云 MaaS 价格导入失败" \ go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_huawei_maas_pricing.go +run_or_fail "bytedance_pricing" "火山方舟价格导入失败" \ + go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_bytedance_pricing.go refresh_pipeline_audit run_or_fail "catalog_seed_verification" "目录级官方入口核验失败" \ diff --git a/scripts/run_real_pipeline.sh b/scripts/run_real_pipeline.sh index 8846d79..ca2b36b 100755 --- a/scripts/run_real_pipeline.sh +++ b/scripts/run_real_pipeline.sh @@ -28,7 +28,7 @@ REPORT_DATE="$(report_date_value)" FETCH_OUT="$ROOT_DIR/models.json" FETCH_TOTAL="0" PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot,daily_report" -PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,tencent_subscription,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,qwen_pricing,hunyuan_pricing,huawei_maas_pricing,catalog_seed_verification" +PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,tencent_subscription,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,cucloud_pricing,mobile_cloud_pricing,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,qwen_pricing,hunyuan_pricing,huawei_maas_pricing,bytedance_pricing,catalog_seed_verification" PIPELINE_FAILED_SOURCE_SET="none" MULTI_SOURCE_AUDIT="multi_source_audit=unavailable" PIPELINE_AUDIT_SUMMARY="" @@ -189,9 +189,14 @@ if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./script record_failure "联通云目录校验失败" exit 1 fi -if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/catalog_verification_common.go" "./scripts/import_mobile_cloud_catalog.go"; then - merge_failed_source_keys "mobile_cloud_catalog" - record_failure "移动云目录校验失败" +if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/official_pricing_import_common.go" "./scripts/import_cucloud_pricing.go"; then + merge_failed_source_keys "cucloud_pricing" + record_failure "联通云 Token Plan 价格导入失败" + exit 1 +fi +if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/official_pricing_import_common.go" "./scripts/import_mobile_cloud_pricing.go"; then + merge_failed_source_keys "mobile_cloud_pricing" + record_failure "移动云 MoMA 价格导入失败" exit 1 fi if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/tencent_catalog_lib.go" "./scripts/import_tencent_subscription.go"; then @@ -284,6 +289,11 @@ if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./script record_failure "华为云 MaaS 价格导入失败" exit 1 fi +if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/official_pricing_import_common.go" "./scripts/import_bytedance_pricing.go"; then + merge_failed_source_keys "bytedance_pricing" + record_failure "火山方舟价格导入失败" + exit 1 +fi if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/import_catalog_seed_verification.go"; then merge_failed_source_keys "catalog_seed_verification" record_failure "目录级官方入口核验失败" diff --git a/scripts/testdata/bytedance_ark_pricing_sample.txt b/scripts/testdata/bytedance_ark_pricing_sample.txt new file mode 100644 index 0000000..1b6b316 --- /dev/null +++ b/scripts/testdata/bytedance_ark_pricing_sample.txt @@ -0,0 +1,59 @@ +# 大语言模型 + +## 在线推理(常规) + +|模型名称 |条件

千 token |输入

元/百万token |缓存存储

元/百万 token /小时 |缓存输入

元/百万token |输出

元/百万token | +|---|---|---|---|---|---| +|doubao\-seed\-2.0\-pro |输入长度 [0, 32] |3.2 |0.017 |0.64 |16.0 | +||输入长度 (32, 128] |4.8 |0.017 |0.96 |24.0 | +||输入长度 (128, 256] |9.6 |0.017 |1.92 |48.0 | +|doubao\-seed\-2.0\-lite |输入长度 [0, 32] |0.6 |0.017 |0.12 |3.6 | +||输入长度 (32, 128] |0.9 |0.017 |0.18 |5.4 | +||输入长度 (128, 256] |1.8 |0.017 |0.36 |10.8 | +|doubao\-seed\-2.0\-mini |输入长度 [0, 32] |0.2 |0.017 |0.04 |2.0 | +||输入长度 (32, 128] |0.4 |0.017 |0.08 |4.0 | +||输入长度 (128, 256] |0.8 |0.017 |0.16 |8.0 | +|doubao\-seed\-2.0\-code |输入长度 [0, 32] |3.2 |0.017 |0.64 |16.0 | +||输入长度 (32, 128] |4.8 |0.017 |0.96 |24.0 | +||输入长度 (128, 256] |9.6 |0.017 |1.92 |48.0 | +|doubao\-seed\-1.8 |输入长度 [0, 32]

且输出长度 [0, 0.2] |0.80 |0.017 |0.16 |2.00 | +||输入长度 [0, 32]

且输出长度 (0.2,+∞) |0.80 |0.017 |0.16 |8.00 | +||输入长度 (32, 128] |1.20 |0.017 |0.16 |16.00 | +||输入长度 (128, 256] |2.40 |0.017 |0.16 |24.00 | +|doubao\-seed\-character |输入长度 [0, 32] |0.80 |0.017 |0.16 |2.00 | +||输入长度 (32, 128] |1.20 |0.017 |0.16 |6.00 | +|doubao\-seed\-code |输入长度 [0, 32] |1.20 |0.017 |0.24 |8.00 | +||输入长度 (32, 128] |1.40 |0.017 |0.24 |12.00 | +||输入长度 (128, 256] |2.80 |0.017 |0.24 |16.00 | +|doubao\-seed\-1.6 |输入长度 [0, 32]

且输出长度 [0, 0.2] |0.80 |0.017 |0.16 |2.00 | +||输入长度 [0, 32]

且输出长度 (0.2,+∞) |0.80 |0.017 |0.16 |8.00 | +||输入长度 (32, 128] |1.20 |0.017 |0.16 |16.00 | +||输入长度 (128, 256] |2.40 |0.017 |0.16 |24.00 | +|doubao\-seed\-1.6\-lite |输入长度 [0, 32]

且输出长度 [0, 0.2] |0.30 |0.017 |0.06 |0.60 | +||输入长度 [0, 32]

且输出长度 (0.2,+∞) |0.30 |0.017 |0.06 |2.40 | +||输入长度 (32, 128] |0.60 |0.017 |0.06 |4.00 | +||输入长度 (128, 256] |1.20 |0.017 |0.06 |12.00 | +|doubao\-seed\-1.6\-flash |输入长度 [0, 32] |0.15 |0.017 |0.03 |1.50 | +||输入长度 (32, 128] |0.30 |0.017 |0.03 |3.00 | +||输入长度 (128, 256] |0.60 |0.017 |0.03 |6.00 | +|doubao\-seed\-1.6\-vision |输入长度 [0, 32] |0.80 |0.017 |0.16 |8.00 | +||输入长度 (32, 128] |1.20 |0.017 |0.16 |16.00 | +||输入长度 (128, 256] |2.40 |0.017 |0.16 |24.00 | +|doubao\-seed\-translation |\- |1.20 |不支持 |不支持 |3.60 | +|doubao\-1.5\-pro\-32k |\- |0.80 |0.017 |0.16 |2.00 | +|doubao\-1.5\-lite\-32k |\- |0.30 |0.017 |0.06 |0.60 | +|doubao\-1.5\-vision\-pro |\- |3.00 |不支持 |不支持 |9.00 | +|glm\-4.7 |输入长度 [0, 32]

且输出长度 [0, 0.2] |2.0 |0.017 |0.4 |8.0 | +||输入长度 [0, 32]

且输出长度 (0.2,+∞) |3.0 |0.017 |0.6 |14.0 | +||输入长度 (32, 200] |4.0 |0.017 |0.8 |16.0 | +|deepseek\-v3.2 |输入长度 [0, 32] |2.00 |0.017 |0.4 |3.00 | +||输入长度 (32, 128] |4.00 |0.017 |0.4 |6.00 | +|deepseek\-v3.1 |\- |4.00 |0.017 |0.80 |12.00 | +|deepseek\-v3 |\- |2.00 |0.017 |0.40 |8.00 | +|deepseek\-r1 |\- |4.00 |0.017 |0.80 |16.00 | + +# 向量模型 + +|模型 |文本输入

元/百万 token |图片输入

元/百万 token | +|---|---|---| +|doubao\-embedding\-vision |0.70 |1.80 | diff --git a/scripts/testdata/cucloud_pricing_sample.html b/scripts/testdata/cucloud_pricing_sample.html new file mode 100644 index 0000000..1fd0595 --- /dev/null +++ b/scripts/testdata/cucloud_pricing_sample.html @@ -0,0 +1,62 @@ +
+

Token Plan概述

+ + + + + + + + + + + + + + + +
套餐额度

DeepSeek-V4-Pro

综合单价9.30元/百万tokens

DeepSeek-V4-Flash

综合单价0.70元/百万tokens

MiniMax-M2.5

综合单价1.10元/百万tokens

25,000 credits约 27 百万tokens约 357 百万tokens约 227 百万tokens
+
+
+

各云区域模型支持情况

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
模型呼和浩特二区上海二十二区武汉四区济南五区贵阳基地二区
DeepSeek-V4-Pro



支持
DeepSeek-V4-Flash



支持
MiniMax-M2.5

支持
支持
+
+
+

按量计费模式

+

按量计费按模型销售价实时累加,计费单位为元/千 Tokens。

+
diff --git a/scripts/verify_importer_smoke.sh b/scripts/verify_importer_smoke.sh index 162c320..805bc14 100755 --- a/scripts/verify_importer_smoke.sh +++ b/scripts/verify_importer_smoke.sh @@ -11,6 +11,9 @@ CTYUN_TOKEN_FIXTURE_PATH="${CTYUN_TOKEN_FIXTURE_PATH:-./scripts/testdata/ctyun_t TENCENT_FIXTURE_PATH="${TENCENT_FIXTURE_PATH:-./scripts/testdata/tencent_token_plan_sample.txt}" QWEN_FIXTURE_PATH="${QWEN_FIXTURE_PATH:-./scripts/testdata/qwen_pricing_sample.txt}" HUNYUAN_FIXTURE_PATH="${HUNYUAN_FIXTURE_PATH:-./scripts/testdata/hunyuan_pricing_sample.txt}" +MOBILE_CLOUD_FIXTURE_PATH="${MOBILE_CLOUD_FIXTURE_PATH:-./scripts/testdata/mobile_cloud_pricing_sample.html}" +CUCLOUD_FIXTURE_PATH="${CUCLOUD_FIXTURE_PATH:-./scripts/testdata/cucloud_pricing_sample.html}" +BYTEDANCE_FIXTURE_PATH="${BYTEDANCE_FIXTURE_PATH:-./scripts/testdata/bytedance_ark_pricing_sample.txt}" HUAWEI_MAAS_FIXTURE_PATH="${HUAWEI_MAAS_FIXTURE_PATH:-./scripts/testdata/huawei_maas_pricing_sample.json}" last_meaningful_line() { @@ -48,7 +51,13 @@ run_smoke "qwen-fixture" "go run -tags llm_script ./scripts/subscription_import_ run_smoke "qwen-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_qwen_pricing.go -dry-run" run_smoke "hunyuan-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_hunyuan_pricing.go -fixture ${HUNYUAN_FIXTURE_PATH@Q} -dry-run" run_smoke "hunyuan-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_hunyuan_pricing.go -dry-run" +run_smoke "mobile-cloud-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_mobile_cloud_pricing.go -fixture ${MOBILE_CLOUD_FIXTURE_PATH@Q} -dry-run" +run_smoke "mobile-cloud-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_mobile_cloud_pricing.go -dry-run" +run_smoke "cucloud-pricing-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go -fixture ${CUCLOUD_FIXTURE_PATH@Q} -dry-run" +run_smoke "cucloud-pricing-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_cucloud_pricing.go -dry-run" run_smoke "huawei-maas-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_huawei_maas_pricing.go -fixture ${HUAWEI_MAAS_FIXTURE_PATH@Q} -dry-run" run_smoke "huawei-maas-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_huawei_maas_pricing.go -dry-run" +run_smoke "bytedance-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_bytedance_pricing.go -fixture ${BYTEDANCE_FIXTURE_PATH@Q} -dry-run" +run_smoke "bytedance-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/import_bytedance_pricing.go -dry-run" echo "IMPORTER_SMOKE_RESULT: PASS" diff --git a/seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json b/seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json index 773a7f2..c141f52 100644 --- a/seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json +++ b/seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json @@ -514,6 +514,33 @@ "catalogSegment": "relay_top20plus", "marketRank": 12 }, + { + "catalogCode": "cucloud-aisp-token-plan-pricing", + "providerName": "China Unicom", + "providerNameCn": "中国联通", + "providerCountry": "CN", + "providerWebsite": "https://www.cucloud.cn", + "operatorName": "Unicom AISP", + "operatorNameCn": "联通云 AI服务平台AISP", + "operatorCountry": "CN", + "operatorWebsite": "https://www.cucloud.cn", + "operatorType": "cloud", + "platformName": "Unicom AISP Token Plan Pricing", + "platformNameCn": "联通云 AISP Token Plan 定价", + "platformType": "cloud_operator", + "planFamily": "token_plan", + "planStatus": "confirmed", + "sourceKind": "official_pricing", + "sourceURL": "https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=3236", + "sourceTitle": "Token Plan概述", + "region": "CN", + "currency": "CNY", + "billingCycle": "monthly", + "importerKey": "import_cucloud_pricing.go", + "notes": "已确认联通云 AISP Token Plan 公开披露 DeepSeek-V4-Pro / DeepSeek-V4-Flash / MiniMax-M2.5 三模型 blended 单价与区域支持矩阵;公开页仅确认按量计费模式存在,未披露 payg per-model 销售价表。", + "catalogSegment": "relay_top20plus", + "marketRank": 12 + }, { "catalogCode": "mobile-cloud-ai-market", "providerName": "China Mobile", @@ -525,19 +552,19 @@ "operatorCountry": "CN", "operatorWebsite": "https://ecloud.10086.cn", "operatorType": "cloud", - "platformName": "Mobile Cloud AI Market", - "platformNameCn": "移动云 AI 应用专区", + "platformName": "Mobile Cloud MoMA", + "platformNameCn": "移动云 模型服务平台MoMA", "platformType": "cloud_operator", - "planFamily": "unknown", + "planFamily": "pay_as_you_go", "planStatus": "confirmed", - "sourceKind": "official_product_page", - "sourceURL": "https://saas.ecloud.10086.cn/Store/List", - "sourceTitle": "移动云市场 AI 应用专区", + "sourceKind": "official_pricing", + "sourceURL": "https://ecloud.10086.cn/op-help-center/doc/article/91592", + "sourceTitle": "预置模型服务-token按量计费", "region": "CN", "currency": "CNY", "billingCycle": "usage", - "importerKey": "import_mobile_cloud_catalog.go", - "notes": "已确认移动云云市场公开展示 AI 应用专区,公开统一编程套餐价格仍待后续核验。", + "importerKey": "import_mobile_cloud_pricing.go", + "notes": "已确认移动云 MoMA 官方公开 token 按量计费文档;当前 importer 覆盖文本/视觉/向量/排序模型,语音模型的按字符/按秒计费暂未写入 region_pricing。", "catalogSegment": "relay_top20plus", "marketRank": 13 }, diff --git a/seeds/plan_catalog_inventory_seed_cn_vendors_top20.json b/seeds/plan_catalog_inventory_seed_cn_vendors_top20.json index 052e171..42332dd 100644 --- a/seeds/plan_catalog_inventory_seed_cn_vendors_top20.json +++ b/seeds/plan_catalog_inventory_seed_cn_vendors_top20.json @@ -104,8 +104,8 @@ "region": "CN", "currency": "CNY", "billingCycle": "usage", - "importerKey": "existing_price_importer", - "notes": "官方模型与平台由火山方舟统一暴露,Coding Plan 另作订阅目录记录。", + "importerKey": "import_bytedance_pricing.go", + "notes": "官方模型与平台由火山方舟统一暴露;当前 importer 落地在线推理(常规)公开 token 价格,向量等仅输入计费模型暂不写入 region_pricing。", "catalogSegment": "vendor_top20", "marketRank": 4 },