- 新增 region_pricing.pricing_mode / price_unit / flat_price 字段 - 新增 migration 016_region_pricing_non_token_units.sql - officialPricingRecord 新增 PricingMode/PriceUnit/FlatPrice 字段 - detectModality 新增 audio 模态检测(voice/audio/speech) - providerMetadata 新增 BAAI/ByteDance/China Mobile 元数据 - import_mobile_cloud_pricing.go: 解析语音计费表(CosyVoice/SenseVoice) - CosyVoice: 2元/万字符 → pricingMode=flat, priceUnit=10k_characters - SenseVoice: 0.0007元/秒 → pricingMode=flat, priceUnit=second - mobileCloudProviderName 新增 cosyvoice/sensevoice → Alibaba 映射 - cmd/server: modelResponse 新增 pricingMode/priceUnit/flatPrice,API 字段说明同步更新 - 新增 TestModelsHandlerReturnsFlatPricingFields 测试
518 lines
17 KiB
Go
518 lines
17 KiB
Go
//go:build llm_script
|
||
|
||
package main
|
||
|
||
import (
|
||
"database/sql"
|
||
"encoding/json"
|
||
"flag"
|
||
"fmt"
|
||
"html"
|
||
"io"
|
||
"net/http"
|
||
"os"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// Help-center endpoints on ecloud.10086.cn used by the importer, plus the
// title that identifies the pricing article inside the outline tree.
const (
	// defaultMobileCloudOutlineTreeURL is the default value of the
	// -outline-tree-url flag.
	defaultMobileCloudOutlineTreeURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/outline/tree?outlineId=972"
	// defaultMobileCloudArticleInfoURL is formatted with a numeric article ID.
	defaultMobileCloudArticleInfoURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/article/info/%d"
	// defaultMobileCloudArticleContentURL is formatted with the article's
	// published-content identifier.
	defaultMobileCloudArticleContentURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/article/content/%s"
	// defaultMobileCloudDocURLPattern is formatted with a numeric article ID
	// and is stored on records as their source URL.
	defaultMobileCloudDocURLPattern = "https://ecloud.10086.cn/op-help-center/doc/article/%d"
	// mobileCloudPricingArticleTitle is compared against outline node titles
	// to locate the pricing article.
	mobileCloudPricingArticleTitle = "预置模型服务-token按量计费"
)
|
||
|
||
// mobileCloudPricingImportConfig holds the options that main collects from the
// command line and hands to runMobileCloudPricingImport.
type mobileCloudPricingImportConfig struct {
	// OutlineTreeURL is the outline-tree endpoint fetched when Fixture is empty.
	OutlineTreeURL string
	// Fixture, when non-empty, names a local file whose content is used as the
	// article HTML instead of fetching it over HTTP.
	Fixture string
	// DryRun makes the import print a summary without touching the database.
	DryRun bool
	// Timeout is applied to the HTTP client used for all requests.
	Timeout time.Duration
}
|
||
|
||
type mobileCloudOutlineEnvelope struct {
|
||
Code int `json:"code"`
|
||
Data mobileCloudOutlineNode `json:"data"`
|
||
}
|
||
|
||
// mobileCloudOutlineNode is one entry of the documentation outline tree.
// Nodes nest through Children.
type mobileCloudOutlineNode struct {
	// ArticleID is the numeric article identifier; a node only counts as the
	// pricing article when it is greater than zero.
	ArticleID int `json:"articleId"`
	// ArticleTitle is matched against mobileCloudPricingArticleTitle.
	ArticleTitle string `json:"articleTitle"`
	// ArticleContentPublished is the identifier later substituted into the
	// article content URL.
	ArticleContentPublished string `json:"articleContentPublished"`
	// Children are the nested outline entries.
	Children []mobileCloudOutlineNode `json:"children"`
}
|
||
|
||
type mobileCloudArticleInfoEnvelope struct {
|
||
Code int `json:"code"`
|
||
Data mobileCloudArticleInfo `json:"data"`
|
||
}
|
||
|
||
// mobileCloudArticleInfo is the article metadata carried in the article-info
// response.
type mobileCloudArticleInfo struct {
	// ID is the numeric article identifier; zero is treated as an invalid
	// response by parseMobileCloudArticleInfo.
	ID int `json:"id"`
	// Title is the article title.
	Title string `json:"title"`
	// ContentPublished is the fallback identifier for the article content URL
	// when the outline tree does not supply one.
	ContentPublished string `json:"contentPublished"`
}
|
||
|
||
// mobileCloudArticlePayload is the fully resolved pricing article that the
// fetch step hands to the HTML parser.
type mobileCloudArticlePayload struct {
	// ArticleID is the numeric article identifier.
	ArticleID int
	// Title is the article title.
	Title string
	// ContentPublished is the identifier used to fetch the content; it is left
	// empty when the payload comes from a fixture file.
	ContentPublished string
	// DocURL is the documentation URL recorded as the source of every record.
	DocURL string
	// ContentHTML is the raw article HTML containing the pricing tables.
	ContentHTML string
}
|
||
|
||
func main() {
|
||
loadSubscriptionImportEnv()
|
||
|
||
var outlineTreeURL string
|
||
var fixture string
|
||
var dryRun bool
|
||
var timeoutSeconds int
|
||
|
||
flag.StringVar(&outlineTreeURL, "outline-tree-url", defaultMobileCloudOutlineTreeURL, "移动云 MoMA 文档大纲树接口")
|
||
flag.StringVar(&fixture, "fixture", "", "移动云 MoMA 价格样例文件")
|
||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||
flag.Parse()
|
||
|
||
cfg := mobileCloudPricingImportConfig{OutlineTreeURL: outlineTreeURL, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
||
|
||
var db *sql.DB
|
||
var err error
|
||
if !cfg.DryRun {
|
||
db, err = subscriptionImportDB()
|
||
if err != nil {
|
||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
defer db.Close()
|
||
}
|
||
|
||
if err := runMobileCloudPricingImport(cfg, db, os.Stdout); err != nil {
|
||
fmt.Fprintf(os.Stderr, "import_mobile_cloud_pricing: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
}
|
||
|
||
func runMobileCloudPricingImport(cfg mobileCloudPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||
client := &http.Client{Timeout: cfg.Timeout}
|
||
payload, err := fetchMobileCloudArticlePayload(cfg, client)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
records, err := parseMobileCloudPricingHTML(payload.ContentHTML, payload.DocURL)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
records = dedupeOfficialPricingRecords(records)
|
||
if cfg.DryRun {
|
||
_, err = fmt.Fprintf(out, "source=mobile-cloud-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||
return err
|
||
}
|
||
if db == nil {
|
||
return fmt.Errorf("db is required when dry-run=false")
|
||
}
|
||
if err := upsertOfficialPricingRecords(db, records, "mobile-cloud-pricing-import"); err != nil {
|
||
return err
|
||
}
|
||
var tableRows int
|
||
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||
return fmt.Errorf("count region_pricing: %w", err)
|
||
}
|
||
_, err = fmt.Fprintf(out, "source=mobile-cloud-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||
return err
|
||
}
|
||
|
||
func fetchMobileCloudArticlePayload(cfg mobileCloudPricingImportConfig, client *http.Client) (mobileCloudArticlePayload, error) {
|
||
if cfg.Fixture != "" {
|
||
data, err := os.ReadFile(cfg.Fixture)
|
||
if err != nil {
|
||
return mobileCloudArticlePayload{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
|
||
}
|
||
return mobileCloudArticlePayload{
|
||
ArticleID: 91592,
|
||
Title: mobileCloudPricingArticleTitle,
|
||
DocURL: fmt.Sprintf(defaultMobileCloudDocURLPattern, 91592),
|
||
ContentHTML: string(data),
|
||
}, nil
|
||
}
|
||
if client == nil {
|
||
client = &http.Client{Timeout: 20 * time.Second}
|
||
}
|
||
outlineRaw, err := fetchRawPricingPage(cfg.OutlineTreeURL, "", client)
|
||
if err != nil {
|
||
return mobileCloudArticlePayload{}, err
|
||
}
|
||
articleID, contentPublished, err := resolveMobileCloudPricingArticle(outlineRaw)
|
||
if err != nil {
|
||
return mobileCloudArticlePayload{}, err
|
||
}
|
||
infoURL := fmt.Sprintf(defaultMobileCloudArticleInfoURL, articleID)
|
||
infoRaw, err := fetchRawPricingPage(infoURL, "", client)
|
||
if err != nil {
|
||
return mobileCloudArticlePayload{}, err
|
||
}
|
||
articleInfo, err := parseMobileCloudArticleInfo(infoRaw)
|
||
if err != nil {
|
||
return mobileCloudArticlePayload{}, err
|
||
}
|
||
if strings.TrimSpace(contentPublished) == "" {
|
||
contentPublished = articleInfo.ContentPublished
|
||
}
|
||
contentURL := fmt.Sprintf(defaultMobileCloudArticleContentURL, contentPublished)
|
||
contentHTML, err := fetchRawPricingPage(contentURL, "", client)
|
||
if err != nil {
|
||
return mobileCloudArticlePayload{}, err
|
||
}
|
||
return mobileCloudArticlePayload{
|
||
ArticleID: articleInfo.ID,
|
||
Title: articleInfo.Title,
|
||
ContentPublished: contentPublished,
|
||
DocURL: fmt.Sprintf(defaultMobileCloudDocURLPattern, articleInfo.ID),
|
||
ContentHTML: contentHTML,
|
||
}, nil
|
||
}
|
||
|
||
func resolveMobileCloudPricingArticle(raw string) (int, string, error) {
|
||
var envelope mobileCloudOutlineEnvelope
|
||
if err := json.Unmarshal([]byte(raw), &envelope); err != nil {
|
||
return 0, "", fmt.Errorf("parse mobile cloud outline tree: %w", err)
|
||
}
|
||
articleID, contentPublished, ok := findMobileCloudPricingArticle(envelope.Data)
|
||
if !ok {
|
||
return 0, "", fmt.Errorf("mobile cloud pricing article %q not found in outline tree", mobileCloudPricingArticleTitle)
|
||
}
|
||
return articleID, contentPublished, nil
|
||
}
|
||
|
||
func findMobileCloudPricingArticle(node mobileCloudOutlineNode) (int, string, bool) {
|
||
if strings.TrimSpace(node.ArticleTitle) == mobileCloudPricingArticleTitle && node.ArticleID > 0 {
|
||
return node.ArticleID, strings.TrimSpace(node.ArticleContentPublished), true
|
||
}
|
||
for _, child := range node.Children {
|
||
if articleID, contentPublished, ok := findMobileCloudPricingArticle(child); ok {
|
||
return articleID, contentPublished, true
|
||
}
|
||
}
|
||
return 0, "", false
|
||
}
|
||
|
||
func parseMobileCloudArticleInfo(raw string) (mobileCloudArticleInfo, error) {
|
||
var envelope mobileCloudArticleInfoEnvelope
|
||
if err := json.Unmarshal([]byte(raw), &envelope); err != nil {
|
||
return mobileCloudArticleInfo{}, fmt.Errorf("parse mobile cloud article info: %w", err)
|
||
}
|
||
if envelope.Data.ID == 0 {
|
||
return mobileCloudArticleInfo{}, fmt.Errorf("unexpected mobile cloud article info content")
|
||
}
|
||
return envelope.Data, nil
|
||
}
|
||
|
||
func parseMobileCloudPricingHTML(raw string, docURL string) ([]officialPricingRecord, error) {
|
||
sections := mobileCloudRegionSections(raw)
|
||
if len(sections) == 0 {
|
||
return nil, fmt.Errorf("no mobile cloud pricing regions found")
|
||
}
|
||
records := make([]officialPricingRecord, 0)
|
||
for _, section := range sections {
|
||
for _, table := range mobileCloudTableBlocks(section.Body) {
|
||
rows := mobileCloudTableRows(table)
|
||
if len(rows) < 2 {
|
||
continue
|
||
}
|
||
switch {
|
||
case isMobileCloudTokenPricingHeader(rows[0]):
|
||
records = append(records, buildMobileCloudRecordsFromTable(section.Region, rows[1:], docURL)...)
|
||
case isMobileCloudVoicePricingHeader(rows[0]):
|
||
records = append(records, buildMobileCloudVoiceRecordsFromTable(section.Region, rows[1:], docURL)...)
|
||
}
|
||
}
|
||
}
|
||
if len(records) == 0 {
|
||
return nil, fmt.Errorf("no mobile cloud token pricing rows found")
|
||
}
|
||
return records, nil
|
||
}
|
||
|
||
// mobileCloudRegionSection is one region's slice of the pricing article.
type mobileCloudRegionSection struct {
	// Region is the region name taken from the section heading.
	Region string
	// Body is the raw HTML between this heading and the next <h2> heading (or
	// the end of the document).
	Body string
}
|
||
|
||
func mobileCloudRegionSections(raw string) []mobileCloudRegionSection {
|
||
headingPattern := regexp.MustCompile(`(?is)<h2[^>]*>(.*?)</h2>`)
|
||
matches := headingPattern.FindAllStringSubmatchIndex(raw, -1)
|
||
sections := make([]mobileCloudRegionSection, 0, len(matches))
|
||
for i, match := range matches {
|
||
heading := cleanMobileCloudHTMLText(raw[match[2]:match[3]])
|
||
if !strings.Contains(heading, "支持订购模型") {
|
||
continue
|
||
}
|
||
start := match[1]
|
||
end := len(raw)
|
||
if i+1 < len(matches) {
|
||
end = matches[i+1][0]
|
||
}
|
||
region := strings.TrimSpace(strings.TrimSuffix(heading, "资源池支持订购模型"))
|
||
if region == heading {
|
||
region = strings.TrimSpace(strings.TrimSuffix(heading, "支持订购模型"))
|
||
}
|
||
sections = append(sections, mobileCloudRegionSection{Region: region, Body: raw[start:end]})
|
||
}
|
||
return sections
|
||
}
|
||
|
||
// mobileCloudTableBlockPattern matches one <table>…</table> element,
// case-insensitively and across newlines, using a non-greedy body.
var mobileCloudTableBlockPattern = regexp.MustCompile(`(?is)<table.*?</table>`)

// mobileCloudTableBlocks returns every <table>…</table> block found in raw, in
// document order. The pattern is compiled once at package scope because this
// function is called once per region section. Nested tables are not handled:
// a match ends at the first closing tag.
func mobileCloudTableBlocks(raw string) []string {
	return mobileCloudTableBlockPattern.FindAllString(raw, -1)
}
|
||
|
||
func mobileCloudTableRows(raw string) [][]string {
|
||
rowMatches := regexp.MustCompile(`(?is)<tr[^>]*>(.*?)</tr>`).FindAllStringSubmatch(raw, -1)
|
||
rows := make([][]string, 0, len(rowMatches))
|
||
for _, rowMatch := range rowMatches {
|
||
cellMatches := regexp.MustCompile(`(?is)<t[dh][^>]*>(.*?)</t[dh]>`).FindAllStringSubmatch(rowMatch[1], -1)
|
||
cells := make([]string, 0, len(cellMatches))
|
||
for _, cellMatch := range cellMatches {
|
||
cells = append(cells, cleanMobileCloudHTMLText(cellMatch[1]))
|
||
}
|
||
if len(cells) > 0 {
|
||
rows = append(rows, cells)
|
||
}
|
||
}
|
||
return rows
|
||
}
|
||
|
||
// mobileCloudHTMLTagPattern matches any HTML tag, including every <br> variant.
var mobileCloudHTMLTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)

// cleanMobileCloudHTMLText reduces an HTML fragment to plain text: every tag
// becomes a space, HTML entities are decoded, and every run of whitespace is
// collapsed to a single space with leading and trailing whitespace removed.
//
// Whitespace is normalized with strings.Fields rather than the regexp class
// \s, because \s only covers ASCII whitespace. A decoded &nbsp; (U+00A0) or an
// ideographic space (U+3000) would otherwise survive inside the text and break
// the exact comparisons against header cell names.
func cleanMobileCloudHTMLText(raw string) string {
	raw = mobileCloudHTMLTagPattern.ReplaceAllString(raw, " ")
	raw = html.UnescapeString(raw)
	return strings.Join(strings.Fields(raw), " ")
}
|
||
|
||
// isMobileCloudTokenPricingHeader reports whether cells is the header row of a
// token-pricing table: its first four cells must equal the expected column
// titles exactly. Additional trailing cells are ignored.
func isMobileCloudTokenPricingHeader(cells []string) bool {
	expected := [...]string{"规格名称", "输入/输出tokens", "单价(元/百万tokens)", "包含模型"}
	if len(cells) < len(expected) {
		return false
	}
	for i, want := range expected {
		if cells[i] != want {
			return false
		}
	}
	return true
}
|
||
|
||
// isMobileCloudVoicePricingHeader reports whether cells is the header row of a
// voice-pricing table: its first five cells must equal the expected column
// titles exactly. Additional trailing cells are ignored.
func isMobileCloudVoicePricingHeader(cells []string) bool {
	expected := [...]string{"规格名称", "模型类别", "资费场景", "单价", "包含模型"}
	if len(cells) < len(expected) {
		return false
	}
	for i, want := range expected {
		if cells[i] != want {
			return false
		}
	}
	return true
}
|
||
|
||
func buildMobileCloudRecordsFromTable(region string, rows [][]string, docURL string) []officialPricingRecord {
|
||
records := make([]officialPricingRecord, 0)
|
||
currentModels := make([]string, 0)
|
||
currentInputPrice := 0.0
|
||
for _, row := range rows {
|
||
switch {
|
||
case len(row) >= 4:
|
||
billingKind := strings.TrimSpace(row[1])
|
||
price := mustParseSubscriptionPrice(row[2])
|
||
currentModels = mobileCloudModelNames(row[3])
|
||
switch billingKind {
|
||
case "输入tokens":
|
||
currentInputPrice = price
|
||
case "tokens资费":
|
||
records = append(records, buildMobileCloudFlatTokenRecords(region, currentModels, price, docURL)...)
|
||
currentInputPrice = 0
|
||
default:
|
||
currentInputPrice = 0
|
||
}
|
||
case len(row) >= 2 && strings.TrimSpace(row[0]) == "输出tokens":
|
||
if currentInputPrice <= 0 || len(currentModels) == 0 {
|
||
continue
|
||
}
|
||
outputPrice := mustParseSubscriptionPrice(row[1])
|
||
records = append(records, buildMobileCloudInputOutputRecords(region, currentModels, currentInputPrice, outputPrice, docURL)...)
|
||
currentInputPrice = 0
|
||
}
|
||
}
|
||
return records
|
||
}
|
||
|
||
func buildMobileCloudInputOutputRecords(region string, modelNames []string, inputPrice float64, outputPrice float64, docURL string) []officialPricingRecord {
|
||
records := make([]officialPricingRecord, 0, len(modelNames))
|
||
for _, modelName := range modelNames {
|
||
providerName := mobileCloudProviderName(modelName)
|
||
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||
records = append(records, officialPricingRecord{
|
||
ModelID: normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), modelName),
|
||
ModelName: modelName,
|
||
ProviderName: providerName,
|
||
ProviderNameCn: providerNameCn,
|
||
ProviderCountry: providerCountry,
|
||
ProviderWebsite: providerWebsite,
|
||
OperatorName: "Mobile Cloud",
|
||
OperatorNameCn: "移动云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS",
|
||
OperatorType: "official",
|
||
Region: region,
|
||
Currency: "CNY",
|
||
InputPrice: inputPrice,
|
||
OutputPrice: outputPrice,
|
||
SourceURL: docURL,
|
||
ModelSourceURL: docURL,
|
||
DateConfidence: "unknown",
|
||
DateSourceKind: "official_pricing",
|
||
Modality: detectModality(modelName),
|
||
})
|
||
}
|
||
return records
|
||
}
|
||
|
||
func buildMobileCloudFlatTokenRecords(region string, modelNames []string, price float64, docURL string) []officialPricingRecord {
|
||
records := make([]officialPricingRecord, 0, len(modelNames))
|
||
for _, modelName := range modelNames {
|
||
providerName := mobileCloudProviderName(modelName)
|
||
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||
records = append(records, officialPricingRecord{
|
||
ModelID: normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), modelName),
|
||
ModelName: modelName,
|
||
ProviderName: providerName,
|
||
ProviderNameCn: providerNameCn,
|
||
ProviderCountry: providerCountry,
|
||
ProviderWebsite: providerWebsite,
|
||
OperatorName: "Mobile Cloud",
|
||
OperatorNameCn: "移动云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS",
|
||
OperatorType: "official",
|
||
Region: region,
|
||
Currency: "CNY",
|
||
InputPrice: price,
|
||
OutputPrice: price,
|
||
SourceURL: docURL,
|
||
ModelSourceURL: docURL,
|
||
DateConfidence: "unknown",
|
||
DateSourceKind: "official_pricing",
|
||
Modality: detectModality(modelName),
|
||
})
|
||
}
|
||
return records
|
||
}
|
||
|
||
func buildMobileCloudVoiceRecordsFromTable(region string, rows [][]string, docURL string) []officialPricingRecord {
|
||
records := make([]officialPricingRecord, 0, len(rows))
|
||
for _, row := range rows {
|
||
if len(row) < 5 {
|
||
continue
|
||
}
|
||
modelNames := mobileCloudModelNames(row[4])
|
||
if len(modelNames) == 0 {
|
||
modelNames = []string{strings.TrimSpace(row[0])}
|
||
}
|
||
flatPrice := mobileCloudInlinePrice(row[3])
|
||
priceUnit := mobileCloudVoicePriceUnit(row[2], row[3])
|
||
for _, modelName := range modelNames {
|
||
providerName := mobileCloudProviderName(modelName)
|
||
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||
records = append(records, officialPricingRecord{
|
||
ModelID: normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), modelName),
|
||
ModelName: modelName,
|
||
ProviderName: providerName,
|
||
ProviderNameCn: providerNameCn,
|
||
ProviderCountry: providerCountry,
|
||
ProviderWebsite: providerWebsite,
|
||
OperatorName: "Mobile Cloud",
|
||
OperatorNameCn: "移动云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS",
|
||
OperatorType: "official",
|
||
Region: region,
|
||
Currency: "CNY",
|
||
PricingMode: "flat",
|
||
PriceUnit: priceUnit,
|
||
FlatPrice: flatPrice,
|
||
SourceURL: docURL,
|
||
ModelSourceURL: docURL,
|
||
DateConfidence: "unknown",
|
||
DateSourceKind: "official_pricing",
|
||
Modality: "audio",
|
||
})
|
||
}
|
||
}
|
||
return records
|
||
}
|
||
|
||
// mobileCloudVoicePriceUnit derives the price unit of a voice-pricing row from
// its billing-scene and price cells. It returns "10k_characters" when the
// combined text mentions characters ("字符"), "second" when it mentions seconds
// ("秒"), and "flat" otherwise. Characters take precedence over seconds.
//
// NOTE(review): any mention of "字符" maps to the 10k-character unit, including
// a price quoted per single or per thousand characters — confirm that the
// source only ever quotes per 10k characters.
func mobileCloudVoicePriceUnit(scene string, price string) string {
	combined := strings.ToLower(strings.TrimSpace(scene + " " + price))
	mentions := func(keyword string) bool {
		return strings.Contains(combined, keyword)
	}

	if mentions("万字符") || mentions("字符") {
		return "10k_characters"
	}
	if mentions("元/秒") || mentions("秒") {
		return "second"
	}
	return "flat"
}
|
||
|
||
func mobileCloudInlinePrice(raw string) float64 {
|
||
matches := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?)`).FindStringSubmatch(raw)
|
||
if len(matches) != 2 {
|
||
return 0
|
||
}
|
||
return mustParseSubscriptionPrice(matches[1])
|
||
}
|
||
|
||
// mobileCloudModelSeparators rewrites list separators to spaces so that the
// model column can be split uniformly on whitespace.
var mobileCloudModelSeparators = strings.NewReplacer("、", " ", ",", " ", ",", " ")

// mobileCloudModelNames splits the text of a "包含模型" cell into individual
// model names. Names may be separated by whitespace, by the enumeration comma
// "、", or by a full-width or ASCII comma, in any combination; empty entries
// are dropped.
//
// Splitting on the separators themselves, rather than only stripping a
// trailing "、", keeps "A、B" written without whitespace from being returned
// as a single bogus model name.
func mobileCloudModelNames(raw string) []string {
	return strings.Fields(mobileCloudModelSeparators.Replace(raw))
}
|
||
|
||
// mobileCloudProviderName maps a model name to the name of its provider by
// matching the lower-cased, trimmed model name against known prefixes. Models
// with no known prefix are attributed to "China Mobile".
func mobileCloudProviderName(modelName string) string {
	normalized := strings.ToLower(strings.TrimSpace(modelName))
	vendors := []struct {
		prefix   string
		provider string
	}{
		{"minimax", "MiniMax"},
		{"deepseek", "DeepSeek"},
		{"qwen", "Qwen"},
		{"qwq", "Qwen"},
		{"bge", "BAAI"},
		{"cosyvoice", "Alibaba"},
		{"sensevoice", "Alibaba"},
	}
	for _, v := range vendors {
		if strings.HasPrefix(normalized, v.prefix) {
			return v.provider
		}
	}
	return "China Mobile"
}
|
||
|
||
func mobileCloudRegionCode(region string) string {
|
||
switch strings.TrimSpace(region) {
|
||
case "华北-呼和浩特":
|
||
return "huabei-huhehaote"
|
||
case "东北-哈尔滨":
|
||
return "dongbei-haerbin"
|
||
case "华中-郑州":
|
||
return "huazhong-zhengzhou"
|
||
case "黑龙江-哈尔滨":
|
||
return "heilongjiang-haerbin"
|
||
case "华东-上海5":
|
||
return "huadong-shanghai5"
|
||
case "江西-南昌":
|
||
return "jiangxi-nanchang"
|
||
case "湖北-武汉":
|
||
return "hubei-wuhan"
|
||
case "华南-广州8":
|
||
return "huanan-guangzhou8"
|
||
default:
|
||
return normalizeExternalID(region)
|
||
}
|
||
}
|