feat(importers): add official pricing importers for Baichuan, Lingyiwanwu, SenseNova, and Xfyun
This commit is contained in:
188
scripts/import_baichuan_pricing.go
Normal file
188
scripts/import_baichuan_pricing.go
Normal file
@@ -0,0 +1,188 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// defaultBaichuanPricingURL is the default value of the -url flag and is also
// recorded as the source URL on every parsed Baichuan pricing record.
const defaultBaichuanPricingURL = "https://platform.baichuan-ai.com/prices"
|
||||
|
||||
// baichuanPricingImportConfig holds the settings for one Baichuan import run.
type baichuanPricingImportConfig struct {
	// URL is the pricing page address handed to the page fetch helper.
	URL string
	// Fixture is the sample-file path handed to the page fetch helper.
	Fixture string
	// DryRun makes the run print a summary only, without touching the database.
	DryRun bool
	// Timeout is the timeout configured on the HTTP client.
	Timeout time.Duration
}
|
||||
|
||||
// baichuanPricingRow is one model row extracted from the Baichuan pricing page
// before it is converted into an officialPricingRecord.
type baichuanPricingRow struct {
	// Index is the position of the source chunk, used to keep page order.
	Index int
	// ModelName is the model name captured from the page text.
	ModelName string
	// ContextLength is the value parsed from the context column (e.g. "32k").
	ContextLength int
	// InputPrice is the input price after the per-1K to per-1M conversion.
	InputPrice float64
	// OutputPrice is the output price after the per-1K to per-1M conversion.
	OutputPrice float64
}
|
||||
|
||||
// Patterns applied to the whitespace-normalised text of the Baichuan pricing page.
var (
	// baichuanModelContextPattern captures the model name and the context size token (e.g. "32k").
	baichuanModelContextPattern = regexp.MustCompile(`模型调用\s+(Baichuan[-A-Za-z0-9]+)\s+([0-9]+k)`)

	// baichuanPairPricePattern captures separate input and output prices quoted per thousand tokens.
	baichuanPairPricePattern = regexp.MustCompile(`输入:([0-9.]+)元/千tokens\s+输出:([0-9.]+)元/千tokens`)

	// baichuanFlatPricePattern captures a single per-thousand-token price listed
	// next to a 00:00~24:00 or 00:00~8:00 time window.
	baichuanFlatPricePattern = regexp.MustCompile(`(?:00:00\s*~\s*24:00|00:00\s*~\s*8:00)\s+([0-9.]+)元/千tokens`)
)
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var url string
|
||||
var fixture string
|
||||
var dryRun bool
|
||||
var timeoutSeconds int
|
||||
|
||||
flag.StringVar(&url, "url", defaultBaichuanPricingURL, "百川官方价格页")
|
||||
flag.StringVar(&fixture, "fixture", "", "百川价格样例文件")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||
flag.Parse()
|
||||
|
||||
cfg := baichuanPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
||||
|
||||
var db *sql.DB
|
||||
var err error
|
||||
if !cfg.DryRun {
|
||||
db, err = subscriptionImportDB()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
}
|
||||
|
||||
if err := runBaichuanPricingImport(cfg, db, os.Stdout); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "import_baichuan_pricing: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func runBaichuanPricingImport(cfg baichuanPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||||
client := &http.Client{Timeout: cfg.Timeout}
|
||||
raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records, err := parseBaichuanPricingCatalog(raw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records = dedupeOfficialPricingRecords(records)
|
||||
if cfg.DryRun {
|
||||
_, err = fmt.Fprintf(out, "source=baichuan-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||||
return err
|
||||
}
|
||||
if db == nil {
|
||||
return fmt.Errorf("db is required when dry-run=false")
|
||||
}
|
||||
if err := upsertOfficialPricingRecords(db, records, "baichuan-pricing-import"); err != nil {
|
||||
return err
|
||||
}
|
||||
var tableRows int
|
||||
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||||
return fmt.Errorf("count region_pricing: %w", err)
|
||||
}
|
||||
_, err = fmt.Fprintf(out, "source=baichuan-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||||
return err
|
||||
}
|
||||
|
||||
func parseBaichuanPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||
text := cleanHTMLText(raw)
|
||||
text = strings.ReplaceAll(text, "\n", " ")
|
||||
text = regexp.MustCompile(`\s+`).ReplaceAllString(text, " ")
|
||||
text = strings.TrimSpace(text)
|
||||
|
||||
sectionStart := strings.Index(text, "通用大模型")
|
||||
if sectionStart == -1 {
|
||||
return nil, fmt.Errorf("unexpected baichuan pricing content: missing 通用大模型")
|
||||
}
|
||||
text = text[sectionStart:]
|
||||
sectionEnd := strings.Index(text, "搜索增强服务")
|
||||
if sectionEnd == -1 {
|
||||
return nil, fmt.Errorf("unexpected baichuan pricing content: missing 搜索增强服务")
|
||||
}
|
||||
section := text[:sectionEnd]
|
||||
|
||||
chunks := strings.Split(section, "模型调用 ")
|
||||
rows := make([]baichuanPricingRow, 0, len(chunks))
|
||||
for idx, chunk := range chunks {
|
||||
chunk = strings.TrimSpace(chunk)
|
||||
if chunk == "" {
|
||||
continue
|
||||
}
|
||||
chunk = "模型调用 " + chunk
|
||||
if strings.Contains(chunk, "Baichuan-Text-Embedding") {
|
||||
continue
|
||||
}
|
||||
meta := baichuanModelContextPattern.FindStringSubmatch(chunk)
|
||||
if len(meta) != 3 {
|
||||
continue
|
||||
}
|
||||
modelName := strings.TrimSpace(meta[1])
|
||||
contextLength := parseContextLengthCommon(meta[2])
|
||||
if contextLength == 0 {
|
||||
continue
|
||||
}
|
||||
row := baichuanPricingRow{Index: idx, ModelName: modelName, ContextLength: contextLength}
|
||||
if pair := baichuanPairPricePattern.FindStringSubmatch(chunk); len(pair) == 3 {
|
||||
row.InputPrice = baichuanPerKTokenToPerMToken(pair[1])
|
||||
row.OutputPrice = baichuanPerKTokenToPerMToken(pair[2])
|
||||
} else if flat := baichuanFlatPricePattern.FindStringSubmatch(chunk); len(flat) == 2 {
|
||||
price := baichuanPerKTokenToPerMToken(flat[1])
|
||||
row.InputPrice = price
|
||||
row.OutputPrice = price
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
rows = append(rows, row)
|
||||
}
|
||||
if len(rows) == 0 {
|
||||
return nil, fmt.Errorf("unexpected baichuan pricing content: no model rows parsed")
|
||||
}
|
||||
sort.Slice(rows, func(i, j int) bool { return rows[i].Index < rows[j].Index })
|
||||
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata("Baichuan")
|
||||
records := make([]officialPricingRecord, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
records = append(records, officialPricingRecord{
|
||||
ModelID: normalizeExternalID("baichuan", row.ModelName),
|
||||
ModelName: row.ModelName,
|
||||
ProviderName: "Baichuan",
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "Baichuan API",
|
||||
OperatorNameCn: "百川开放平台",
|
||||
OperatorCountry: "CN",
|
||||
OperatorWebsite: "https://platform.baichuan-ai.com/docs",
|
||||
OperatorType: "official",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: row.InputPrice,
|
||||
OutputPrice: row.OutputPrice,
|
||||
ContextLength: row.ContextLength,
|
||||
SourceURL: defaultBaichuanPricingURL,
|
||||
ModelSourceURL: defaultBaichuanPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_pricing",
|
||||
Modality: detectModality(row.ModelName),
|
||||
})
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
|
||||
func baichuanPerKTokenToPerMToken(raw string) float64 {
|
||||
return mustParseSubscriptionPrice(raw) * 1000
|
||||
}
|
||||
64
scripts/import_baichuan_pricing_test.go
Normal file
64
scripts/import_baichuan_pricing_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseBaichuanPricingCatalogBuildsRecords(t *testing.T) {
|
||||
raw, err := os.ReadFile(filepath.Join("testdata", "baichuan_pricing_sample.txt"))
|
||||
if err != nil {
|
||||
t.Fatalf("读取 fixture 失败: %v", err)
|
||||
}
|
||||
|
||||
records, err := parseBaichuanPricingCatalog(string(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("parseBaichuanPricingCatalog 返回错误: %v", err)
|
||||
}
|
||||
if len(records) != 11 {
|
||||
t.Fatalf("期望 11 条百川价格记录,实际 %d", len(records))
|
||||
}
|
||||
if records[0].ModelID != "baichuan-baichuan-m3-plus" {
|
||||
t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
|
||||
}
|
||||
if records[0].InputPrice != 5 || records[0].OutputPrice != 9 {
|
||||
t.Fatalf("Baichuan-M3-Plus 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice)
|
||||
}
|
||||
if records[4].InputPrice != 15 || records[4].OutputPrice != 15 {
|
||||
t.Fatalf("Baichuan4-Turbo blended 定价错误: %v / %v", records[4].InputPrice, records[4].OutputPrice)
|
||||
}
|
||||
if records[8].ContextLength != 128000 {
|
||||
t.Fatalf("Baichuan3-Turbo-128k context 错误: %d", records[8].ContextLength)
|
||||
}
|
||||
if records[10].InputPrice != 10 || records[10].OutputPrice != 10 {
|
||||
t.Fatalf("Baichuan2-53B 基线定价错误: %v / %v", records[10].InputPrice, records[10].OutputPrice)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBaichuanPricingImportDryRunPrintsSummary(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
err := runBaichuanPricingImport(baichuanPricingImportConfig{
|
||||
URL: defaultBaichuanPricingURL,
|
||||
Fixture: filepath.Join("testdata", "baichuan_pricing_sample.txt"),
|
||||
DryRun: true,
|
||||
}, nil, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("runBaichuanPricingImport 返回错误: %v", err)
|
||||
}
|
||||
output := out.String()
|
||||
for _, want := range []string{
|
||||
"source=baichuan-pricing-import",
|
||||
"models=11",
|
||||
"operator=Baichuan API",
|
||||
"dry_run=true",
|
||||
} {
|
||||
if !strings.Contains(output, want) {
|
||||
t.Fatalf("输出缺少 %q,实际: %q", want, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
160
scripts/import_lingyiwanwu_pricing.go
Normal file
160
scripts/import_lingyiwanwu_pricing.go
Normal file
@@ -0,0 +1,160 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"flag"
|
||||
"fmt"
|
||||
"html"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// defaultLingyiwanwuPricingURL is the default value of the -url flag and is
// also recorded as the operator website and source URL on every parsed record.
const defaultLingyiwanwuPricingURL = "https://platform.lingyiwanwu.com/docs"
|
||||
|
||||
// lingyiwanwuPricingImportConfig holds the settings for one Lingyiwanwu (01.AI) import run.
type lingyiwanwuPricingImportConfig struct {
	// URL is the pricing page address handed to the page fetch helper.
	URL string
	// Fixture is the sample-file path handed to the page fetch helper.
	Fixture string
	// DryRun makes the run print a summary only, without touching the database.
	DryRun bool
	// Timeout is the timeout configured on the HTTP client.
	Timeout time.Duration
}
|
||||
|
||||
// lingyiwanwuPricingRowPattern matches one pricing table row in the serialized
// page payload. Capture groups: (1) model name starting with "yi-",
// (2) context size such as "16K", (3) the first ¥ price that follows.
var lingyiwanwuPricingRowPattern = regexp.MustCompile(`(?s)"children":"(yi-[a-z0-9-]+)"\}\],\["\$","td",null,\{"children":"([0-9]+K)"\}.*?"children":"¥([0-9]+(?:\.[0-9]+)?)"`)
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var url string
|
||||
var fixture string
|
||||
var dryRun bool
|
||||
var timeoutSeconds int
|
||||
|
||||
flag.StringVar(&url, "url", defaultLingyiwanwuPricingURL, "零一万物官方价格页")
|
||||
flag.StringVar(&fixture, "fixture", "", "零一万物价格样例文件")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||
flag.Parse()
|
||||
|
||||
cfg := lingyiwanwuPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
||||
|
||||
var db *sql.DB
|
||||
var err error
|
||||
if !cfg.DryRun {
|
||||
db, err = subscriptionImportDB()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
}
|
||||
|
||||
if err := runLingyiwanwuPricingImport(cfg, db, os.Stdout); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "import_lingyiwanwu_pricing: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func runLingyiwanwuPricingImport(cfg lingyiwanwuPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||||
client := &http.Client{Timeout: cfg.Timeout}
|
||||
raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records, err := parseLingyiwanwuPricingCatalog(raw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records = dedupeOfficialPricingRecords(records)
|
||||
if len(records) == 0 {
|
||||
return fmt.Errorf("unexpected lingyiwanwu pricing content: no records")
|
||||
}
|
||||
if cfg.DryRun {
|
||||
_, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||||
return err
|
||||
}
|
||||
if db == nil {
|
||||
return fmt.Errorf("db is required when dry-run=false")
|
||||
}
|
||||
if err := upsertOfficialPricingRecords(db, records, "lingyiwanwu-pricing-import"); err != nil {
|
||||
return err
|
||||
}
|
||||
var tableRows int
|
||||
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||||
return fmt.Errorf("count region_pricing: %w", err)
|
||||
}
|
||||
_, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||||
return err
|
||||
}
|
||||
|
||||
func parseLingyiwanwuPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||
payload := lingyiwanwuPricingPayload(raw)
|
||||
sectionStart := strings.Index(payload, "模型与计费")
|
||||
if sectionStart == -1 {
|
||||
return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 模型与计费")
|
||||
}
|
||||
payload = payload[sectionStart:]
|
||||
sectionEnd := strings.Index(payload, "关于计费")
|
||||
if sectionEnd == -1 {
|
||||
return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 关于计费")
|
||||
}
|
||||
section := payload[:sectionEnd]
|
||||
matches := lingyiwanwuPricingRowPattern.FindAllStringSubmatch(section, -1)
|
||||
if len(matches) == 0 {
|
||||
return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: no model rows parsed")
|
||||
}
|
||||
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata("Yi")
|
||||
records := make([]officialPricingRecord, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
if len(match) != 4 {
|
||||
continue
|
||||
}
|
||||
modelName := strings.TrimSpace(match[1])
|
||||
contextLength := parseContextLengthCommon(match[2])
|
||||
price := mustParseSubscriptionPrice(match[3])
|
||||
records = append(records, officialPricingRecord{
|
||||
ModelID: normalizeExternalID("yi", modelName),
|
||||
ModelName: modelName,
|
||||
ProviderName: "Yi",
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "01.AI API",
|
||||
OperatorNameCn: "零一万物开放平台",
|
||||
OperatorCountry: "CN",
|
||||
OperatorWebsite: defaultLingyiwanwuPricingURL,
|
||||
OperatorType: "official",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: price,
|
||||
OutputPrice: price,
|
||||
ContextLength: contextLength,
|
||||
SourceURL: defaultLingyiwanwuPricingURL,
|
||||
ModelSourceURL: defaultLingyiwanwuPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_pricing",
|
||||
Modality: detectModality(modelName),
|
||||
})
|
||||
}
|
||||
if len(records) == 0 {
|
||||
return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: empty records after parse")
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
|
||||
// Patterns used by lingyiwanwuPricingPayload, compiled once at package scope
// instead of on every call.
var (
	// lingyiwanwuHTMLTagPattern matches a single HTML tag.
	lingyiwanwuHTMLTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)
	// lingyiwanwuBlankRunPattern matches a run of spaces and tabs (not newlines).
	lingyiwanwuBlankRunPattern = regexp.MustCompile(`[ \t]+`)
)

// lingyiwanwuPricingPayload normalises the raw docs page so the row pattern
// can be applied to it.
//
// It decodes HTML entities, turns the literal escape sequences \u003c, \u003e,
// \n, \t and \" into the characters they stand for (\t becomes a space),
// replaces every HTML tag with a space, collapses runs of spaces and tabs to a
// single space and trims the result. Newlines are kept.
func lingyiwanwuPricingPayload(raw string) string {
	text := html.UnescapeString(raw)
	// The replacements run sequentially and in this order on purpose: the tag
	// stripping below relies on \u003c / \u003e having been decoded first.
	text = strings.ReplaceAll(text, `\u003c`, "<")
	text = strings.ReplaceAll(text, `\u003e`, ">")
	text = strings.ReplaceAll(text, `\n`, "\n")
	text = strings.ReplaceAll(text, `\t`, " ")
	text = strings.ReplaceAll(text, `\"`, `"`)
	text = lingyiwanwuHTMLTagPattern.ReplaceAllString(text, " ")
	text = lingyiwanwuBlankRunPattern.ReplaceAllString(text, " ")
	return strings.TrimSpace(text)
}
|
||||
64
scripts/import_lingyiwanwu_pricing_test.go
Normal file
64
scripts/import_lingyiwanwu_pricing_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseLingyiwanwuPricingCatalogBuildsRecords(t *testing.T) {
|
||||
raw, err := os.ReadFile(filepath.Join("testdata", "lingyiwanwu_pricing_sample.txt"))
|
||||
if err != nil {
|
||||
t.Fatalf("读取 fixture 失败: %v", err)
|
||||
}
|
||||
|
||||
records, err := parseLingyiwanwuPricingCatalog(string(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("parseLingyiwanwuPricingCatalog 返回错误: %v", err)
|
||||
}
|
||||
if len(records) != 2 {
|
||||
t.Fatalf("期望 2 条零一万物价格记录,实际 %d", len(records))
|
||||
}
|
||||
if records[0].ModelID != "yi-yi-lightning" {
|
||||
t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
|
||||
}
|
||||
if records[0].InputPrice != 0.99 || records[0].OutputPrice != 0.99 {
|
||||
t.Fatalf("yi-lightning 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice)
|
||||
}
|
||||
if records[1].ContextLength != 16000 {
|
||||
t.Fatalf("yi-vision-v2 context 错误: %d", records[1].ContextLength)
|
||||
}
|
||||
if records[1].Modality != "multimodal" {
|
||||
t.Fatalf("yi-vision-v2 modality 错误: %q", records[1].Modality)
|
||||
}
|
||||
if records[1].InputPrice != 6 || records[1].OutputPrice != 6 {
|
||||
t.Fatalf("yi-vision-v2 定价错误: %v / %v", records[1].InputPrice, records[1].OutputPrice)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunLingyiwanwuPricingImportDryRunPrintsSummary(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
err := runLingyiwanwuPricingImport(lingyiwanwuPricingImportConfig{
|
||||
URL: defaultLingyiwanwuPricingURL,
|
||||
Fixture: filepath.Join("testdata", "lingyiwanwu_pricing_sample.txt"),
|
||||
DryRun: true,
|
||||
}, nil, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("runLingyiwanwuPricingImport 返回错误: %v", err)
|
||||
}
|
||||
output := out.String()
|
||||
for _, want := range []string{
|
||||
"source=lingyiwanwu-pricing-import",
|
||||
"models=2",
|
||||
"operator=01.AI API",
|
||||
"dry_run=true",
|
||||
} {
|
||||
if !strings.Contains(output, want) {
|
||||
t.Fatalf("输出缺少 %q,实际: %q", want, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
378
scripts/import_sensenova_pricing.go
Normal file
378
scripts/import_sensenova_pricing.go
Normal file
@@ -0,0 +1,378 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Default page addresses for the SenseNova importer.
const (
	// defaultSensenovaDocsURL is the default of -docs-url and the source URL
	// recorded on every parsed record.
	defaultSensenovaDocsURL = "https://platform.sensenova.cn/docs"
	// defaultSensenovaModelsURL is the default of -models-url.
	defaultSensenovaModelsURL = "https://www.sensenova.cn/models"
)
|
||||
|
||||
// sensenovaPricingImportConfig holds the settings for one SenseNova import run.
type sensenovaPricingImportConfig struct {
	// DocsURL is the API docs page rendered with a headless browser.
	DocsURL string
	// ModelsURL is the models page whose script bundles are scanned.
	ModelsURL string
	// Fixture, when non-blank, is read instead of fetching either page.
	Fixture string
	// DryRun makes the run print a summary only, without touching the database.
	DryRun bool
	// Timeout bounds both the browser render and the HTTP requests.
	Timeout time.Duration
}
|
||||
|
||||
// sensenovaPricingFixture bundles the two inputs the SenseNova parser needs.
type sensenovaPricingFixture struct {
	// DocsHTML is the rendered docs page, or the fixture segment before the split marker.
	DocsHTML string
	// ModelsText is the models-page script bundle, or the fixture segment after the split marker.
	ModelsText string
}
|
||||
|
||||
// sensenovaPricingDocModel describes one model listed on the SenseNova docs page.
// NOTE(review): the fields presumably mirror the capture groups of the
// overview card pattern (name, quota per 5 hours, model ID); confirm against
// the code that populates this type.
type sensenovaPricingDocModel struct {
	ModelName     string
	ModelID       string
	QuotaPer5Hour int
}
|
||||
|
||||
// Markers and patterns used by the SenseNova importer.
var (
	// sensenovaFixtureSplitMarker separates the docs HTML from the models
	// bundle inside a fixture file.
	sensenovaFixtureSplitMarker = "\n===SENSENOVA_MODELS_BUNDLE===\n"

	// sensenovaOverviewCardPattern matches one model overview card. Capture
	// groups: (1) heading text, (2) calls allowed per 5 hours, (3) model ID.
	sensenovaOverviewCardPattern = regexp.MustCompile(`(?s)<h4[^>]*>([^<]+)</h4>.*?调用次数限制</p><p[^>]*>每5小时([0-9]+)次</p>.*?MODEL ID</p><code[^>]*>([^<]+)</code>`)

	// sensenovaModelsScriptPattern captures the src of every
	// /_next/static/chunks/*.js script reference.
	sensenovaModelsScriptPattern = regexp.MustCompile(`src="([^"]+/_next/static/chunks/[^"]+\.js|/_next/static/chunks/[^"]+\.js)"`)

	// sensenovaPricingZeroPattern matches a "pricing" JSON object whose prompt,
	// completion, image and request values are all "0".
	sensenovaPricingZeroPattern = regexp.MustCompile(`(?s)"pricing"\s*:\s*\{\s*"prompt"\s*:\s*"0"\s*,\s*"completion"\s*:\s*"0"\s*,\s*"image"\s*:\s*"0"\s*,\s*"request"\s*:\s*"0"`)
)
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var docsURL string
|
||||
var modelsURL string
|
||||
var fixture string
|
||||
var dryRun bool
|
||||
var timeoutSeconds int
|
||||
|
||||
flag.StringVar(&docsURL, "docs-url", defaultSensenovaDocsURL, "商汤 SenseNova API 文档页")
|
||||
flag.StringVar(&modelsURL, "models-url", defaultSensenovaModelsURL, "商汤 SenseNova 模型页")
|
||||
flag.StringVar(&fixture, "fixture", "", "商汤 SenseNova 价格样例文件")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 45, "请求超时(秒)")
|
||||
flag.Parse()
|
||||
|
||||
cfg := sensenovaPricingImportConfig{
|
||||
DocsURL: docsURL,
|
||||
ModelsURL: modelsURL,
|
||||
Fixture: fixture,
|
||||
DryRun: dryRun,
|
||||
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||
}
|
||||
|
||||
var db *sql.DB
|
||||
var err error
|
||||
if !cfg.DryRun {
|
||||
db, err = subscriptionImportDB()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
}
|
||||
|
||||
if err := runSensenovaPricingImport(cfg, db, os.Stdout); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "import_sensenova_pricing: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func runSensenovaPricingImport(cfg sensenovaPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||||
fixture, err := fetchSensenovaPricingFixture(cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records, err := parseSensenovaPricingCatalog(fixture)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records = dedupeOfficialPricingRecords(records)
|
||||
if len(records) == 0 {
|
||||
return fmt.Errorf("unexpected sensenova pricing content: no records")
|
||||
}
|
||||
if cfg.DryRun {
|
||||
_, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||||
return err
|
||||
}
|
||||
if db == nil {
|
||||
return fmt.Errorf("db is required when dry-run=false")
|
||||
}
|
||||
if err := upsertOfficialPricingRecords(db, records, "sensenova-pricing-import"); err != nil {
|
||||
return err
|
||||
}
|
||||
var tableRows int
|
||||
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||||
return fmt.Errorf("count region_pricing: %w", err)
|
||||
}
|
||||
_, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||||
return err
|
||||
}
|
||||
|
||||
func fetchSensenovaPricingFixture(cfg sensenovaPricingImportConfig) (sensenovaPricingFixture, error) {
|
||||
if strings.TrimSpace(cfg.Fixture) != "" {
|
||||
data, err := os.ReadFile(cfg.Fixture)
|
||||
if err != nil {
|
||||
return sensenovaPricingFixture{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
|
||||
}
|
||||
return splitSensenovaFixture(string(data))
|
||||
}
|
||||
|
||||
docsHTML, err := fetchRenderedPricingPageWithChromium(cfg.DocsURL, cfg.Timeout)
|
||||
if err != nil {
|
||||
return sensenovaPricingFixture{}, fmt.Errorf("fetch docs render: %w", err)
|
||||
}
|
||||
modelsText, err := fetchSensenovaModelsBundle(cfg.ModelsURL, cfg.Timeout)
|
||||
if err != nil {
|
||||
return sensenovaPricingFixture{}, err
|
||||
}
|
||||
return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil
|
||||
}
|
||||
|
||||
func splitSensenovaFixture(raw string) (sensenovaPricingFixture, error) {
|
||||
parts := strings.SplitN(raw, sensenovaFixtureSplitMarker, 2)
|
||||
if len(parts) != 2 {
|
||||
return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: missing models bundle marker")
|
||||
}
|
||||
docsHTML := strings.TrimSpace(parts[0])
|
||||
modelsText := strings.TrimSpace(parts[1])
|
||||
if docsHTML == "" || modelsText == "" {
|
||||
return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: empty docs or models segment")
|
||||
}
|
||||
return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil
|
||||
}
|
||||
|
||||
func fetchSensenovaModelsBundle(modelsURL string, timeout time.Duration) (string, error) {
|
||||
client := &http.Client{Timeout: timeout}
|
||||
html, err := fetchRawPricingPage(modelsURL, "", client)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("fetch models page shell: %w", err)
|
||||
}
|
||||
scripts := sensenovaModelsScriptPattern.FindAllStringSubmatch(html, -1)
|
||||
if len(scripts) == 0 {
|
||||
return "", fmt.Errorf("unexpected sensenova models page: no chunk scripts found")
|
||||
}
|
||||
seen := make(map[string]struct{}, len(scripts))
|
||||
for _, match := range scripts {
|
||||
if len(match) != 2 {
|
||||
continue
|
||||
}
|
||||
scriptURL, err := resolveSensenovaAssetURL(modelsURL, match[1])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[scriptURL]; ok {
|
||||
continue
|
||||
}
|
||||
seen[scriptURL] = struct{}{}
|
||||
bundle, err := fetchRawPricingPage(scriptURL, "", client)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if sensenovaBundleConfirmsFreeBeta(bundle) {
|
||||
return bundle, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found")
|
||||
}
|
||||
|
||||
// resolveSensenovaAssetURL resolves assetPath (absolute or relative) against
// baseURL and returns the resulting absolute URL string.
// A parse failure of either argument is returned unchanged.
func resolveSensenovaAssetURL(baseURL string, assetPath string) (string, error) {
	base, baseErr := url.Parse(baseURL)
	if baseErr != nil {
		return "", baseErr
	}
	ref, refErr := url.Parse(assetPath)
	if refErr != nil {
		return "", refErr
	}
	resolved := base.ResolveReference(ref)
	return resolved.String(), nil
}
|
||||
|
||||
// sensenovaBundleConfirmsFreeBeta reports whether raw contains both a
// "free during public beta" statement and an "all models included" statement,
// each in either its Chinese or its English wording.
func sensenovaBundleConfirmsFreeBeta(raw string) bool {
	containsAny := func(phrases ...string) bool {
		for _, phrase := range phrases {
			if strings.Contains(raw, phrase) {
				return true
			}
		}
		return false
	}
	if !containsAny("公测期完全免费开放", "free during public beta") {
		return false
	}
	return containsAny("所有模型完全开放", "all models included")
}
|
||||
|
||||
func fetchRenderedPricingPageWithChromium(pageURL string, timeout time.Duration) (string, error) {
|
||||
browserPath, err := lookupChromiumBinaryForSensenova()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, browserPath,
|
||||
"--headless",
|
||||
"--no-sandbox",
|
||||
"--disable-gpu",
|
||||
"--virtual-time-budget=8000",
|
||||
"--dump-dom",
|
||||
pageURL,
|
||||
)
|
||||
cmd.Stderr = io.Discard
|
||||
out, err := cmd.Output()
|
||||
if ctx.Err() == context.DeadlineExceeded {
|
||||
return "", fmt.Errorf("chromium render timeout after %s", timeout)
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("chromium dump-dom: %w", err)
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return "", fmt.Errorf("chromium dump-dom returned empty output")
|
||||
}
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
// lookupChromiumBinaryForSensenova returns the path of the first
// Chromium-compatible browser found in PATH, trying the candidate names in
// order, or an error when none is found.
func lookupChromiumBinaryForSensenova() (string, error) {
	candidates := []string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"}
	for _, candidate := range candidates {
		resolved, err := exec.LookPath(candidate)
		if err != nil {
			continue
		}
		return resolved, nil
	}
	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
}
|
||||
|
||||
func parseSensenovaPricingCatalog(fixture sensenovaPricingFixture) ([]officialPricingRecord, error) {
|
||||
if !sensenovaBundleConfirmsFreeBeta(fixture.ModelsText) {
|
||||
return nil, fmt.Errorf("unexpected sensenova models bundle: missing public-beta free signal")
|
||||
}
|
||||
if !strings.Contains(fixture.DocsHTML, "GET https://token.sensenova.cn/v1/models") {
|
||||
return nil, fmt.Errorf("unexpected sensenova docs content: missing list models endpoint")
|
||||
}
|
||||
if !sensenovaPricingZeroPattern.MatchString(fixture.DocsHTML) {
|
||||
return nil, fmt.Errorf("unexpected sensenova docs content: missing zero pricing object example")
|
||||
}
|
||||
|
||||
matches := sensenovaOverviewCardPattern.FindAllStringSubmatch(fixture.DocsHTML, -1)
|
||||
if len(matches) == 0 {
|
||||
return nil, fmt.Errorf("unexpected sensenova docs content: no model overview cards parsed")
|
||||
}
|
||||
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata("SenseTime")
|
||||
records := make([]officialPricingRecord, 0, len(matches))
|
||||
seenModelIDs := make(map[string]struct{}, len(matches))
|
||||
for _, match := range matches {
|
||||
if len(match) != 4 {
|
||||
continue
|
||||
}
|
||||
modelName := strings.TrimSpace(match[1])
|
||||
modelID := strings.TrimSpace(match[3])
|
||||
if modelName == "" || modelID == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seenModelIDs[modelID]; ok {
|
||||
continue
|
||||
}
|
||||
seenModelIDs[modelID] = struct{}{}
|
||||
sectionID := sensenovaSectionIDForModel(modelID)
|
||||
section, err := extractHTMLSectionByID(fixture.DocsHTML, sectionID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
providerName := sensenovaProviderName(modelID)
|
||||
providerCn, providerCountryCode, providerSite := providerNameCn, providerCountry, providerWebsite
|
||||
if providerName != "SenseTime" {
|
||||
providerCn, providerCountryCode, providerSite = providerMetadata(providerName)
|
||||
}
|
||||
records = append(records, officialPricingRecord{
|
||||
ModelID: normalizeExternalID("sensenova", modelID),
|
||||
ModelName: modelName,
|
||||
ProviderName: providerName,
|
||||
ProviderNameCn: providerCn,
|
||||
ProviderCountry: providerCountryCode,
|
||||
ProviderWebsite: providerSite,
|
||||
OperatorName: "SenseNova API",
|
||||
OperatorNameCn: "日日新开放平台",
|
||||
OperatorCountry: "CN",
|
||||
OperatorWebsite: defaultSensenovaDocsURL,
|
||||
OperatorType: "official",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: 0,
|
||||
OutputPrice: 0,
|
||||
IsFree: true,
|
||||
ContextLength: sensenovaContextLength(modelID, section),
|
||||
SourceURL: defaultSensenovaDocsURL,
|
||||
ModelSourceURL: firstNonEmptyText(defaultSensenovaDocsURL+"#"+sectionID, defaultSensenovaDocsURL),
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_pricing",
|
||||
Modality: sensenovaModality(modelID, section),
|
||||
})
|
||||
}
|
||||
if len(records) == 0 {
|
||||
return nil, fmt.Errorf("unexpected sensenova pricing content: empty records after parse")
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
|
||||
// extractHTMLSectionByID returns the slice of raw that starts at the opening
// tag `<section id="sectionID"` and ends just before the next `<section id=`
// occurrence, or at the end of raw when there is none.
// An error is returned when the opening tag is not found.
func extractHTMLSectionByID(raw string, sectionID string) (string, error) {
	opening := fmt.Sprintf(`<section id="%s"`, sectionID)

	begin := strings.Index(raw, opening)
	if begin < 0 {
		return "", fmt.Errorf("unexpected sensenova docs content: missing section %s", sectionID)
	}

	section := raw[begin:]
	// Search past the opening tag so it does not match itself.
	if offset := strings.Index(section[len(opening):], "<section id="); offset >= 0 {
		section = section[:len(opening)+offset]
	}
	return section, nil
}
|
||||
|
||||
// sensenovaSectionIDForModel maps a SenseNova model ID to the id of its
// section on the docs page. Unknown model IDs yield the empty string.
func sensenovaSectionIDForModel(modelID string) string {
	sectionByModel := map[string]string{
		"sensenova-6.7-flash-lite": "model-flash",
		"sensenova-u1-fast":        "model-u1",
		"deepseek-v4-flash":        "model-deepseek-v4-flash",
	}
	// A missing key yields the zero value, i.e. "".
	return sectionByModel[modelID]
}
|
||||
|
||||
// sensenovaProviderName returns "DeepSeek" for model IDs that start with
// "deepseek" (ignoring case and surrounding whitespace) and "SenseTime" for
// everything else.
func sensenovaProviderName(modelID string) string {
	normalized := strings.ToLower(strings.TrimSpace(modelID))
	if !strings.HasPrefix(normalized, "deepseek") {
		return "SenseTime"
	}
	return "DeepSeek"
}
|
||||
|
||||
// sensenovaContextLength returns 256*1024 when modelID is one of the two
// models checked here and its docs section mentions a 256K context;
// in every other case it returns 0.
func sensenovaContextLength(modelID string, section string) int {
	if modelID != "sensenova-6.7-flash-lite" && modelID != "deepseek-v4-flash" {
		return 0
	}
	for _, hint := range []string{"上下文长度 256K tokens", "256K 上下文"} {
		if strings.Contains(section, hint) {
			return 256 * 1024
		}
	}
	return 0
}
|
||||
|
||||
// sensenovaModality derives a modality label from the model ID and the text
// of its docs section:
//   - sensenova-u1-fast: "image" when the section mentions the image
//     generation endpoint, otherwise "multimodal";
//   - sensenova-6.7-flash-lite: "multimodal" when the section mentions image
//     input understanding, otherwise "text";
//   - any other model: "text".
func sensenovaModality(modelID string, section string) string {
	if modelID == "sensenova-u1-fast" {
		if strings.Contains(section, "/v1/images/generations") {
			return "image"
		}
		return "multimodal"
	}
	if modelID == "sensenova-6.7-flash-lite" && strings.Contains(section, "图像输入理解") {
		return "multimodal"
	}
	return "text"
}
|
||||
69
scripts/import_sensenova_pricing_test.go
Normal file
69
scripts/import_sensenova_pricing_test.go
Normal file
@@ -0,0 +1,69 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseSensenovaPricingCatalogBuildsRecords(t *testing.T) {
|
||||
raw, err := os.ReadFile(filepath.Join("testdata", "sensenova_pricing_sample.txt"))
|
||||
if err != nil {
|
||||
t.Fatalf("读取 fixture 失败: %v", err)
|
||||
}
|
||||
fixture, err := splitSensenovaFixture(string(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("splitSensenovaFixture 返回错误: %v", err)
|
||||
}
|
||||
|
||||
records, err := parseSensenovaPricingCatalog(fixture)
|
||||
if err != nil {
|
||||
t.Fatalf("parseSensenovaPricingCatalog 返回错误: %v", err)
|
||||
}
|
||||
if len(records) != 3 {
|
||||
t.Fatalf("期望 3 条商汤价格记录,实际 %d", len(records))
|
||||
}
|
||||
if records[0].ModelID != "sensenova-sensenova-6-7-flash-lite" {
|
||||
t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
|
||||
}
|
||||
if !records[0].IsFree || records[0].InputPrice != 0 || records[0].OutputPrice != 0 {
|
||||
t.Fatalf("Flash-Lite 免费定价错误: free=%v input=%v output=%v", records[0].IsFree, records[0].InputPrice, records[0].OutputPrice)
|
||||
}
|
||||
if records[0].ContextLength != 262144 || records[0].Modality != "multimodal" {
|
||||
t.Fatalf("Flash-Lite 上下文/模态错误: ctx=%d modality=%q", records[0].ContextLength, records[0].Modality)
|
||||
}
|
||||
if records[1].ModelID != "sensenova-sensenova-u1-fast" || records[1].Modality != "image" {
|
||||
t.Fatalf("U1 Fast 记录错误: %+v", records[1])
|
||||
}
|
||||
if records[2].ProviderName != "DeepSeek" || records[2].ContextLength != 262144 {
|
||||
t.Fatalf("DeepSeek V4 Flash provider/context 错误: provider=%q ctx=%d", records[2].ProviderName, records[2].ContextLength)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSensenovaPricingImportDryRunPrintsSummary(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
err := runSensenovaPricingImport(sensenovaPricingImportConfig{
|
||||
DocsURL: defaultSensenovaDocsURL,
|
||||
ModelsURL: defaultSensenovaModelsURL,
|
||||
Fixture: filepath.Join("testdata", "sensenova_pricing_sample.txt"),
|
||||
DryRun: true,
|
||||
}, nil, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("runSensenovaPricingImport 返回错误: %v", err)
|
||||
}
|
||||
output := out.String()
|
||||
for _, want := range []string{
|
||||
"source=sensenova-pricing-import",
|
||||
"models=3",
|
||||
"operator=SenseNova API",
|
||||
"dry_run=true",
|
||||
} {
|
||||
if !strings.Contains(output, want) {
|
||||
t.Fatalf("输出缺少 %q,实际: %q", want, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
217
scripts/import_xfyun_pricing.go
Normal file
217
scripts/import_xfyun_pricing.go
Normal file
@@ -0,0 +1,217 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const defaultXfyunPricingURL = "https://xinghuo.xfyun.cn/sparkapi?scr=price"
|
||||
|
||||
// xfyunPricingImportConfig carries the settings for one run of the xfyun
// pricing import.
type xfyunPricingImportConfig struct {
	// URL is the pricing page that is fetched when no fixture is given.
	URL string
	// Fixture, when non-blank, is a local file read instead of fetching URL.
	Fixture string
	// DryRun makes the import print its summary without writing to the database.
	DryRun bool
	// Timeout bounds the HTTP fetch and the headless-browser render.
	Timeout time.Duration
}
|
||||
|
||||
// xfyunPricingCardPattern matches one pricing card in the rendered page: a
// cardTitle div immediately followed by a cardPrice div whose first span holds
// the numeric price and whose second span holds the 元/百万tokens unit.
// Submatch 1 is the card title, submatch 2 the price digits.
var xfyunPricingCardPattern = regexp.MustCompile(`(?s)<div class="apiprice_cardTitle__[^"]+">([^<]+)</div><div class="apiprice_cardPrice__[^"]+"><span>([0-9]+(?:\.[0-9]+)?)</span><span class="[^"]+">元/百万tokens</span>`)
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var url string
|
||||
var fixture string
|
||||
var dryRun bool
|
||||
var timeoutSeconds int
|
||||
|
||||
flag.StringVar(&url, "url", defaultXfyunPricingURL, "讯飞官方价格页")
|
||||
flag.StringVar(&fixture, "fixture", "", "讯飞价格样例文件")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 30, "请求超时(秒)")
|
||||
flag.Parse()
|
||||
|
||||
cfg := xfyunPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
||||
|
||||
var db *sql.DB
|
||||
var err error
|
||||
if !cfg.DryRun {
|
||||
db, err = subscriptionImportDB()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
}
|
||||
|
||||
if err := runXfyunPricingImport(cfg, db, os.Stdout); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "import_xfyun_pricing: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func runXfyunPricingImport(cfg xfyunPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||||
raw, err := fetchXfyunPricingPage(cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records, err := parseXfyunPricingCatalog(raw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records = dedupeOfficialPricingRecords(records)
|
||||
if len(records) == 0 {
|
||||
return fmt.Errorf("unexpected xfyun pricing content: no records")
|
||||
}
|
||||
if cfg.DryRun {
|
||||
_, err = fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||||
return err
|
||||
}
|
||||
if db == nil {
|
||||
return fmt.Errorf("db is required when dry-run=false")
|
||||
}
|
||||
if err := upsertOfficialPricingRecords(db, records, "xfyun-pricing-import"); err != nil {
|
||||
return err
|
||||
}
|
||||
var tableRows int
|
||||
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||||
return fmt.Errorf("count region_pricing: %w", err)
|
||||
}
|
||||
_, err = fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||||
return err
|
||||
}
|
||||
|
||||
func fetchXfyunPricingPage(cfg xfyunPricingImportConfig) (string, error) {
|
||||
if strings.TrimSpace(cfg.Fixture) != "" {
|
||||
data, err := os.ReadFile(cfg.Fixture)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: cfg.Timeout}
|
||||
raw, err := fetchRawPricingPage(cfg.URL, "", client)
|
||||
if err == nil && strings.Contains(raw, "apiprice_cardTitle__") {
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
rendered, renderErr := fetchXfyunPricingPageWithChromium(cfg.URL, cfg.Timeout)
|
||||
if renderErr != nil {
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("fetch shell failed: %v; chromium render failed: %w", err, renderErr)
|
||||
}
|
||||
return "", renderErr
|
||||
}
|
||||
return rendered, nil
|
||||
}
|
||||
|
||||
func fetchXfyunPricingPageWithChromium(url string, timeout time.Duration) (string, error) {
|
||||
browserPath, err := lookupChromiumBinary()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, browserPath,
|
||||
"--headless",
|
||||
"--no-sandbox",
|
||||
"--disable-gpu",
|
||||
"--dump-dom",
|
||||
url,
|
||||
)
|
||||
cmd.Stderr = io.Discard
|
||||
out, err := cmd.Output()
|
||||
if ctx.Err() == context.DeadlineExceeded {
|
||||
return "", fmt.Errorf("chromium render timeout after %s", timeout)
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("chromium dump-dom: %w", err)
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return "", fmt.Errorf("chromium dump-dom returned empty output")
|
||||
}
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
// lookupChromiumBinary searches PATH for a Chromium-compatible browser and
// returns the path of the first candidate found, trying "chromium",
// "chromium-browser", "google-chrome" and "google-chrome-stable" in that
// order. It returns an error when none of them can be resolved.
func lookupChromiumBinary() (string, error) {
	candidates := [...]string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"}
	for i := 0; i < len(candidates); i++ {
		resolved, err := exec.LookPath(candidates[i])
		if err != nil {
			continue
		}
		return resolved, nil
	}
	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
}
|
||||
|
||||
func parseXfyunPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||
matches := xfyunPricingCardPattern.FindAllStringSubmatch(raw, -1)
|
||||
if len(matches) == 0 {
|
||||
return nil, fmt.Errorf("unexpected xfyun pricing content: no pricing cards found")
|
||||
}
|
||||
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata("iFlytek")
|
||||
records := make([]officialPricingRecord, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
if len(match) != 3 {
|
||||
continue
|
||||
}
|
||||
title := strings.TrimSpace(match[1])
|
||||
modelName := xfyunCanonicalModelName(title)
|
||||
if modelName == "" {
|
||||
continue
|
||||
}
|
||||
price := mustParseSubscriptionPrice(match[2])
|
||||
records = append(records, officialPricingRecord{
|
||||
ModelID: normalizeExternalID("xfyun", modelName),
|
||||
ModelName: modelName,
|
||||
ProviderName: "iFlytek",
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "Spark API",
|
||||
OperatorNameCn: "讯飞星火 API",
|
||||
OperatorCountry: "CN",
|
||||
OperatorWebsite: defaultXfyunPricingURL,
|
||||
OperatorType: "official",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: price,
|
||||
OutputPrice: price,
|
||||
IsFree: price == 0,
|
||||
SourceURL: defaultXfyunPricingURL,
|
||||
ModelSourceURL: defaultXfyunPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_pricing",
|
||||
Modality: "text",
|
||||
})
|
||||
}
|
||||
if len(records) == 0 {
|
||||
return nil, fmt.Errorf("unexpected xfyun pricing content: empty records after canonical mapping")
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
|
||||
// xfyunCanonicalModelName converts a pricing-card title into the canonical
// Spark model name. Surrounding whitespace is ignored; titles that are not
// recognised yield the empty string.
func xfyunCanonicalModelName(title string) string {
	trimmed := strings.TrimSpace(title)
	if trimmed == "X2/X1.5模型" {
		return "Spark X2/X1.5"
	}
	if trimmed == "Ultra模型" {
		return "Spark Ultra"
	}
	if trimmed == "Pro模型" {
		return "Spark Pro"
	}
	if trimmed == "Lite模型" {
		return "Spark Lite"
	}
	return ""
}
|
||||
61
scripts/import_xfyun_pricing_test.go
Normal file
61
scripts/import_xfyun_pricing_test.go
Normal file
@@ -0,0 +1,61 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseXfyunPricingCatalogBuildsRecords(t *testing.T) {
|
||||
raw, err := os.ReadFile(filepath.Join("testdata", "xfyun_pricing_sample.html"))
|
||||
if err != nil {
|
||||
t.Fatalf("读取 fixture 失败: %v", err)
|
||||
}
|
||||
|
||||
records, err := parseXfyunPricingCatalog(string(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("parseXfyunPricingCatalog 返回错误: %v", err)
|
||||
}
|
||||
if len(records) != 4 {
|
||||
t.Fatalf("期望 4 条讯飞价格记录,实际 %d", len(records))
|
||||
}
|
||||
if records[0].ModelID != "xfyun-spark-x2-x1-5" {
|
||||
t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
|
||||
}
|
||||
if records[0].InputPrice != 2 || records[0].OutputPrice != 2 {
|
||||
t.Fatalf("Spark X2/X1.5 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice)
|
||||
}
|
||||
if records[1].ModelName != "Spark Ultra" || records[1].InputPrice != 0.8 {
|
||||
t.Fatalf("Spark Ultra 解析错误: %+v", records[1])
|
||||
}
|
||||
if !records[3].IsFree || records[3].InputPrice != 0 || records[3].OutputPrice != 0 {
|
||||
t.Fatalf("Spark Lite 免费定价错误: %+v", records[3])
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunXfyunPricingImportDryRunPrintsSummary(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
err := runXfyunPricingImport(xfyunPricingImportConfig{
|
||||
URL: defaultXfyunPricingURL,
|
||||
Fixture: filepath.Join("testdata", "xfyun_pricing_sample.html"),
|
||||
DryRun: true,
|
||||
}, nil, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("runXfyunPricingImport 返回错误: %v", err)
|
||||
}
|
||||
output := out.String()
|
||||
for _, want := range []string{
|
||||
"source=xfyun-pricing-import",
|
||||
"models=4",
|
||||
"operator=Spark API",
|
||||
"dry_run=true",
|
||||
} {
|
||||
if !strings.Contains(output, want) {
|
||||
t.Fatalf("输出缺少 %q,实际: %q", want, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
24
scripts/testdata/baichuan_pricing_sample.txt
vendored
Normal file
24
scripts/testdata/baichuan_pricing_sample.txt
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
价格说明
|
||||
计费模式
|
||||
按照实际使用的数据量(千tokens)收费。
|
||||
通用大模型
|
||||
模型调用 Baichuan-M3-Plus 32k 00:00 ~ 24:00 输入:0.005元/千tokens
|
||||
输出:0.009元/千tokens 包括对话全流程节点产生的Token总数
|
||||
模型调用 Baichuan-M3 32k 00:00 ~ 24:00 输入:0.01元/千tokens
|
||||
输出:0.03元/千tokens
|
||||
模型调用 Baichuan-M2-Plus 32k 00:00 ~ 24:00 输入:0.01元/千tokens
|
||||
输出:0.03元/千tokens 包括对话全流程节点产生的Token总数
|
||||
模型调用 Baichuan-M2 32k 00:00 ~ 24:00 输入:0.002元/千tokens
|
||||
输出:0.02元/千tokens
|
||||
模型调用 Baichuan4-Turbo 32k 00:00 ~ 24:00 0.015元/千tokens 包含输入和输出
|
||||
模型调用 Baichuan4-Air 32k 00:00 ~ 24:00 0.00098元/千tokens 包含输入和输出
|
||||
模型调用 Baichuan4 32k 00:00 ~ 24:00 0.1元/千tokens 包含输入和输出
|
||||
模型调用 Baichuan3-Turbo 32k 00:00 ~ 24:00 0.012元/千tokens 包含输入和输出
|
||||
模型调用 Baichuan3-Turbo-128k 128k 00:00 ~ 24:00 0.024元/千tokens 包含输入和输出
|
||||
模型调用 Baichuan2-Turbo 32k 00:00 ~ 24:00 0.008元/千tokens 包含输入和输出
|
||||
模型调用 Baichuan2-53B 32k 00:00 ~ 8:00 0.01元/千tokens 包含输入和输出
|
||||
8:00 ~ 24:00 0.02元/千tokens
|
||||
搜索增强服务 - 00:00 ~ 24:00 0.03元/次 开启 web_search 后,接口自动判断调用搜索增强服务的次数
|
||||
医疗搜索 - 00:00 ~ 24:00 0.03元/次 调用Baichuan-M2-Plus对话会自动触发医疗搜索
|
||||
知识库
|
||||
模型调用 Baichuan-Text-Embedding 00:00 ~ 24:00 0.0005元/千tokens
|
||||
1
scripts/testdata/lingyiwanwu_pricing_sample.txt
vendored
Normal file
1
scripts/testdata/lingyiwanwu_pricing_sample.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
模型与计费","children":"模型与计费"}],"\n",["$","p",null,{"children":"零一万物 API 开放平台提供一系列具有不同功能和定价的 Yi 系列大模型。"}],"\n",["$","table",null,{"children":[["$","thead",null,{"children":["$","tr",null,{"children":[["$","th",null,{"children":"模型"}],["$","th",null,{"children":"上下文长度"}],["$","th",null,{"children":"特性"}],["$","th",null,{"children":"场景"}],["$","th",null,{"children":"价格/1M token"}]]}]}],["$","tbody",null,{"children":[["$","tr",null,{"children":[["$","td",null,{"children":"yi-lightning"}],["$","td",null,{"children":"16K"}],["$","td",null,{"children":"最新高性能模型,保证高质量输出同时,推理速度大幅提升。"}],["$","td",null,{"children":"适用于实时交互,高复杂推理场景。"}],["$","td",null,{"children":"¥0.99"}]]}],["$","tr",null,{"children":[["$","td",null,{"children":"yi-vision-v2"}],["$","td",null,{"children":"16K"}],["$","td",null,{"children":"复杂视觉任务模型,提供基于多张图片的高性能理解、分析能力。"}],["$","td",null,{"children":"适合图片问答、OCR、视觉推理。"}],["$","td",null,{"children":"¥6"}]]}]]}]]}],"\n",["$","p",null,{"children":["$","strong",null,{"children":"关于计费"}]}]
|
||||
44
scripts/testdata/sensenova_pricing_sample.txt
vendored
Normal file
44
scripts/testdata/sensenova_pricing_sample.txt
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
<section id="model-overview" class="scroll-mt-doc-anchor pt-10">
|
||||
<div class="flex h-full flex-col rounded-2xl border border-slate-200/95 bg-white p-5 shadow-[0_1px_3px_rgba(15,23,42,0.06)]">
|
||||
<div class="flex flex-wrap items-start justify-between gap-2 gap-y-2"><h4 class="text-lg font-semibold tracking-tight text-slate-900">SenseNova 6.7 Flash-Lite</h4></div>
|
||||
<div class="mt-3 flex min-h-0 flex-1 flex-col"><p class="text-[15px] leading-relaxed text-slate-600">面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解</p><div class="mt-4 rounded-lg bg-slate-50/80 px-3 py-2.5"><p class="text-[11px] font-semibold uppercase tracking-[0.12em] text-slate-500">调用次数限制</p><p class="mt-1 text-[14px] font-medium text-slate-800">每5小时1500次</p></div></div>
|
||||
<div class="mt-[12px] flex items-start justify-between gap-3 rounded-xl bg-slate-100 px-3.5 py-2.5"><div class="min-w-0 flex-1"><p class="text-[10px] font-semibold uppercase tracking-[0.14em] text-slate-500">MODEL ID</p><code class="mt-1 block break-all font-mono text-[14px] font-semibold text-indigo-950">sensenova-6.7-flash-lite</code></div></div>
|
||||
</div>
|
||||
<div class="flex h-full flex-col rounded-2xl border border-slate-200/95 bg-white p-5 shadow-[0_1px_3px_rgba(15,23,42,0.06)]">
|
||||
<div class="flex flex-wrap items-start justify-between gap-2 gap-y-2"><h4 class="text-lg font-semibold tracking-tight text-slate-900">SenseNova U1 Fast</h4></div>
|
||||
<div class="mt-3 flex min-h-0 flex-1 flex-col"><p class="text-[15px] leading-relaxed text-slate-600">基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成</p><div class="mt-4 rounded-lg bg-slate-50/80 px-3 py-2.5"><p class="text-[11px] font-semibold uppercase tracking-[0.12em] text-slate-500">调用次数限制</p><p class="mt-1 text-[14px] font-medium text-slate-800">每5小时1500次</p></div></div>
|
||||
<div class="mt-[12px] flex items-start justify-between gap-3 rounded-xl bg-slate-100 px-3.5 py-2.5"><div class="min-w-0 flex-1"><p class="text-[10px] font-semibold uppercase tracking-[0.14em] text-slate-500">MODEL ID</p><code class="mt-1 block break-all font-mono text-[14px] font-semibold text-indigo-950">sensenova-u1-fast</code></div></div>
|
||||
</div>
|
||||
<div class="flex h-full flex-col rounded-2xl border border-slate-200/95 bg-white p-5 shadow-[0_1px_3px_rgba(15,23,42,0.06)]">
|
||||
<div class="flex flex-wrap items-start justify-between gap-2 gap-y-2"><h4 class="text-lg font-semibold tracking-tight text-slate-900">DeepSeek V4 Flash</h4></div>
|
||||
<div class="mt-3 flex min-h-0 flex-1 flex-col"><p class="text-[15px] leading-relaxed text-slate-600">DeepSeek 高性能对话模型,支持思考/非思考模式、256K 上下文、工具调用</p><div class="mt-4 rounded-lg bg-slate-50/80 px-3 py-2.5"><p class="text-[11px] font-semibold uppercase tracking-[0.12em] text-slate-500">调用次数限制</p><p class="mt-1 text-[14px] font-medium text-slate-800">每5小时150次</p></div></div>
|
||||
<div class="mt-[12px] flex items-start justify-between gap-3 rounded-xl bg-slate-100 px-3.5 py-2.5"><div class="min-w-0 flex-1"><p class="text-[10px] font-semibold uppercase tracking-[0.14em] text-slate-500">MODEL ID</p><code class="mt-1 block break-all font-mono text-[14px] font-semibold text-indigo-950">deepseek-v4-flash</code></div></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="model-flash" class="scroll-mt-doc-anchor pt-10"><h2 class="text-2xl font-bold text-slate-900">SenseNova 6.7 Flash-Lite</h2><p class="mt-3 text-slate-600">面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解。</p><ul class="mt-3 list-disc space-y-1.5 pl-6 text-slate-700"><li>轻量高效,兼顾效果、成本与落地性</li><li>原生多模态架构,支持图像输入理解(OCR、图表解读等)</li><li>上下文长度 256K tokens(最大输入 252K,最大输出 64K)</li></ul><p class="mt-4 text-slate-700"><strong class="font-semibold text-slate-900">MODEL ID:</strong> <code class="rounded bg-slate-100 px-1 font-mono text-xs">sensenova-6.7-flash-lite</code></p></section>
|
||||
<section id="model-u1" class="scroll-mt-doc-anchor pt-10"><h2 class="text-2xl font-bold text-slate-900">SenseNova U1 Fast</h2><p class="mt-3 text-slate-600">SenseNova U1 Fast 基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成场景。</p><p class="mt-3 text-slate-700"><strong class="font-semibold text-slate-900">MODEL ID:</strong> <code class="rounded bg-slate-100 px-1 font-mono text-xs">sensenova-u1-fast</code></p><p class="mt-3 text-slate-700"><strong class="font-semibold text-slate-900">注意:</strong> U1 Fast 使用独立的图像生成接口 POST /v1/images/generations,不是 Chat Completions;不支持图像输入。</p></section>
|
||||
<section id="model-deepseek-v4-flash" class="scroll-mt-doc-anchor pt-10"><h2 class="text-2xl font-bold text-slate-900">DeepSeek V4 Flash</h2><p class="mt-3 text-slate-600">DeepSeek 高性能对话模型,支持思考模式与非思考模式,上下文长度 256K tokens,最大输出 64K tokens,内置 JSON Output、Tool Calls等功能。</p><p class="mt-3 text-slate-700"><strong class="font-semibold text-slate-900">MODEL ID:</strong> <code class="rounded bg-slate-100 px-1 font-mono text-xs">deepseek-v4-flash</code></p></section>
|
||||
<div class="relative rounded-xl border border-slate-700/70 bg-slate-900 mt-3 w-full"><div class="codeblock-scroll max-h-[350px] overflow-auto"><div><pre class="m-0 overflow-x-auto whitespace-pre font-mono text-[0.8125rem] leading-relaxed text-slate-100 px-4 py-3"><code>GET https://token.sensenova.cn/v1/models</code></pre></div></div></div>
|
||||
<pre><code>{
|
||||
"data": [
|
||||
{
|
||||
"id": "sensenova-6.7-flash-lite",
|
||||
"name": "sensenova-6.7-flash-lite",
|
||||
"created": 1777392000,
|
||||
"input_modalities": ["text", "image"],
|
||||
"output_modalities": ["text"],
|
||||
"quantization": "fp8",
|
||||
"context_length": 262144,
|
||||
"max_output_length": 65536,
|
||||
"pricing": {
|
||||
"prompt": "0",
|
||||
"completion": "0",
|
||||
"image": "0",
|
||||
"request": "0",
|
||||
"input_cache_read": "0"
|
||||
}
|
||||
}
|
||||
]
|
||||
}</code></pre>
|
||||
===SENSENOVA_MODELS_BUNDLE===
|
||||
{"subtitle":"兼容 OpenAI 接口,按量透明计费,公测期内免费开放","freeDesc":"公测期完全免费开放","promoLine2":",所有模型完全开放"}
|
||||
1
scripts/testdata/xfyun_pricing_sample.html
vendored
Normal file
1
scripts/testdata/xfyun_pricing_sample.html
vendored
Normal file
@@ -0,0 +1 @@
|
||||
<div class="apiprice_priceCards__3gD4l"><div class="apiprice_priceCardItem__gMfGL"><div class="apiprice_cardTitle__NBzoB">X2/X1.5模型</div><div class="apiprice_cardPrice__UdXTV"><span>2</span><span class="apiprice_unit__DWiuh">元/百万tokens</span></div></div><div class="apiprice_priceCardItem__gMfGL"><div class="apiprice_cardTitle__NBzoB">Ultra模型</div><div class="apiprice_cardPrice__UdXTV"><span>0.8</span><span class="apiprice_unit__DWiuh">元/百万tokens</span></div></div><div class="apiprice_priceCardItem__gMfGL"><div class="apiprice_cardTitle__NBzoB">Pro模型</div><div class="apiprice_cardPrice__UdXTV"><span>5</span><span class="apiprice_unit__DWiuh">元/百万tokens</span></div></div><div class="apiprice_priceCardItem__gMfGL"><div class="apiprice_cardTitle__NBzoB">Lite模型</div><div class="apiprice_cardPrice__UdXTV"><span>0</span><span class="apiprice_unit__DWiuh">元/百万tokens</span></div></div></div>
|
||||
Reference in New Issue
Block a user