321 lines
9.6 KiB
Go
321 lines
9.6 KiB
Go
|
|
//go:build llm_script
|
||
|
|
|
||
|
|
package main
|
||
|
|
|
||
|
|
import (
|
||
|
|
"database/sql"
|
||
|
|
"flag"
|
||
|
|
"fmt"
|
||
|
|
"html"
|
||
|
|
"io"
|
||
|
|
"net/http"
|
||
|
|
"os"
|
||
|
|
"regexp"
|
||
|
|
"strings"
|
||
|
|
"time"
|
||
|
|
)
|
||
|
|
|
||
|
|
// defaultCUCloudPricingURL is the default value of the -url flag, i.e. the
// CUCloud support page that the import reads pricing from when no other URL
// is supplied on the command line.
const defaultCUCloudPricingURL = "https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=3236"
|
||
|
|
|
||
|
|
// cucloudPricingImportConfig carries the settings for a single import run.
// main fills it from the command-line flags.
type cucloudPricingImportConfig struct {
	// URL is handed to the page fetcher and is also stamped on every
	// generated record as its source URL.
	URL string

	// Fixture is handed to the page fetcher together with URL (flag -fixture).
	// NOTE(review): presumably a local sample file that replaces the network
	// fetch when non-empty; confirm against fetchRawPricingPage.
	Fixture string

	// DryRun makes the run parse and print the summary only; nothing is
	// written to the database and no database handle is required.
	DryRun bool

	// Timeout is the timeout configured on the HTTP client used for fetching.
	Timeout time.Duration
}
|
||
|
|
|
||
|
|
// cucloudPricingSummary is the set of counters and flags printed on the
// one-line report at the end of an import run.
type cucloudPricingSummary struct {
	// Models is the number of distinct model names that produced records.
	Models int

	// Records is the number of pricing records built by the parser.
	Records int

	// Regions is the number of distinct regions across all records.
	Regions int

	// PaygModeConfirmed reports whether the page text mentions pay-as-you-go
	// billing together with a per-thousand-tokens unit.
	PaygModeConfirmed bool

	// PaygPriceTablePublic reports whether some table on the page carries a
	// per-thousand-tokens unit and names one of the tracked models.
	PaygPriceTablePublic bool
}
|
||
|
|
|
||
|
|
// cucloudRequiredModels lists the model names the import insists on: each one
// must have a blended price and at least one supported region, otherwise
// parsing fails.
var cucloudRequiredModels = []string{"DeepSeek-V4-Pro", "DeepSeek-V4-Flash", "MiniMax-M2.5"}
|
||
|
|
|
||
|
|
func main() {
|
||
|
|
loadSubscriptionImportEnv()
|
||
|
|
|
||
|
|
var url string
|
||
|
|
var fixture string
|
||
|
|
var dryRun bool
|
||
|
|
var timeoutSeconds int
|
||
|
|
|
||
|
|
flag.StringVar(&url, "url", defaultCUCloudPricingURL, "联通云 AISP Token Plan 页面")
|
||
|
|
flag.StringVar(&fixture, "fixture", "", "联通云价格样例文件")
|
||
|
|
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||
|
|
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||
|
|
flag.Parse()
|
||
|
|
|
||
|
|
cfg := cucloudPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
||
|
|
|
||
|
|
var db *sql.DB
|
||
|
|
var err error
|
||
|
|
if !cfg.DryRun {
|
||
|
|
db, err = subscriptionImportDB()
|
||
|
|
if err != nil {
|
||
|
|
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||
|
|
os.Exit(1)
|
||
|
|
}
|
||
|
|
defer db.Close()
|
||
|
|
}
|
||
|
|
|
||
|
|
if err := runCUCloudPricingImport(cfg, db, os.Stdout); err != nil {
|
||
|
|
fmt.Fprintf(os.Stderr, "import_cucloud_pricing: %v\n", err)
|
||
|
|
os.Exit(1)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func runCUCloudPricingImport(cfg cucloudPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||
|
|
client := &http.Client{Timeout: cfg.Timeout}
|
||
|
|
raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client)
|
||
|
|
if err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
records, summary, err := parseCUCloudPricingCatalog(raw, cfg.URL)
|
||
|
|
if err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
records = dedupeOfficialPricingRecords(records)
|
||
|
|
if cfg.DryRun {
|
||
|
|
_, err = fmt.Fprintf(out, "source=cucloud-pricing-import models=%d records=%d regions=%d operator=%s payg_mode_confirmed=%t payg_price_table_public=%t dry_run=true\n",
|
||
|
|
summary.Models, summary.Records, summary.Regions, records[0].OperatorName, summary.PaygModeConfirmed, summary.PaygPriceTablePublic)
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
if db == nil {
|
||
|
|
return fmt.Errorf("db is required when dry-run=false")
|
||
|
|
}
|
||
|
|
if err := upsertOfficialPricingRecords(db, records, "cucloud-pricing-import"); err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
var tableRows int
|
||
|
|
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||
|
|
return fmt.Errorf("count region_pricing: %w", err)
|
||
|
|
}
|
||
|
|
_, err = fmt.Fprintf(out, "source=cucloud-pricing-import models=%d records=%d regions=%d operator=%s table_rows=%d payg_mode_confirmed=%t payg_price_table_public=%t dry_run=false\n",
|
||
|
|
summary.Models, summary.Records, summary.Regions, records[0].OperatorName, tableRows, summary.PaygModeConfirmed, summary.PaygPriceTablePublic)
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
|
||
|
|
func parseCUCloudPricingCatalog(raw string, sourceURL string) ([]officialPricingRecord, cucloudPricingSummary, error) {
|
||
|
|
normalized := normalizeCUCloudRaw(raw)
|
||
|
|
priceMap, err := extractCUCloudBlendedPrices(normalized)
|
||
|
|
if err != nil {
|
||
|
|
return nil, cucloudPricingSummary{}, err
|
||
|
|
}
|
||
|
|
regionMap, err := extractCUCloudRegionSupport(normalized)
|
||
|
|
if err != nil {
|
||
|
|
return nil, cucloudPricingSummary{}, err
|
||
|
|
}
|
||
|
|
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
modelSet := make(map[string]struct{})
|
||
|
|
regionSet := make(map[string]struct{})
|
||
|
|
for _, modelName := range cucloudRequiredModels {
|
||
|
|
price, ok := priceMap[modelName]
|
||
|
|
if !ok {
|
||
|
|
return nil, cucloudPricingSummary{}, fmt.Errorf("missing blended price for %s", modelName)
|
||
|
|
}
|
||
|
|
regions := regionMap[modelName]
|
||
|
|
if len(regions) == 0 {
|
||
|
|
return nil, cucloudPricingSummary{}, fmt.Errorf("missing supported regions for %s", modelName)
|
||
|
|
}
|
||
|
|
providerName := cucloudProviderName(modelName)
|
||
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||
|
|
for _, region := range regions {
|
||
|
|
records = append(records, officialPricingRecord{
|
||
|
|
ModelID: normalizeExternalID("cucloud", "aisp", modelName),
|
||
|
|
ModelName: modelName,
|
||
|
|
ProviderName: providerName,
|
||
|
|
ProviderNameCn: providerNameCn,
|
||
|
|
ProviderCountry: providerCountry,
|
||
|
|
ProviderWebsite: providerWebsite,
|
||
|
|
OperatorName: "Unicom AISP",
|
||
|
|
OperatorNameCn: "联通云 AI服务平台AISP",
|
||
|
|
OperatorCountry: "CN",
|
||
|
|
OperatorWebsite: "https://www.cucloud.cn",
|
||
|
|
OperatorType: "official",
|
||
|
|
Region: region,
|
||
|
|
Currency: "CNY",
|
||
|
|
InputPrice: price,
|
||
|
|
OutputPrice: price,
|
||
|
|
SourceURL: sourceURL,
|
||
|
|
ModelSourceURL: sourceURL,
|
||
|
|
DateConfidence: "unknown",
|
||
|
|
DateSourceKind: "official_pricing",
|
||
|
|
Modality: detectModality(modelName),
|
||
|
|
})
|
||
|
|
regionSet[region] = struct{}{}
|
||
|
|
}
|
||
|
|
modelSet[modelName] = struct{}{}
|
||
|
|
}
|
||
|
|
if len(records) == 0 {
|
||
|
|
return nil, cucloudPricingSummary{}, fmt.Errorf("no cucloud pricing records found")
|
||
|
|
}
|
||
|
|
summary := cucloudPricingSummary{
|
||
|
|
Models: len(modelSet),
|
||
|
|
Records: len(records),
|
||
|
|
Regions: len(regionSet),
|
||
|
|
PaygModeConfirmed: cucloudPaygModeConfirmed(normalized),
|
||
|
|
PaygPriceTablePublic: cucloudHasPublicPaygPriceTable(normalized),
|
||
|
|
}
|
||
|
|
return records, summary, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// normalizeCUCloudRaw turns the escaped page payload into plain HTML text.
//
// Literal escape sequences (backslash followed by characters, as they appear
// inside an embedded JSON string) are rewritten first: \u003c and \u003e
// become angle brackets, \u0026nbsp; and \t become a space, \n and \r become
// a newline. HTML entities are unescaped afterwards.
func normalizeCUCloudRaw(raw string) string {
	// Applied one after another, in this order.
	substitutions := [...][2]string{
		{`\u003c`, "<"},
		{`\u003e`, ">"},
		{`\u0026nbsp;`, " "},
		{`\n`, "\n"},
		{`\t`, " "},
		{`\r`, "\n"},
	}
	text := raw
	for _, pair := range substitutions {
		text = strings.ReplaceAll(text, pair[0], pair[1])
	}
	return html.UnescapeString(text)
}
|
||
|
|
|
||
|
|
func extractCUCloudBlendedPrices(raw string) (map[string]float64, error) {
|
||
|
|
for _, table := range cucloudTableBlocks(raw) {
|
||
|
|
rows := cucloudTableRows(table)
|
||
|
|
if len(rows) == 0 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
prices := make(map[string]float64)
|
||
|
|
for _, cell := range rows[0] {
|
||
|
|
modelName, price, ok := cucloudBlendedPriceCell(cell)
|
||
|
|
if ok {
|
||
|
|
prices[modelName] = price
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if cucloudHasAllRequiredModels(prices) {
|
||
|
|
return prices, nil
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return nil, fmt.Errorf("unexpected cucloud blended price table")
|
||
|
|
}
|
||
|
|
|
||
|
|
func cucloudBlendedPriceCell(raw string) (string, float64, bool) {
|
||
|
|
cleaned := strings.TrimSpace(cleanHTMLText(raw))
|
||
|
|
match := regexp.MustCompile(`^(.*?)\s*综合单价\s*([0-9]+(?:\.[0-9]+)?)元/百万tokens$`).FindStringSubmatch(cleaned)
|
||
|
|
if len(match) != 3 {
|
||
|
|
return "", 0, false
|
||
|
|
}
|
||
|
|
modelName := strings.TrimSpace(match[1])
|
||
|
|
if modelName == "" {
|
||
|
|
return "", 0, false
|
||
|
|
}
|
||
|
|
return modelName, mustParseSubscriptionPrice(match[2]), true
|
||
|
|
}
|
||
|
|
|
||
|
|
func extractCUCloudRegionSupport(raw string) (map[string][]string, error) {
|
||
|
|
for _, table := range cucloudTableBlocks(raw) {
|
||
|
|
rows := cucloudTableRows(table)
|
||
|
|
if len(rows) < 2 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
headers := rows[0]
|
||
|
|
if len(headers) < 2 || strings.TrimSpace(headers[0]) != "模型" {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
if !strings.Contains(strings.Join(headers, "|"), "贵阳基地二区") {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
regionMap := make(map[string][]string)
|
||
|
|
regions := headers[1:]
|
||
|
|
for _, row := range rows[1:] {
|
||
|
|
if len(row) < len(regions)+1 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
modelName := strings.TrimSpace(row[0])
|
||
|
|
if modelName == "" {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
supported := make([]string, 0)
|
||
|
|
for idx, region := range regions {
|
||
|
|
if strings.Contains(strings.TrimSpace(row[idx+1]), "支持") {
|
||
|
|
supported = append(supported, strings.TrimSpace(region))
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if len(supported) > 0 {
|
||
|
|
regionMap[modelName] = supported
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if cucloudHasAllRequiredRegionRows(regionMap) {
|
||
|
|
return regionMap, nil
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return nil, fmt.Errorf("unexpected cucloud region support table")
|
||
|
|
}
|
||
|
|
|
||
|
|
// cucloudTableBlockPattern matches one <table>...</table> element,
// case-insensitively and across newlines. It is compiled once instead of on
// every call.
var cucloudTableBlockPattern = regexp.MustCompile(`(?is)<table[^>]*>.*?</table>`)

// cucloudTableBlocks returns every <table>...</table> element found in raw,
// in document order, including the surrounding tags. It returns an empty
// result when raw contains no table.
//
// The match is non-greedy, so with nested tables a block ends at the first
// closing tag encountered.
func cucloudTableBlocks(raw string) []string {
	return cucloudTableBlockPattern.FindAllString(raw, -1)
}
|
||
|
|
|
||
|
|
func cucloudTableRows(table string) [][]string {
|
||
|
|
rowPattern := regexp.MustCompile(`(?is)<tr[^>]*>(.*?)</tr>`)
|
||
|
|
cellPattern := regexp.MustCompile(`(?is)<t[dh][^>]*>(.*?)</t[dh]>`)
|
||
|
|
matches := rowPattern.FindAllStringSubmatch(table, -1)
|
||
|
|
rows := make([][]string, 0, len(matches))
|
||
|
|
for _, rowMatch := range matches {
|
||
|
|
cells := cellPattern.FindAllStringSubmatch(rowMatch[1], -1)
|
||
|
|
if len(cells) == 0 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
row := make([]string, 0, len(cells))
|
||
|
|
for _, cell := range cells {
|
||
|
|
row = append(row, strings.TrimSpace(cleanHTMLText(cell[1])))
|
||
|
|
}
|
||
|
|
rows = append(rows, row)
|
||
|
|
}
|
||
|
|
return rows
|
||
|
|
}
|
||
|
|
|
||
|
|
func cucloudPaygModeConfirmed(raw string) bool {
|
||
|
|
text := cleanHTMLText(raw)
|
||
|
|
return strings.Contains(text, "按量计费") && (strings.Contains(text, "元/千 Tokens") || strings.Contains(text, "元/千Tokens"))
|
||
|
|
}
|
||
|
|
|
||
|
|
func cucloudHasPublicPaygPriceTable(raw string) bool {
|
||
|
|
for _, table := range cucloudTableBlocks(raw) {
|
||
|
|
text := cleanHTMLText(table)
|
||
|
|
if !(strings.Contains(text, "元/千 Tokens") || strings.Contains(text, "元/千Tokens")) {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
if strings.Contains(text, "DeepSeek-V4-Pro") || strings.Contains(text, "MiniMax-M2.5") || strings.Contains(text, "DeepSeek-V4-Flash") {
|
||
|
|
return true
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return false
|
||
|
|
}
|
||
|
|
|
||
|
|
// cucloudProviderName derives the provider from a model name by its prefix,
// ignoring case and surrounding whitespace: "deepseek..." maps to "DeepSeek",
// "minimax..." maps to "MiniMax", and anything else maps to "unknown".
func cucloudProviderName(modelName string) string {
	known := [...]struct {
		prefix   string
		provider string
	}{
		{"deepseek", "DeepSeek"},
		{"minimax", "MiniMax"},
	}

	normalized := strings.ToLower(strings.TrimSpace(modelName))
	for _, entry := range known {
		if strings.HasPrefix(normalized, entry.prefix) {
			return entry.provider
		}
	}
	return "unknown"
}
|
||
|
|
|
||
|
|
func cucloudHasAllRequiredModels(prices map[string]float64) bool {
|
||
|
|
for _, modelName := range cucloudRequiredModels {
|
||
|
|
if _, ok := prices[modelName]; !ok {
|
||
|
|
return false
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return true
|
||
|
|
}
|
||
|
|
|
||
|
|
func cucloudHasAllRequiredRegionRows(regionMap map[string][]string) bool {
|
||
|
|
for _, modelName := range cucloudRequiredModels {
|
||
|
|
if len(regionMap[modelName]) == 0 {
|
||
|
|
return false
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return true
|
||
|
|
}
|