- ctyun_subscription_lib.go: extend CTYun subscription data extraction - import_ctyun_subscription_test.go: update tests for CTYun - ctyun_token_plan_sample.txt: updated test fixture
410 lines
14 KiB
Go
410 lines
14 KiB
Go
//go:build llm_script
|
||
|
||
package main
|
||
|
||
import (
|
||
"fmt"
|
||
"regexp"
|
||
"strings"
|
||
)
|
||
|
||
// defaultCTYunCodingPlanURL is the SourceURL stamped on every CTYun Coding
// Plan record produced in this file.
const defaultCTYunCodingPlanURL = "https://www.ctyun.cn/document/11061839/11092368"

// defaultCTYunTokenPlanURL is the SourceURL stamped on every CTYun Token Plan
// record produced in this file.
const defaultCTYunTokenPlanURL = "https://www.ctyun.cn/act/AI/zhuanxiang"
|
||
|
||
func parseCTYunSubscriptionCatalog(codingRaw string, tokenRaw string) ([]subscriptionImportRecord, error) {
|
||
publishedAt, known := publishedAtFromText(firstNonEmptyText(codingRaw, tokenRaw))
|
||
codingRecords, err := parseCTYunCodingPlan(codingRaw, publishedAt)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
tokenRecords, err := parseCTYunTokenPlan(tokenRaw, publishedAt)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
records := append(codingRecords, tokenRecords...)
|
||
for i := range records {
|
||
records[i].PublishedAtKnown = known
|
||
}
|
||
return records, nil
|
||
}
|
||
|
||
func parseCTYunCodingPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
|
||
if !strings.Contains(raw, "GLM Lite") || !strings.Contains(raw, "GLM Max") {
|
||
return nil, fmt.Errorf("ctyun coding plan tiers not found")
|
||
}
|
||
pricePattern := regexp.MustCompile(`包月价格\s+(\d+)元/月\s+(\d+)元/月\s+(\d+)元/月`)
|
||
priceMatch := pricePattern.FindStringSubmatch(raw)
|
||
if len(priceMatch) != 4 {
|
||
return nil, fmt.Errorf("ctyun coding plan monthly prices not found")
|
||
}
|
||
limitPattern := regexp.MustCompile(`每月最多约([\d,]+)次prompts`)
|
||
limitMatches := limitPattern.FindAllStringSubmatch(raw, -1)
|
||
if len(limitMatches) < 3 {
|
||
return nil, fmt.Errorf("ctyun coding plan monthly limits not found")
|
||
}
|
||
modelScope := extractCTYunCodingModels(raw)
|
||
|
||
records := []subscriptionImportRecord{
|
||
{
|
||
ProviderName: "Telecom",
|
||
ProviderNameCn: "中国电信",
|
||
ProviderCountry: "CN",
|
||
ProviderWebsite: "https://www.ctyun.cn",
|
||
OperatorName: "CTYun",
|
||
OperatorNameCn: "天翼云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://www.ctyun.cn",
|
||
OperatorType: "cloud",
|
||
PlanFamily: "coding_plan",
|
||
PlanCode: "ctyun-coding-plan-lite-monthly",
|
||
PlanName: "天翼云 Coding Plan Lite(月付)",
|
||
Tier: "Lite",
|
||
BillingCycle: "monthly",
|
||
Currency: "CNY",
|
||
ListPrice: mustParseSubscriptionPrice(priceMatch[1]),
|
||
PriceUnit: "CNY/month",
|
||
QuotaValue: mustParseSubscriptionInt64(limitMatches[0][1]),
|
||
QuotaUnit: "prompts/month",
|
||
PlanScope: "Coding Plan",
|
||
ModelScope: modelScope,
|
||
SourceURL: defaultCTYunCodingPlanURL,
|
||
PublishedAt: publishedAt,
|
||
EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
|
||
Notes: "每 5 小时约 80 次 prompts;每周约 400 次 prompts。",
|
||
},
|
||
{
|
||
ProviderName: "Telecom",
|
||
ProviderNameCn: "中国电信",
|
||
ProviderCountry: "CN",
|
||
ProviderWebsite: "https://www.ctyun.cn",
|
||
OperatorName: "CTYun",
|
||
OperatorNameCn: "天翼云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://www.ctyun.cn",
|
||
OperatorType: "cloud",
|
||
PlanFamily: "coding_plan",
|
||
PlanCode: "ctyun-coding-plan-pro-monthly",
|
||
PlanName: "天翼云 Coding Plan Pro(月付)",
|
||
Tier: "Pro",
|
||
BillingCycle: "monthly",
|
||
Currency: "CNY",
|
||
ListPrice: mustParseSubscriptionPrice(priceMatch[2]),
|
||
PriceUnit: "CNY/month",
|
||
QuotaValue: mustParseSubscriptionInt64(limitMatches[1][1]),
|
||
QuotaUnit: "prompts/month",
|
||
PlanScope: "Coding Plan",
|
||
ModelScope: modelScope,
|
||
SourceURL: defaultCTYunCodingPlanURL,
|
||
PublishedAt: publishedAt,
|
||
EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
|
||
Notes: "每 5 小时约 400 次 prompts;每周约 2,000 次 prompts。",
|
||
},
|
||
{
|
||
ProviderName: "Telecom",
|
||
ProviderNameCn: "中国电信",
|
||
ProviderCountry: "CN",
|
||
ProviderWebsite: "https://www.ctyun.cn",
|
||
OperatorName: "CTYun",
|
||
OperatorNameCn: "天翼云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://www.ctyun.cn",
|
||
OperatorType: "cloud",
|
||
PlanFamily: "coding_plan",
|
||
PlanCode: "ctyun-coding-plan-max-monthly",
|
||
PlanName: "天翼云 Coding Plan Max(月付)",
|
||
Tier: "Max",
|
||
BillingCycle: "monthly",
|
||
Currency: "CNY",
|
||
ListPrice: mustParseSubscriptionPrice(priceMatch[3]),
|
||
PriceUnit: "CNY/month",
|
||
QuotaValue: mustParseSubscriptionInt64(limitMatches[2][1]),
|
||
QuotaUnit: "prompts/month",
|
||
PlanScope: "Coding Plan",
|
||
ModelScope: modelScope,
|
||
SourceURL: defaultCTYunCodingPlanURL,
|
||
PublishedAt: publishedAt,
|
||
EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
|
||
Notes: "每 5 小时约 1,600 次 prompts;每周约 8,000 次 prompts。",
|
||
},
|
||
}
|
||
return records, nil
|
||
}
|
||
|
||
func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
|
||
if records, ok := parseCTYunTokenPlanNormalizedLayout(raw, publishedAt); ok {
|
||
return records, nil
|
||
}
|
||
if records, ok := parseCTYunTokenPlanCardLayout(raw, publishedAt); ok {
|
||
return records, nil
|
||
}
|
||
return parseCTYunTokenPlanLegacyLayout(raw, publishedAt)
|
||
}
|
||
|
||
func parseCTYunTokenPlanNormalizedLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) {
|
||
lines := strings.Split(raw, "\n")
|
||
codeByTier := map[string]string{
|
||
"基础版": "basic",
|
||
"专业版": "pro",
|
||
"旗舰版": "flagship",
|
||
"轻享版": "starter",
|
||
"畅享版": "plus",
|
||
"尊享版": "vip",
|
||
}
|
||
|
||
records := make([]subscriptionImportRecord, 0, 6)
|
||
for i := 0; i < len(lines); i++ {
|
||
line := strings.TrimSpace(lines[i])
|
||
if !strings.HasPrefix(line, "Token Plan") {
|
||
continue
|
||
}
|
||
rawTier := strings.TrimSpace(strings.TrimPrefix(line, "Token Plan"))
|
||
tierCode, ok := codeByTier[rawTier]
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
j := i + 1
|
||
block := make([]string, 0, 12)
|
||
for ; j < len(lines); j++ {
|
||
next := strings.TrimSpace(lines[j])
|
||
if strings.HasPrefix(next, "Token Plan") {
|
||
break
|
||
}
|
||
if next != "" {
|
||
block = append(block, next)
|
||
}
|
||
}
|
||
model := ""
|
||
quota := ""
|
||
price := ""
|
||
notesParts := make([]string, 0, 4)
|
||
for k := 0; k < len(block); k++ {
|
||
item := block[k]
|
||
switch {
|
||
case strings.HasPrefix(item, "支持模型:"):
|
||
model = strings.TrimSpace(strings.TrimPrefix(item, "支持模型:"))
|
||
case strings.Contains(item, "Tokens"):
|
||
quota = strings.TrimSpace(strings.TrimSuffix(item, "Tokens"))
|
||
case regexp.MustCompile(`^[0-9]+$`).MatchString(item) && k+2 < len(block) && regexp.MustCompile(`^\.[0-9]+$`).MatchString(block[k+1]) && block[k+2] == "元/个/月":
|
||
price = item + block[k+1]
|
||
case item == "产品优势", item == "立即订购", strings.HasPrefix(item, "支持工具:"), strings.HasPrefix(item, "已抢购"), strings.HasSuffix(item, "用户"), item == "展开更多", item == "免费领取Tokens":
|
||
continue
|
||
default:
|
||
notesParts = append(notesParts, item)
|
||
}
|
||
}
|
||
if model == "" || quota == "" || price == "" {
|
||
continue
|
||
}
|
||
notes := "天翼云大模型 AI 专项活动页套餐。"
|
||
if len(notesParts) > 0 {
|
||
notes = strings.Join(notesParts, ";")
|
||
}
|
||
records = append(records, subscriptionImportRecord{
|
||
ProviderName: "Telecom",
|
||
ProviderNameCn: "中国电信",
|
||
ProviderCountry: "CN",
|
||
ProviderWebsite: "https://www.ctyun.cn",
|
||
OperatorName: "CTYun",
|
||
OperatorNameCn: "天翼云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://www.ctyun.cn",
|
||
OperatorType: "cloud",
|
||
PlanFamily: "token_plan",
|
||
PlanCode: "ctyun-token-plan-" + tierCode,
|
||
PlanName: "天翼云 Token Plan " + rawTier,
|
||
Tier: rawTier,
|
||
BillingCycle: "monthly",
|
||
Currency: "CNY",
|
||
ListPrice: mustParseSubscriptionPrice(price),
|
||
PriceUnit: "CNY/month",
|
||
QuotaValue: parseChineseTokenQuota(quota),
|
||
QuotaUnit: "tokens/month",
|
||
PlanScope: "Token Plan",
|
||
ModelScope: []string{model},
|
||
SourceURL: defaultCTYunTokenPlanURL,
|
||
PublishedAt: publishedAt,
|
||
EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
|
||
Notes: notes,
|
||
})
|
||
i = j - 1
|
||
}
|
||
if len(records) == 0 {
|
||
return nil, false
|
||
}
|
||
return records, true
|
||
}
|
||
|
||
func parseCTYunTokenPlanCardLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) {
|
||
cardPattern := regexp.MustCompile(`(?s)<span title="(Token Plan [^"]+)" class="card-header-title-text".*?</span>(.*?)<div class="card-btns-wrap"`)
|
||
cards := cardPattern.FindAllStringSubmatch(raw, -1)
|
||
if len(cards) == 0 {
|
||
return nil, false
|
||
}
|
||
|
||
codeByTier := map[string]string{
|
||
"基础版": "basic",
|
||
"专业版": "pro",
|
||
"旗舰版": "flagship",
|
||
}
|
||
|
||
records := make([]subscriptionImportRecord, 0, len(cards))
|
||
for _, card := range cards {
|
||
title := strings.TrimSpace(card[1])
|
||
body := card[2]
|
||
rawTier := strings.TrimSpace(strings.TrimPrefix(title, "Token Plan "))
|
||
tierCode, ok := codeByTier[rawTier]
|
||
if !ok {
|
||
return nil, false
|
||
}
|
||
|
||
modelMatch := regexp.MustCompile(`支持模型:([^<]+)</span>`).FindStringSubmatch(body)
|
||
if len(modelMatch) != 2 {
|
||
return nil, false
|
||
}
|
||
quotaMatch := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?亿|[0-9]+万)Tokens`).FindStringSubmatch(body)
|
||
if len(quotaMatch) != 2 {
|
||
return nil, false
|
||
}
|
||
priceMatch := regexp.MustCompile(`<span class="price-new-big"[^>]*>\s*([0-9]+)\s*</span>\s*<span class="price-new-big"[^>]*>\s*\.([0-9]+)\s*</span>\s*<span class="price-new-unit"[^>]*>元/个/月</span>`).FindStringSubmatch(body)
|
||
if len(priceMatch) != 3 {
|
||
return nil, false
|
||
}
|
||
notes := "天翼云大模型 AI 专项活动页套餐。"
|
||
if featureLines := regexp.MustCompile(`card-content-gou-content"[^>]*>([^<]+)</span>`).FindAllStringSubmatch(body, -1); len(featureLines) > 0 {
|
||
parts := make([]string, 0, len(featureLines))
|
||
for _, line := range featureLines {
|
||
text := strings.TrimSpace(line[1])
|
||
if text == "" || strings.HasPrefix(text, "支持模型:") || strings.Contains(text, "Tokens") {
|
||
continue
|
||
}
|
||
parts = append(parts, text)
|
||
}
|
||
if len(parts) > 0 {
|
||
notes = strings.Join(parts, ";")
|
||
}
|
||
}
|
||
|
||
records = append(records, subscriptionImportRecord{
|
||
ProviderName: "Telecom",
|
||
ProviderNameCn: "中国电信",
|
||
ProviderCountry: "CN",
|
||
ProviderWebsite: "https://www.ctyun.cn",
|
||
OperatorName: "CTYun",
|
||
OperatorNameCn: "天翼云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://www.ctyun.cn",
|
||
OperatorType: "cloud",
|
||
PlanFamily: "token_plan",
|
||
PlanCode: "ctyun-token-plan-" + tierCode,
|
||
PlanName: "天翼云 " + title,
|
||
Tier: rawTier,
|
||
BillingCycle: "monthly",
|
||
Currency: "CNY",
|
||
ListPrice: mustParseSubscriptionPrice(priceMatch[1] + "." + priceMatch[2]),
|
||
PriceUnit: "CNY/month",
|
||
QuotaValue: parseChineseTokenQuota(quotaMatch[1]),
|
||
QuotaUnit: "tokens/month",
|
||
PlanScope: "Token Plan",
|
||
ModelScope: []string{strings.TrimSpace(modelMatch[1])},
|
||
SourceURL: defaultCTYunTokenPlanURL,
|
||
PublishedAt: publishedAt,
|
||
EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
|
||
Notes: notes,
|
||
})
|
||
}
|
||
return records, true
|
||
}
|
||
|
||
func parseCTYunTokenPlanLegacyLayout(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
|
||
pattern := regexp.MustCompile(`Token Plan ([^\n]+?)(\d+(?:\.\d+)?亿|\d+万)Tokens包[\s\S]*?支持模型:([^\n]+)[\s\S]*?(\d+\s*\.\s*\d+)\s*元/个`)
|
||
matches := pattern.FindAllStringSubmatch(raw, -1)
|
||
if len(matches) == 0 {
|
||
return nil, fmt.Errorf("unexpected ctyun token plan count: 0")
|
||
}
|
||
|
||
codeByTier := map[string]string{
|
||
"Lite": "lite",
|
||
"Pro": "pro",
|
||
"Max": "max",
|
||
"轻享包": "starter",
|
||
"畅享包": "plus",
|
||
"尊享包": "vip",
|
||
}
|
||
|
||
records := make([]subscriptionImportRecord, 0, len(matches))
|
||
for _, match := range matches {
|
||
rawTier := strings.TrimSpace(match[1])
|
||
tierCode, ok := codeByTier[rawTier]
|
||
if !ok {
|
||
return nil, fmt.Errorf("unexpected ctyun token plan tier: %s", rawTier)
|
||
}
|
||
quotaValue := parseChineseTokenQuota(match[2])
|
||
price := mustParseSubscriptionPrice(strings.ReplaceAll(match[4], " ", ""))
|
||
records = append(records, subscriptionImportRecord{
|
||
ProviderName: "Telecom",
|
||
ProviderNameCn: "中国电信",
|
||
ProviderCountry: "CN",
|
||
ProviderWebsite: "https://www.ctyun.cn",
|
||
OperatorName: "CTYun",
|
||
OperatorNameCn: "天翼云",
|
||
OperatorCountry: "CN",
|
||
OperatorWebsite: "https://www.ctyun.cn",
|
||
OperatorType: "cloud",
|
||
PlanFamily: "token_plan",
|
||
PlanCode: "ctyun-token-plan-" + tierCode,
|
||
PlanName: "天翼云 Token Plan " + rawTier,
|
||
Tier: rawTier,
|
||
BillingCycle: "monthly",
|
||
Currency: "CNY",
|
||
ListPrice: price,
|
||
PriceUnit: "CNY/pack",
|
||
QuotaValue: quotaValue,
|
||
QuotaUnit: "tokens/pack",
|
||
PlanScope: "Token Plan",
|
||
ModelScope: []string{strings.TrimSpace(match[3])},
|
||
SourceURL: defaultCTYunTokenPlanURL,
|
||
PublishedAt: publishedAt,
|
||
EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
|
||
Notes: "天翼云大模型 AI 专项活动页套餐。",
|
||
})
|
||
}
|
||
return records, nil
|
||
}
|
||
|
||
func parseChineseTokenQuota(raw string) int64 {
|
||
cleaned := strings.TrimSpace(strings.TrimSuffix(raw, "Tokens包"))
|
||
cleaned = strings.ReplaceAll(cleaned, " ", "")
|
||
switch {
|
||
case strings.Contains(cleaned, "亿"):
|
||
return parseDecimalMultiplier(strings.TrimSuffix(cleaned, "亿"), 100000000)
|
||
case strings.Contains(cleaned, "万"):
|
||
return parseDecimalMultiplier(strings.TrimSuffix(cleaned, "万"), 10000)
|
||
default:
|
||
return mustParseSubscriptionInt64(cleaned)
|
||
}
|
||
}
|
||
|
||
// extractCTYunCodingModels returns the non-blank, whitespace-trimmed lines
// that appear after a line reading exactly "支持模型" and before the first
// line reading exactly "用量限制".
//
// A "用量限制" line stops the scan wherever it occurs, including before any
// "支持模型" line. The result is always a non-nil slice, empty when nothing
// was collected.
func extractCTYunCodingModels(raw string) []string {
	collected := make([]string, 0, 8)
	inModelSection := false
	for _, rawLine := range strings.Split(raw, "\n") {
		entry := strings.TrimSpace(rawLine)
		if entry == "支持模型" {
			inModelSection = true
			continue
		}
		if entry == "用量限制" {
			break
		}
		if inModelSection && entry != "" {
			collected = append(collected, entry)
		}
	}
	return collected
}
|