feat(import): extend CTYun subscription collector

- ctyun_subscription_lib.go: extend CTYun subscription data extraction
- import_ctyun_subscription_test.go: update tests for CTYun
- ctyun_token_plan_sample.txt: updated test fixture
This commit is contained in:
phamnazage-jpg
2026-05-22 07:33:38 +08:00
parent 0de4402a11
commit 8d1312203f
3 changed files with 251 additions and 43 deletions

View File

@@ -133,10 +133,196 @@ func parseCTYunCodingPlan(raw string, publishedAt string) ([]subscriptionImportR
}
// parseCTYunTokenPlan extracts Token Plan records from raw page content.
//
// It tries the known page layouts in order — the normalized text layout
// first, then the HTML card layout — and returns the records from the first
// layout that reports a successful parse. When neither recognises the input
// it delegates to the legacy layout parser, which is the only path that can
// return an error.
func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
	layoutParsers := []func(string, string) ([]subscriptionImportRecord, bool){
		parseCTYunTokenPlanNormalizedLayout,
		parseCTYunTokenPlanCardLayout,
	}
	for _, parseLayout := range layoutParsers {
		parsed, recognised := parseLayout(raw, publishedAt)
		if recognised {
			return parsed, nil
		}
	}
	return parseCTYunTokenPlanLegacyLayout(raw, publishedAt)
}
// Patterns used by parseCTYunTokenPlanNormalizedLayout to recognise a price
// that is split across consecutive lines (integer part, ".fraction", unit).
// They are compiled once at package scope instead of once per scanned line.
var (
	ctyunNormalizedPriceIntegerPattern  = regexp.MustCompile(`^[0-9]+$`)
	ctyunNormalizedPriceFractionPattern = regexp.MustCompile(`^\.[0-9]+$`)
)

// parseCTYunTokenPlanNormalizedLayout parses Token Plan records from
// line-oriented text in which every plan starts with a "Token Plan <tier>"
// heading line followed by its attribute lines.
//
// For each heading whose tier is a known key of codeByTier, the non-empty
// lines up to the next "Token Plan" heading are collected as that plan's
// block. Within the block the function looks for:
//   - the supported model ("支持模型:" prefix),
//   - the token quota (a line containing "Tokens"),
//   - the price, written as three consecutive lines: digits, ".digits" and
//     the literal unit "元/个/月".
//
// Known UI strings are skipped; every other line is concatenated into Notes.
// A plan missing any of model, quota or price is silently dropped.
//
// It returns (records, true) when at least one plan was parsed and
// (nil, false) otherwise, so the caller can try another layout.
func parseCTYunTokenPlanNormalizedLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) {
	lines := strings.Split(raw, "\n")
	codeByTier := map[string]string{
		"基础版": "basic",
		"专业版": "pro",
		"旗舰版": "flagship",
		"轻享版": "starter",
		"畅享版": "plus",
		"尊享版": "vip",
	}
	records := make([]subscriptionImportRecord, 0, len(codeByTier))
	for i := 0; i < len(lines); i++ {
		line := strings.TrimSpace(lines[i])
		if !strings.HasPrefix(line, "Token Plan") {
			continue
		}
		rawTier := strings.TrimSpace(strings.TrimPrefix(line, "Token Plan"))
		tierCode, ok := codeByTier[rawTier]
		if !ok {
			continue
		}

		// Collect the non-empty lines belonging to this plan: everything up
		// to (but excluding) the next "Token Plan" heading.
		j := i + 1
		block := make([]string, 0, 12)
		for ; j < len(lines); j++ {
			next := strings.TrimSpace(lines[j])
			if strings.HasPrefix(next, "Token Plan") {
				break
			}
			if next != "" {
				block = append(block, next)
			}
		}

		model := ""
		quota := ""
		price := ""
		notesParts := make([]string, 0, 4)
		for k := 0; k < len(block); k++ {
			item := block[k]
			switch {
			case strings.HasPrefix(item, "支持模型:"):
				model = strings.TrimSpace(strings.TrimPrefix(item, "支持模型:"))
			case item == "免费领取Tokens":
				// Must be tested before the generic "Tokens" case below;
				// otherwise this UI string would overwrite the real quota.
				continue
			case strings.Contains(item, "Tokens"):
				quota = strings.TrimSpace(strings.TrimSuffix(item, "Tokens"))
			case k+2 < len(block) &&
				block[k+2] == "元/个/月" &&
				ctyunNormalizedPriceIntegerPattern.MatchString(item) &&
				ctyunNormalizedPriceFractionPattern.MatchString(block[k+1]):
				price = item + block[k+1]
				// Consume the fraction and unit lines as part of the price so
				// they are not appended to the notes on the next iterations.
				k += 2
			case item == "产品优势", item == "立即订购", strings.HasPrefix(item, "支持工具:"), strings.HasPrefix(item, "已抢购"), strings.HasSuffix(item, "用户"), item == "展开更多":
				continue
			default:
				notesParts = append(notesParts, item)
			}
		}
		if model == "" || quota == "" || price == "" {
			continue
		}

		// Fall back to a generic note when the block had no descriptive text.
		notes := "天翼云大模型 AI 专项活动页套餐。"
		if len(notesParts) > 0 {
			notes = strings.Join(notesParts, "")
		}
		records = append(records, subscriptionImportRecord{
			ProviderName:    "Telecom",
			ProviderNameCn:  "中国电信",
			ProviderCountry: "CN",
			ProviderWebsite: "https://www.ctyun.cn",
			OperatorName:    "CTYun",
			OperatorNameCn:  "天翼云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.ctyun.cn",
			OperatorType:    "cloud",
			PlanFamily:      "token_plan",
			PlanCode:        "ctyun-token-plan-" + tierCode,
			PlanName:        "天翼云 Token Plan " + rawTier,
			Tier:            rawTier,
			BillingCycle:    "monthly",
			Currency:        "CNY",
			ListPrice:       mustParseSubscriptionPrice(price),
			PriceUnit:       "CNY/month",
			QuotaValue:      parseChineseTokenQuota(quota),
			QuotaUnit:       "tokens/month",
			PlanScope:       "Token Plan",
			ModelScope:      []string{model},
			SourceURL:       defaultCTYunTokenPlanURL,
			PublishedAt:     publishedAt,
			EffectiveDate:   effectiveDateFromPublishedAt(publishedAt),
			Notes:           notes,
		})
		// Resume scanning at the next heading (the loop's i++ lands on j).
		i = j - 1
	}
	if len(records) == 0 {
		return nil, false
	}
	return records, true
}
// Patterns used by parseCTYunTokenPlanCardLayout. They are compiled once at
// package scope rather than on every call and for every card.
var (
	// One card: the title attribute of the header span, then everything up
	// to the card's button wrapper.
	ctyunTokenPlanCardPattern = regexp.MustCompile(`(?s)<span title="(Token Plan [^"]+)" class="card-header-title-text".*?</span>(.*?)<div class="card-btns-wrap"`)
	// Supported-model text inside a card body.
	ctyunTokenPlanCardModelPattern = regexp.MustCompile(`支持模型:([^<]+)</span>`)
	// Quota such as "1500万Tokens" or "1.5亿Tokens".
	ctyunTokenPlanCardQuotaPattern = regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?亿|[0-9]+万)Tokens`)
	// Price rendered as two "price-new-big" spans (integer, ".fraction")
	// followed by the "元/个/月" unit span.
	ctyunTokenPlanCardPricePattern = regexp.MustCompile(`<span class="price-new-big"[^>]*>\s*([0-9]+)\s*</span>\s*<span class="price-new-big"[^>]*>\s*\.([0-9]+)\s*</span>\s*<span class="price-new-unit"[^>]*>元/个/月</span>`)
	// Text of each feature bullet inside a card body.
	ctyunTokenPlanCardFeaturePattern = regexp.MustCompile(`card-content-gou-content"[^>]*>([^<]+)</span>`)
)

// parseCTYunTokenPlanCardLayout parses Token Plan records from HTML in which
// each plan is rendered as a card with a "card-header-title-text" span.
//
// Parsing is all-or-nothing: if no card is found, or any card has an unknown
// tier or lacks a model, quota or price match, the function returns
// (nil, false) so the caller can try another layout. Otherwise it returns one
// record per card, in document order, and true.
//
// Notes are built by concatenating the feature bullets of the card, excluding
// empty bullets, the supported-model bullet and any bullet containing
// "Tokens"; a generic note is used when nothing remains.
func parseCTYunTokenPlanCardLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) {
	cards := ctyunTokenPlanCardPattern.FindAllStringSubmatch(raw, -1)
	if len(cards) == 0 {
		return nil, false
	}
	codeByTier := map[string]string{
		"基础版": "basic",
		"专业版": "pro",
		"旗舰版": "flagship",
	}
	records := make([]subscriptionImportRecord, 0, len(cards))
	for _, card := range cards {
		title := strings.TrimSpace(card[1])
		body := card[2]
		rawTier := strings.TrimSpace(strings.TrimPrefix(title, "Token Plan "))
		tierCode, ok := codeByTier[rawTier]
		if !ok {
			return nil, false
		}
		modelMatch := ctyunTokenPlanCardModelPattern.FindStringSubmatch(body)
		if len(modelMatch) != 2 {
			return nil, false
		}
		quotaMatch := ctyunTokenPlanCardQuotaPattern.FindStringSubmatch(body)
		if len(quotaMatch) != 2 {
			return nil, false
		}
		priceMatch := ctyunTokenPlanCardPricePattern.FindStringSubmatch(body)
		if len(priceMatch) != 3 {
			return nil, false
		}

		// Fall back to a generic note when the card has no descriptive bullets.
		notes := "天翼云大模型 AI 专项活动页套餐。"
		if featureLines := ctyunTokenPlanCardFeaturePattern.FindAllStringSubmatch(body, -1); len(featureLines) > 0 {
			parts := make([]string, 0, len(featureLines))
			for _, line := range featureLines {
				text := strings.TrimSpace(line[1])
				// Model and quota bullets are already captured as structured fields.
				if text == "" || strings.HasPrefix(text, "支持模型:") || strings.Contains(text, "Tokens") {
					continue
				}
				parts = append(parts, text)
			}
			if len(parts) > 0 {
				notes = strings.Join(parts, "")
			}
		}
		records = append(records, subscriptionImportRecord{
			ProviderName:    "Telecom",
			ProviderNameCn:  "中国电信",
			ProviderCountry: "CN",
			ProviderWebsite: "https://www.ctyun.cn",
			OperatorName:    "CTYun",
			OperatorNameCn:  "天翼云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.ctyun.cn",
			OperatorType:    "cloud",
			PlanFamily:      "token_plan",
			PlanCode:        "ctyun-token-plan-" + tierCode,
			PlanName:        "天翼云 " + title,
			Tier:            rawTier,
			BillingCycle:    "monthly",
			Currency:        "CNY",
			ListPrice:       mustParseSubscriptionPrice(priceMatch[1] + "." + priceMatch[2]),
			PriceUnit:       "CNY/month",
			QuotaValue:      parseChineseTokenQuota(quotaMatch[1]),
			QuotaUnit:       "tokens/month",
			PlanScope:       "Token Plan",
			ModelScope:      []string{strings.TrimSpace(modelMatch[1])},
			SourceURL:       defaultCTYunTokenPlanURL,
			PublishedAt:     publishedAt,
			EffectiveDate:   effectiveDateFromPublishedAt(publishedAt),
			Notes:           notes,
		})
	}
	return records, true
}
func parseCTYunTokenPlanLegacyLayout(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
pattern := regexp.MustCompile(`Token Plan ([^\n]+?)(\d+(?:\.\d+)?亿|\d+万)Tokens包[\s\S]*?支持模型:([^\n]+)[\s\S]*?(\d+\s*\.\s*\d+)\s*元/个`)
matches := pattern.FindAllStringSubmatch(raw, -1)
if len(matches) != 6 {
return nil, fmt.Errorf("unexpected ctyun token plan count: %d", len(matches))
if len(matches) == 0 {
return nil, fmt.Errorf("unexpected ctyun token plan count: 0")
}
codeByTier := map[string]string{
@@ -151,13 +337,12 @@ func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRe
records := make([]subscriptionImportRecord, 0, len(matches))
for _, match := range matches {
rawTier := strings.TrimSpace(match[1])
tierCode := codeByTier[rawTier]
tierCode, ok := codeByTier[rawTier]
if !ok {
return nil, fmt.Errorf("unexpected ctyun token plan tier: %s", rawTier)
}
quotaValue := parseChineseTokenQuota(match[2])
price := mustParseSubscriptionPrice(strings.ReplaceAll(match[4], " ", ""))
planName := "天翼云 Token Plan " + rawTier
if rawTier == "Lite" || rawTier == "Pro" || rawTier == "Max" {
planName = "天翼云 Token Plan " + rawTier
}
records = append(records, subscriptionImportRecord{
ProviderName: "Telecom",
ProviderNameCn: "中国电信",
@@ -170,7 +355,7 @@ func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRe
OperatorType: "cloud",
PlanFamily: "token_plan",
PlanCode: "ctyun-token-plan-" + tierCode,
PlanName: planName,
PlanName: "天翼云 Token Plan " + rawTier,
Tier: rawTier,
BillingCycle: "monthly",
Currency: "CNY",

View File

@@ -24,8 +24,8 @@ func TestParseCTYunSubscriptionBuildsPlans(t *testing.T) {
if err != nil {
t.Fatalf("parseCTYunSubscriptionCatalog 失败: %v", err)
}
if len(plans) != 9 {
t.Fatalf("期望 9 条天翼云套餐记录,实际 %d", len(plans))
if len(plans) != 6 {
t.Fatalf("期望 6 条天翼云套餐记录,实际 %d", len(plans))
}
if plans[0].PlanCode != "ctyun-coding-plan-lite-monthly" {
@@ -34,14 +34,40 @@ func TestParseCTYunSubscriptionBuildsPlans(t *testing.T) {
if plans[0].ListPrice != 49 {
t.Fatalf("GLM Lite 月价错误: %v", plans[0].ListPrice)
}
if plans[3].PlanCode != "ctyun-token-plan-lite" {
if plans[3].PlanCode != "ctyun-token-plan-basic" {
t.Fatalf("首条 token planCode 错误: %q", plans[3].PlanCode)
}
if plans[len(plans)-1].PlanCode != "ctyun-token-plan-vip" {
if plans[3].QuotaValue != 15000000 || plans[3].PriceUnit != "CNY/month" || plans[3].QuotaUnit != "tokens/month" {
t.Fatalf("基础版 token plan 解析错误: %+v", plans[3])
}
if plans[len(plans)-1].PlanCode != "ctyun-token-plan-flagship" {
t.Fatalf("末条 token planCode 错误: %q", plans[len(plans)-1].PlanCode)
}
}
// TestParseCTYunTokenPlanLegacyLayout feeds parseCTYunTokenPlan a two-plan
// snippet written in the legacy "Tokens包" text format and checks that both
// plans are returned with the expected plan codes, and that the first one
// carries the per-pack price and quota units.
func TestParseCTYunTokenPlanLegacyLayout(t *testing.T) {
	const legacyPage = `Token Plan Lite1500万Tokens包
支持模型GLM5
39 .90 元/个
Token Plan 轻享包1000万Tokens包
支持模型Deepseek v3.2
9 .90 元/个`

	got, err := parseCTYunTokenPlan(legacyPage, "2026-05-18")
	if err != nil {
		t.Fatalf("legacy token plan 解析失败: %v", err)
	}
	if count := len(got); count != 2 {
		t.Fatalf("期望 2 条 legacy token 记录,实际 %d", count)
	}

	lite, starter := got[0], got[1]
	liteOK := lite.PlanCode == "ctyun-token-plan-lite" &&
		lite.PriceUnit == "CNY/pack" &&
		lite.QuotaUnit == "tokens/pack"
	if !liteOK {
		t.Fatalf("legacy Lite 解析错误: %+v", lite)
	}
	if starter.PlanCode != "ctyun-token-plan-starter" {
		t.Fatalf("legacy 轻享包解析错误: %+v", starter)
	}
}
func TestRunCTYunSubscriptionImportDryRunPrintsSummary(t *testing.T) {
var out bytes.Buffer
err := runCTYunSubscriptionImport(ctyunSubscriptionImportConfig{
@@ -56,7 +82,7 @@ func TestRunCTYunSubscriptionImportDryRunPrintsSummary(t *testing.T) {
output := out.String()
for _, want := range []string{
"source=ctyun-subscription-import",
"plans=9",
"plans=6",
"provider=Telecom",
"operator=CTYun",
"dry_run=true",

View File

@@ -1,32 +1,29 @@
# 天翼云大模型AI专项
<div class="card-header-title"><span title="Token Plan 基础版" class="card-header-title-text">
Token Plan 基础版
</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">包月固定高额请求额度、模型免费畅用、全编码工具无缝接入</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">支持模型GLM5</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">支持工具TeleClaw、OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">1500万Tokens</span></div>
<div class="price-new"><span class="price-new-big">39</span><span class="price-new-big">.90</span><span class="price-new-unit">元/个/月</span></div>
<div class="card-btns-wrap"></div>
Token Plan Lite1500万Tokens包
面向开发者/中小企业,适用于项目开发迭代,大幅提升编码效率与代码质量。
支持模型GLM5
支持工具OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等
39 .90 元/个
<div class="card-header-title"><span title="Token Plan 专业版" class="card-header-title-text">
Token Plan 专业版
</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">包月固定高额请求额度、模型免费畅用、全编码工具无缝接入</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">支持模型GLM5</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">支持工具TeleClaw、OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">7000万Tokens</span></div>
<div class="price-new"><span class="price-new-big">159</span><span class="price-new-big">.90</span><span class="price-new-unit">元/个/月</span></div>
<div class="card-btns-wrap"></div>
Token Plan Pro7000万Tokens包
面向开发者/中小企业,适用于项目开发迭代,大幅提升编码效率与代码质量。
支持模型GLM5
159 .90 元/个
Token Plan Max1.5亿Tokens包
面向开发者/中小企业,适用于项目开发迭代,大幅提升编码效率与代码质量。
支持模型GLM5
299 .90 元/个
Token Plan 轻享包1000万Tokens包
适用于个人/家庭 API 及业务调用,有效解决按需单价高、预算难控等问题。
支持模型Deepseek v3.2
9 .90 元/个
Token Plan 畅享包4000万Tokens包
适用于个人/家庭 API 及业务调用,有效解决按需单价高、预算难控等问题。
支持模型Deepseek v3.2
29 .90 元/个
Token Plan 尊享包8000万Tokens包
适用于个人/家庭 API 及业务调用,有效解决按需单价高、预算难控等问题。
支持模型Deepseek v3.2
49 .90 元/个
<div class="card-header-title"><span title="Token Plan 旗舰版" class="card-header-title-text">
Token Plan 旗舰版
</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">包月固定高额请求额度、模型免费畅用、全编码工具无缝接入</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">支持模型GLM5</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">支持工具TeleClaw、OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等</span></div>
<div class="card-content-gou-item"><span class="card-content-gou-content">1.5亿Tokens</span></div>
<div class="price-new"><span class="price-new-big">299</span><span class="price-new-big">.90</span><span class="price-new-unit">元/个/月</span></div>
<div class="card-btns-wrap"></div>