Add plan catalog and subscription schema support, seed baselines, and real importers for core domestic subscriptions plus stable official pricing sources. This commit also hardens the shared fetch layers so the importers can support live collection and database writes instead of relying on manual placeholders alone.
324 lines
9.0 KiB
Go
324 lines
9.0 KiB
Go
//go:build llm_script
|
|
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// defaultBedrockPricingURL is the Amazon Bedrock pricing page. It is recorded
// as both SourceURL and ModelSourceURL on every record built in this file.
const defaultBedrockPricingURL = "https://aws.amazon.com/bedrock/pricing/"
|
|
|
|
// Patterns used to pull pricing tables out of the raw Bedrock pricing HTML.
// All of them use (?s) so that "." also matches newlines inside an element.
var (
	// bedrockRegionPattern captures the text after "Region:" / "Regions:" in a
	// "<p><b>…</b></p>" heading.
	bedrockRegionPattern = regexp.MustCompile(`(?s)<p><b>Regions?: ([^<]+)</b></p>`)

	// bedrockTablePattern captures the inner HTML of each <table> element.
	bedrockTablePattern = regexp.MustCompile(`(?s)<table[^>]*>(.*?)</table>`)

	// bedrockRowPattern captures the inner HTML of each <tr> element. The
	// opening tag may carry attributes (like the table and cell patterns
	// allow); the attribute part must start with whitespace so that unrelated
	// tags such as <track> are not mistaken for a row.
	bedrockRowPattern = regexp.MustCompile(`(?s)<tr(?:\s[^>]*)?>(.*?)</tr>`)

	// bedrockCellPattern captures the inner HTML of each <td>/<th> cell.
	bedrockCellPattern = regexp.MustCompile(`(?s)<t[dh][^>]*>(.*?)</t[dh]>`)
)
|
|
|
|
func parseBedrockPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
|
section := extractBetween(raw, `<h3 id="Model_Pricing"`, `<h2 id="Pricing_examples"`)
|
|
if strings.TrimSpace(section) == "" {
|
|
section = raw
|
|
}
|
|
|
|
blocks := splitBedrockProviderBlocks(section)
|
|
records := make([]officialPricingRecord, 0)
|
|
for _, block := range blocks {
|
|
records = append(records, parseBedrockProviderBlock(block.providerLabel, block.content)...)
|
|
}
|
|
if len(records) == 0 {
|
|
records = append(records, parseBedrockPricingTextFallback(cleanHTMLText(section))...)
|
|
}
|
|
if len(records) == 0 {
|
|
return nil, fmt.Errorf("no bedrock pricing rows found")
|
|
}
|
|
return records, nil
|
|
}
|
|
|
|
func parseBedrockProviderBlock(providerLabel string, raw string) []officialPricingRecord {
|
|
providerName := normalizeBedrockProvider(providerLabel)
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
|
|
|
regionMatches := bedrockRegionPattern.FindAllStringSubmatchIndex(raw, -1)
|
|
tables := bedrockTablePattern.FindAllStringSubmatchIndex(raw, -1)
|
|
records := make([]officialPricingRecord, 0)
|
|
seenModelRegion := make(map[string]struct{})
|
|
for _, tableIndex := range tables {
|
|
tableHTML := raw[tableIndex[2]:tableIndex[3]]
|
|
if !strings.Contains(tableHTML, "Price per 1M input tokens") || !strings.Contains(tableHTML, "$") {
|
|
continue
|
|
}
|
|
region := "global"
|
|
for _, regionIndex := range regionMatches {
|
|
if regionIndex[0] < tableIndex[0] {
|
|
region = cleanHTMLText(raw[regionIndex[2]:regionIndex[3]])
|
|
}
|
|
}
|
|
rows := parseBedrockTableRows(tableHTML)
|
|
for _, row := range rows {
|
|
dedupeKey := strings.Join([]string{region, row.ModelName}, "|")
|
|
if _, exists := seenModelRegion[dedupeKey]; exists {
|
|
continue
|
|
}
|
|
record := officialPricingRecord{
|
|
ModelID: normalizeExternalID("bedrock", providerName, row.ModelName),
|
|
ModelName: row.ModelName,
|
|
ProviderName: providerName,
|
|
ProviderNameCn: providerNameCn,
|
|
ProviderCountry: providerCountry,
|
|
ProviderWebsite: providerWebsite,
|
|
OperatorName: "Amazon Bedrock",
|
|
OperatorNameCn: "Amazon Bedrock",
|
|
OperatorCountry: "US",
|
|
OperatorWebsite: "https://aws.amazon.com/bedrock/",
|
|
OperatorType: "cloud",
|
|
Region: region,
|
|
Currency: "USD",
|
|
InputPrice: row.InputPrice,
|
|
OutputPrice: row.OutputPrice,
|
|
SourceURL: defaultBedrockPricingURL,
|
|
ModelSourceURL: defaultBedrockPricingURL,
|
|
DateConfidence: "unknown",
|
|
DateSourceKind: "official_pricing",
|
|
Modality: detectModality(row.ModelName),
|
|
}
|
|
record.IsFree = false
|
|
seenModelRegion[dedupeKey] = struct{}{}
|
|
records = append(records, record)
|
|
}
|
|
}
|
|
return records
|
|
}
|
|
|
|
// bedrockProviderBlock is one provider section cut out of the pricing page by
// splitBedrockProviderBlocks.
type bedrockProviderBlock struct {
	// providerLabel is the cleaned text of the section's <h2> heading.
	// content is the section HTML, starting at that <h2> tag.
	providerLabel, content string
}
|
|
|
|
func splitBedrockProviderBlocks(raw string) []bedrockProviderBlock {
|
|
marker := `<h2 id="`
|
|
indices := make([]int, 0)
|
|
for offset := 0; ; {
|
|
next := strings.Index(raw[offset:], marker)
|
|
if next == -1 {
|
|
break
|
|
}
|
|
indices = append(indices, offset+next)
|
|
offset += next + len(marker)
|
|
}
|
|
blocks := make([]bedrockProviderBlock, 0, len(indices))
|
|
for i, start := range indices {
|
|
end := len(raw)
|
|
if i+1 < len(indices) {
|
|
end = indices[i+1]
|
|
}
|
|
chunk := raw[start:end]
|
|
h2End := strings.Index(chunk, "</h2>")
|
|
if h2End == -1 {
|
|
continue
|
|
}
|
|
openEnd := strings.Index(chunk, ">")
|
|
if openEnd == -1 || openEnd >= h2End {
|
|
continue
|
|
}
|
|
label := cleanHTMLText(chunk[openEnd+1 : h2End])
|
|
if strings.TrimSpace(label) == "" {
|
|
continue
|
|
}
|
|
blocks = append(blocks, bedrockProviderBlock{
|
|
providerLabel: label,
|
|
content: chunk,
|
|
})
|
|
}
|
|
return blocks
|
|
}
|
|
|
|
// extractBetween returns the part of raw that begins at the first occurrence
// of startMarker (the marker itself is included) and stops just before the
// first occurrence of endMarker that follows the start marker.
//
// It returns "" when startMarker does not occur in raw. When endMarker is
// empty, or does not occur after the start marker, everything from
// startMarker to the end of raw is returned.
func extractBetween(raw string, startMarker string, endMarker string) string {
	start := strings.Index(raw, startMarker)
	if start == -1 {
		return ""
	}
	segment := raw[start:]
	if endMarker == "" {
		return segment
	}

	// Search for the end marker only after the start marker. Searching from
	// the beginning of segment would match at offset 0 whenever endMarker is
	// equal to, or a prefix of, startMarker, and the result would always be
	// empty.
	bodyOffset := len(startMarker)
	end := strings.Index(segment[bodyOffset:], endMarker)
	if end == -1 {
		return segment
	}
	return segment[:bodyOffset+end]
}
|
|
|
|
// bedrockPriceRow is one parsed data row of a Bedrock pricing table, as
// produced by parseBedrockTableRows.
type bedrockPriceRow struct {
	// ModelName is the cleaned text of the row's first cell.
	ModelName string
	// InputPrice and OutputPrice are the first dollar amounts found in the
	// row's input and output cells.
	// NOTE(review): presumably USD per 1M tokens, going by the table header
	// text the caller filters on — confirm.
	InputPrice, OutputPrice float64
}
|
|
|
|
func parseBedrockTableRows(tableHTML string) []bedrockPriceRow {
|
|
rows := bedrockRowPattern.FindAllStringSubmatch(tableHTML, -1)
|
|
parsed := make([]bedrockPriceRow, 0)
|
|
for _, row := range rows {
|
|
cells := bedrockCellPattern.FindAllStringSubmatch(row[1], -1)
|
|
if len(cells) < 3 {
|
|
continue
|
|
}
|
|
values := make([]string, 0, len(cells))
|
|
for _, cell := range cells {
|
|
values = append(values, cleanHTMLText(cell[1]))
|
|
}
|
|
if strings.Contains(strings.ToLower(values[0]), "models") {
|
|
continue
|
|
}
|
|
|
|
modelName := values[0]
|
|
inputCell := values[1]
|
|
outputCell := values[2]
|
|
if len(values) >= 6 && strings.Contains(strings.ToLower(values[5]), "$") {
|
|
outputCell = values[5]
|
|
}
|
|
inputPrice, ok := firstDollarPrice(inputCell)
|
|
if !ok {
|
|
continue
|
|
}
|
|
outputPrice, ok := firstDollarPrice(outputCell)
|
|
if !ok {
|
|
continue
|
|
}
|
|
parsed = append(parsed, bedrockPriceRow{
|
|
ModelName: modelName,
|
|
InputPrice: inputPrice,
|
|
OutputPrice: outputPrice,
|
|
})
|
|
}
|
|
return parsed
|
|
}
|
|
|
|
// normalizeBedrockProvider maps a provider heading from the Bedrock pricing
// page to the provider name used in records.
//
// Surrounding whitespace is ignored and matching is case-sensitive. Headings
// that are branding or product names are rewritten to the provider behind
// them. Every other heading (Anthropic, Cohere, DeepSeek, Meta, Mistral AI,
// Moonshot AI, NVIDIA, Qwen, Writer, and any unknown label) is returned
// trimmed and otherwise unchanged.
func normalizeBedrockProvider(raw string) string {
	label := strings.TrimSpace(raw)
	switch label {
	case "Amazon Nova":
		return "Amazon"
	case "Kimi":
		return "Moonshot AI"
	case "OpenAI OSS Models":
		return "OpenAI"
	case "Z AI":
		return "Zhipu AI"
	}
	return label
}
|
|
|
|
// Patterns used by the plain-text fallback parser, which runs on page text
// with the HTML already stripped.
var (
	// bedrockTextProviderHeaderPattern finds a table header of the form
	// "<provider> models Price per 1M input tokens" and captures the provider
	// label. The "i" in "Price" is optional.
	bedrockTextProviderHeaderPattern = regexp.MustCompile(`([A-Za-z][A-Za-z0-9 .&-]+)\s+models\s+Pr(?:i)?ce per 1M input tokens`)

	// bedrockTextRowPattern finds "<model> $<input> $<output>" and captures the
	// model name and the two numeric amounts.
	bedrockTextRowPattern = regexp.MustCompile(`([A-Za-z0-9 .:+-]+?)\s+\$\s*([0-9.]+)\s+\$\s*([0-9.]+)`)
)
|
|
|
|
func parseBedrockPricingTextFallback(raw string) []officialPricingRecord {
|
|
matches := bedrockTextProviderHeaderPattern.FindAllStringSubmatchIndex(raw, -1)
|
|
records := make([]officialPricingRecord, 0)
|
|
seen := make(map[string]struct{})
|
|
for i, match := range matches {
|
|
if len(match) < 4 {
|
|
continue
|
|
}
|
|
start := match[0]
|
|
end := len(raw)
|
|
if i+1 < len(matches) {
|
|
end = matches[i+1][0]
|
|
}
|
|
block := raw[start:end]
|
|
region := normalizeBedrockRegionText(findBedrockTextRegion(raw, start))
|
|
providerName := normalizeBedrockProvider(raw[match[2]:match[3]])
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
|
rows := bedrockTextRowPattern.FindAllStringSubmatch(block, -1)
|
|
for _, row := range rows {
|
|
if len(row) != 4 {
|
|
continue
|
|
}
|
|
modelName := strings.TrimSpace(row[1])
|
|
key := strings.Join([]string{providerName, region, modelName}, "|")
|
|
if _, exists := seen[key]; exists {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
records = append(records, officialPricingRecord{
|
|
ModelID: normalizeExternalID("bedrock", providerName, modelName),
|
|
ModelName: modelName,
|
|
ProviderName: providerName,
|
|
ProviderNameCn: providerNameCn,
|
|
ProviderCountry: providerCountry,
|
|
ProviderWebsite: providerWebsite,
|
|
OperatorName: "Amazon Bedrock",
|
|
OperatorNameCn: "Amazon Bedrock",
|
|
OperatorCountry: "US",
|
|
OperatorWebsite: "https://aws.amazon.com/bedrock/",
|
|
OperatorType: "cloud",
|
|
Region: region,
|
|
Currency: "USD",
|
|
InputPrice: mustParseSubscriptionPrice(row[2]),
|
|
OutputPrice: mustParseSubscriptionPrice(row[3]),
|
|
SourceURL: defaultBedrockPricingURL,
|
|
ModelSourceURL: defaultBedrockPricingURL,
|
|
DateConfidence: "unknown",
|
|
DateSourceKind: "official_pricing",
|
|
Modality: detectModality(modelName),
|
|
})
|
|
}
|
|
}
|
|
return records
|
|
}
|
|
|
|
// findBedrockTextRegion looks for the region label that precedes a provider
// header in the plain-text page.
//
// Only the 300 bytes of raw immediately before headerStart are inspected. The
// last "Regions:" or "Region:" marker in that window is used, and the text
// following it (up to headerStart) is returned with surrounding whitespace
// removed. The text is additionally cut at the words " Priority ", " Flex ",
// " Batch " or " models " when they occur. It returns "" when the window
// holds no marker.
func findBedrockTextRegion(raw string, headerStart int) string {
	const lookback = 300

	from := headerStart - lookback
	if from < 0 {
		from = 0
	}
	window := raw[from:headerStart]

	// Pick whichever marker occurs last in the window.
	label, at := "Regions:", strings.LastIndex(window, "Regions:")
	if singularAt := strings.LastIndex(window, "Region:"); singularAt > at {
		label, at = "Region:", singularAt
	}
	if at < 0 {
		return ""
	}

	text := strings.TrimSpace(window[at+len(label):])
	// NOTE(review): these look like tier or table labels that can follow the
	// region list in the flattened text — confirm against the live page.
	stopWords := [...]string{" Priority ", " Flex ", " Batch ", " models "}
	for i := range stopWords {
		cut := strings.Index(text, stopWords[i])
		if cut == -1 {
			continue
		}
		text = strings.TrimSpace(text[:cut])
	}
	return text
}
|
|
|
|
// normalizeBedrockRegionText tidies a region label taken from the page text.
//
// Surrounding whitespace and one trailing comma are removed, and every run of
// inner whitespace is collapsed to a single space. When nothing is left
// (blank input, or input holding only a comma) the region defaults to
// "global", so a record never carries an empty region.
func normalizeBedrockRegionText(raw string) string {
	trimmed := strings.TrimSuffix(strings.TrimSpace(raw), ",")
	normalized := strings.Join(strings.Fields(trimmed), " ")
	// Apply the default after stripping, so that "," does not slip through
	// as an empty region.
	if normalized == "" {
		return "global"
	}
	return normalized
}
|