278 lines
8.2 KiB
Go
278 lines
8.2 KiB
Go
|
|
//go:build llm_script
|
||
|
|
|
||
|
|
package main
|
||
|
|
|
||
|
|
import (
|
||
|
|
"fmt"
|
||
|
|
"html"
|
||
|
|
"regexp"
|
||
|
|
"strings"
|
||
|
|
)
|
||
|
|
|
||
|
|
// defaultVertexPricingURL is the pricing page address that the parsers in this
// file stamp into the SourceURL and ModelSourceURL fields of every record they
// build.
const defaultVertexPricingURL = "https://cloud.google.com/gemini-enterprise-agent-platform/generative-ai/pricing"
|
||
|
|
|
||
|
|
// Precompiled patterns used to carve the Vertex AI pricing page into headings,
// tables, rows and cells. All of them are case-insensitive and let "." span
// newlines, because the markup is matched as one large string.
var (
	// vertexRowPattern captures the inner HTML of every table row. The opening
	// tag may carry attributes (class, data-*, ...); \b keeps it from matching
	// other elements whose name merely starts with "tr" (e.g. <track>).
	vertexRowPattern = regexp.MustCompile(`(?is)<tr\b[^>]*>(.*?)</tr>`)

	// vertexCellPattern captures the inner HTML of every <td>/<th> cell. The \b
	// prevents <thead ...> from being mistaken for a <th> cell.
	vertexCellPattern = regexp.MustCompile(`(?is)<t[dh]\b[^>]*>(.*?)</t[dh]>`)

	// vertexHeadingPattern captures the inner HTML of h2-h4 headings.
	vertexHeadingPattern = regexp.MustCompile(`(?is)<h[2-4][^>]*>(.*?)</h[2-4]>`)

	// vertexTablePattern captures the inner HTML of a <table> element.
	vertexTablePattern = regexp.MustCompile(`(?is)<table[^>]*>(.*?)</table>`)

	// vertexStandardHeadingPattern matches an h2-h5 heading whose whole text is
	// "standard" (any case) or its Chinese equivalent "标准".
	vertexStandardHeadingPattern = regexp.MustCompile(`(?is)<h[2-5][^>]*>\s*(standard|标准)\s*</h[2-5]>`)
)
|
||
|
|
|
||
|
|
func parseVertexPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||
|
|
familyBlocks := splitVertexFamilyBlocks(raw)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
if len(familyBlocks) > 0 {
|
||
|
|
for _, block := range familyBlocks {
|
||
|
|
tableHTML := extractVertexStandardTable(block)
|
||
|
|
if strings.TrimSpace(tableHTML) == "" {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
records = append(records, parseVertexStandardTable(tableHTML)...)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if len(records) > 0 {
|
||
|
|
return records, nil
|
||
|
|
}
|
||
|
|
records = parseVertexStandardTextBlocks(raw)
|
||
|
|
if len(records) > 0 {
|
||
|
|
return records, nil
|
||
|
|
}
|
||
|
|
if len(familyBlocks) == 0 {
|
||
|
|
return nil, fmt.Errorf("unexpected vertex pricing content")
|
||
|
|
}
|
||
|
|
return nil, fmt.Errorf("no vertex standard pricing rows found")
|
||
|
|
}
|
||
|
|
|
||
|
|
func parseVertexStandardTable(table string) []officialPricingRecord {
|
||
|
|
rows := vertexRowPattern.FindAllStringSubmatch(table, -1)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
currentModel := ""
|
||
|
|
currentInput := 0.0
|
||
|
|
|
||
|
|
for _, row := range rows {
|
||
|
|
cells := vertexCellPattern.FindAllStringSubmatch(row[1], -1)
|
||
|
|
if len(cells) == 0 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
values := make([]string, 0, len(cells))
|
||
|
|
for _, cell := range cells {
|
||
|
|
values = append(values, cleanHTMLText(cell[1]))
|
||
|
|
}
|
||
|
|
if len(values) == 1 && !strings.Contains(values[0], "Model") {
|
||
|
|
currentModel = values[0]
|
||
|
|
currentInput = 0
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
if len(values) < 2 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
rowType := values[0]
|
||
|
|
priceCell := values[1]
|
||
|
|
if len(values) > 2 && strings.Contains(strings.ToLower(values[0]), "gemini") {
|
||
|
|
currentModel = values[0]
|
||
|
|
rowType = values[1]
|
||
|
|
priceCell = values[2]
|
||
|
|
}
|
||
|
|
if strings.TrimSpace(currentModel) == "" || strings.EqualFold(currentModel, "Model") {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
switch {
|
||
|
|
case strings.HasPrefix(rowType, "Input (text"), strings.HasPrefix(rowType, "输入(文本"):
|
||
|
|
price, ok := firstDollarPrice(priceCell)
|
||
|
|
if ok {
|
||
|
|
currentInput = price
|
||
|
|
}
|
||
|
|
case strings.HasPrefix(rowType, "Text output"), strings.HasPrefix(rowType, "文本输出"):
|
||
|
|
outputPrice, ok := firstDollarPrice(priceCell)
|
||
|
|
if !ok || currentInput == 0 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata("Google")
|
||
|
|
record := officialPricingRecord{
|
||
|
|
ModelID: normalizeExternalID("vertex", currentModel),
|
||
|
|
ModelName: currentModel,
|
||
|
|
ProviderName: "Google",
|
||
|
|
ProviderNameCn: providerNameCn,
|
||
|
|
ProviderCountry: providerCountry,
|
||
|
|
ProviderWebsite: providerWebsite,
|
||
|
|
OperatorName: "Google Cloud Vertex AI",
|
||
|
|
OperatorNameCn: "Google Cloud Vertex AI",
|
||
|
|
OperatorCountry: "US",
|
||
|
|
OperatorWebsite: "https://cloud.google.com/vertex-ai",
|
||
|
|
OperatorType: "cloud",
|
||
|
|
Region: "global",
|
||
|
|
Currency: "USD",
|
||
|
|
InputPrice: currentInput,
|
||
|
|
OutputPrice: outputPrice,
|
||
|
|
SourceURL: defaultVertexPricingURL,
|
||
|
|
ModelSourceURL: defaultVertexPricingURL,
|
||
|
|
DateConfidence: "unknown",
|
||
|
|
DateSourceKind: "official_pricing",
|
||
|
|
Modality: detectModality(currentModel),
|
||
|
|
}
|
||
|
|
record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0
|
||
|
|
records = append(records, record)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return records
|
||
|
|
}
|
||
|
|
|
||
|
|
func splitVertexFamilyBlocks(raw string) []string {
|
||
|
|
indices := make([]int, 0)
|
||
|
|
matches := vertexHeadingPattern.FindAllStringSubmatchIndex(raw, -1)
|
||
|
|
for _, match := range matches {
|
||
|
|
label := cleanHTMLText(raw[match[2]:match[3]])
|
||
|
|
if !strings.Contains(strings.ToLower(label), "gemini") {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
indices = append(indices, match[0])
|
||
|
|
}
|
||
|
|
blocks := make([]string, 0, len(indices))
|
||
|
|
for i, start := range indices {
|
||
|
|
end := len(raw)
|
||
|
|
if i+1 < len(indices) {
|
||
|
|
end = indices[i+1]
|
||
|
|
}
|
||
|
|
blocks = append(blocks, raw[start:end])
|
||
|
|
}
|
||
|
|
return blocks
|
||
|
|
}
|
||
|
|
|
||
|
|
func extractVertexStandardTable(raw string) string {
|
||
|
|
heading := vertexStandardHeadingPattern.FindStringIndex(raw)
|
||
|
|
if heading == nil {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
segment := raw[heading[1]:]
|
||
|
|
table := vertexTablePattern.FindStringSubmatch(segment)
|
||
|
|
if len(table) != 2 {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
return table[1]
|
||
|
|
}
|
||
|
|
|
||
|
|
func parseVertexStandardTextBlocks(raw string) []officialPricingRecord {
|
||
|
|
lines := htmlLines(raw)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
currentModelParts := make([]string, 0)
|
||
|
|
currentInput := 0.0
|
||
|
|
inStandard := false
|
||
|
|
|
||
|
|
for _, line := range lines {
|
||
|
|
lower := strings.ToLower(line)
|
||
|
|
sectionTitle := normalizeVertexSectionTitle(lower)
|
||
|
|
switch {
|
||
|
|
case sectionTitle != "":
|
||
|
|
inStandard = sectionTitle == "standard" || sectionTitle == "标准"
|
||
|
|
currentModelParts = currentModelParts[:0]
|
||
|
|
currentInput = 0
|
||
|
|
continue
|
||
|
|
case !inStandard:
|
||
|
|
continue
|
||
|
|
case strings.Contains(lower, "model type price"):
|
||
|
|
continue
|
||
|
|
case strings.Contains(line, "$"):
|
||
|
|
modelName := strings.TrimSpace(strings.Join(currentModelParts, " "))
|
||
|
|
if modelName == "" {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
switch {
|
||
|
|
case strings.HasPrefix(lower, "input (text"), strings.HasPrefix(lower, "1m input text tokens"):
|
||
|
|
if price, ok := firstDollarPrice(line); ok {
|
||
|
|
currentInput = price
|
||
|
|
}
|
||
|
|
case strings.HasPrefix(lower, "text output"), strings.HasPrefix(lower, "1m output text tokens"):
|
||
|
|
outputPrice, ok := firstDollarPrice(line)
|
||
|
|
if !ok || currentInput == 0 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata("Google")
|
||
|
|
record := officialPricingRecord{
|
||
|
|
ModelID: normalizeExternalID("vertex", modelName),
|
||
|
|
ModelName: modelName,
|
||
|
|
ProviderName: "Google",
|
||
|
|
ProviderNameCn: providerNameCn,
|
||
|
|
ProviderCountry: providerCountry,
|
||
|
|
ProviderWebsite: providerWebsite,
|
||
|
|
OperatorName: "Google Cloud Vertex AI",
|
||
|
|
OperatorNameCn: "Google Cloud Vertex AI",
|
||
|
|
OperatorCountry: "US",
|
||
|
|
OperatorWebsite: "https://cloud.google.com/vertex-ai",
|
||
|
|
OperatorType: "cloud",
|
||
|
|
Region: "global",
|
||
|
|
Currency: "USD",
|
||
|
|
InputPrice: currentInput,
|
||
|
|
OutputPrice: outputPrice,
|
||
|
|
SourceURL: defaultVertexPricingURL,
|
||
|
|
ModelSourceURL: defaultVertexPricingURL,
|
||
|
|
DateConfidence: "unknown",
|
||
|
|
DateSourceKind: "official_pricing",
|
||
|
|
Modality: detectModality(modelName),
|
||
|
|
}
|
||
|
|
record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0
|
||
|
|
records = append(records, record)
|
||
|
|
currentModelParts = currentModelParts[:0]
|
||
|
|
currentInput = 0
|
||
|
|
}
|
||
|
|
default:
|
||
|
|
currentModelParts = append(currentModelParts, line)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return dedupeOfficialPricingRecords(records)
|
||
|
|
}
|
||
|
|
|
||
|
|
// normalizeVertexSectionTitle reports whether line is one of the known pricing
// section titles.
//
// Leading "#" characters and surrounding whitespace are removed first. If the
// remainder is exactly one of the known titles it is returned, otherwise ""
// is returned. The comparison is exact and case-sensitive, so the English
// titles only match when line is already lowercase.
func normalizeVertexSectionTitle(line string) string {
	knownTitles := [...]string{
		"standard", "标准",
		"priority", "优先级",
		"flex/batch", "灵活/批处理",
		"batch api", "live api",
	}

	candidate := strings.TrimSpace(strings.TrimLeft(line, "#"))
	for _, known := range knownTitles {
		if candidate == known {
			return candidate
		}
	}
	return ""
}
|
||
|
|
|
||
|
|
// Helpers for htmlLines, built once at package initialisation instead of on
// every call.
var (
	// htmlLinesBreakReplacer turns <br> variants and the closing tags of
	// block-level elements into newlines so that each becomes a line boundary.
	htmlLinesBreakReplacer = strings.NewReplacer(
		"<br>", "\n",
		"<br/>", "\n",
		"<br />", "\n",
		"</p>", "\n",
		"</div>", "\n",
		"</section>", "\n",
		"</tr>", "\n",
		"</td>", "\n",
		"</th>", "\n",
		"</li>", "\n",
		"</h1>", "\n",
		"</h2>", "\n",
		"</h3>", "\n",
		"</h4>", "\n",
		"</h5>", "\n",
		"</h6>", "\n",
	)

	// htmlLinesTagPattern matches any remaining markup tag.
	htmlLinesTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)

	// htmlLinesSpacePattern matches runs of spaces and tabs.
	htmlLinesSpacePattern = regexp.MustCompile(`[ \t]+`)
)

// htmlLines flattens an HTML fragment into its non-empty text lines.
//
// Line breaks are introduced at <br> tags and at the end of block-level
// elements (paragraphs, divs, sections, table rows and cells, list items and
// headings); every other tag is removed. Character entities are decoded,
// non-breaking spaces become ordinary spaces, runs of spaces and tabs are
// collapsed to a single space, and each line is trimmed. Lines that end up
// empty are dropped.
//
// Tags are stripped before entities are decoded. Decoding first would turn
// escaped text such as "&lt;= 200K tokens ... &gt; 200K tokens" into
// "<= 200K tokens ... >", which the tag pattern would then delete as if it
// were markup.
func htmlLines(raw string) []string {
	text := htmlLinesBreakReplacer.Replace(raw)
	text = htmlLinesTagPattern.ReplaceAllString(text, "")
	text = html.UnescapeString(text)
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")
	text = strings.ReplaceAll(text, "\u00a0", " ")

	rawLines := strings.Split(text, "\n")
	lines := make([]string, 0, len(rawLines))
	for _, line := range rawLines {
		line = strings.TrimSpace(htmlLinesSpacePattern.ReplaceAllString(line, " "))
		if line != "" {
			lines = append(lines, line)
		}
	}
	return lines
}
|