Files
llm-intelligence/scripts/import_phase2_data.go

236 lines
6.6 KiB
Go
Raw Permalink Normal View History

//go:build llm_script
package main
import (
"database/sql"
"encoding/json"
"fmt"
"log"
"os"
"strings"
_ "github.com/lib/pq"
)
// RawData mirrors the top-level shape of the raw pricing JSON decoded by main:
// one array of rows per vendor, stored under the keys "zhipu" and "baidu".
type RawData struct {
// Zhipu rows carry their prices as strings. main in this file does not read
// this slice; presumably parseZhipuPrice is the intended decoder for the
// price fields - confirm before wiring it in.
Zhipu []struct {
Model string `json:"model"`
Context string `json:"context"`
InputPrice string `json:"inputPrice"`
OutputPrice string `json:"outputPrice"`
Operator string `json:"operator"`
Region string `json:"region"`
Currency string `json:"currency"`
} `json:"zhipu"`
// Baidu rows are aggregated per Model by main. Type is matched against the
// substrings "输入" and "输出" to decide which side of the price a row
// describes.
Baidu []struct {
Model string `json:"model"`
Type string `json:"type"`
// Pointers so that an absent or null price can be told apart from 0;
// main skips nil values.
InputPrice *float64 `json:"inputPrice"`
OutputPrice *float64 `json:"outputPrice"`
// Operator, Region and Currency are decoded but not read by main, which
// uses fixed values for the Baidu entries instead.
Operator string `json:"operator"`
Region string `json:"region"`
Currency string `json:"currency"`
} `json:"baidu"`
}
// ModelPricing is the normalized record main builds for each model before
// writing it to the database. Field comments name the table/column main
// stores the value in.
type ModelPricing struct {
// ModelID is looked up in and written to models.external_id.
ModelID string
// ModelName is written to models.name.
ModelName string
// ProviderName is the lookup key and name column of model_provider.
ProviderName string
// ProviderCountry is used as the country for both model_provider and
// operator rows when they are created.
ProviderCountry string
// OperatorName is the lookup key and name column of operator, and is also
// stored as models.source.
OperatorName string
// OperatorType becomes region_pricing.source_type unless IsFree is set.
OperatorType string
// Region and Currency are part of the region_pricing conflict key.
Region string
Currency string
// InputPrice and OutputPrice are stored in input_price_per_mtok and
// output_price_per_mtok respectively.
InputPrice float64
OutputPrice float64
// ContextLength is written to models.context_length. The visible code never
// assigns it, so it is stored as 0.
ContextLength int
// IsFree is written to region_pricing.is_free and switches source_type to
// "free_tier".
IsFree bool
// SourceURL is written to region_pricing.source_url.
SourceURL string
// Modality is written to models.modality.
Modality string
// SceneTags is not read anywhere in the visible code.
SceneTags []string
}
// parseZhipuPrice turns a Zhipu price label into a number.
//
// Any label containing "免费" (for example "免费" or "限时免费") yields 0.
// Otherwise the leading numeric part is scanned with fmt's %f verb, so "6元"
// yields 6. A label that does not begin with a number also yields 0.
func parseZhipuPrice(s string) float64 {
	const freeMarker = "免费"

	switch {
	case strings.Contains(s, freeMarker):
		return 0
	default:
		var price float64
		if _, err := fmt.Sscanf(s, "%f", &price); err != nil {
			// Nothing numeric at the front of the label; the scan leaves
			// price untouched, so this is the same 0 it already holds.
			return 0
		}
		return price
	}
}
// extractContextLength maps a free-form context-window label to a token count
// by looking for known size markers inside it. It returns 0 when none of the
// markers is present.
func extractContextLength(context string) int {
	// Rows are probed top to bottom and the first hit wins. The order is
	// significant: "128K" has to be tried before "8K" because the former
	// contains the latter as a substring.
	table := []struct {
		markers []string
		tokens  int
	}{
		{[]string{"1M", "1000K"}, 1000000},
		{[]string{"200K"}, 200000},
		{[]string{"128K"}, 128000},
		{[]string{"32K"}, 32000},
		{[]string{"8K"}, 8000},
		{[]string{"262144", "256K"}, 262144},
		{[]string{"8192"}, 8192},
	}

	for _, row := range table {
		for _, marker := range row.markers {
			if strings.Contains(context, marker) {
				return row.tokens
			}
		}
	}
	return 0
}
// main imports the Baidu section of /tmp/phase2_raw_data.json into PostgreSQL.
//
// The connection string is taken from DATABASE_URL, falling back to a local
// unix-socket DSN. Baidu rows are grouped per model into one ModelPricing
// record; for each record the provider, operator and model rows are looked up
// (and created when missing) and a region_pricing row for the current date is
// upserted. A failure on one record is logged and the run moves on to the
// next; the final log line reports how many records were actually written.
func main() {
	dsn := os.Getenv("DATABASE_URL")
	if dsn == "" {
		dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
	}
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	// sql.Open does not establish a connection. Ping so that an unreachable
	// database stops the run here instead of producing one error per model.
	if err := db.Ping(); err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}

	// Read raw data
	data, err := os.ReadFile("/tmp/phase2_raw_data.json")
	if err != nil {
		log.Fatalf("Failed to read raw data: %v", err)
	}
	var raw RawData
	if err := json.Unmarshal(data, &raw); err != nil {
		log.Fatalf("Failed to parse raw data: %v", err)
	}

	var prices []ModelPricing
	batchID := "manual-seed"

	// Process Baidu data: several rows may describe the same model, so the
	// prices are first collected per model.
	modelPrices := make(map[string]map[string]float64) // model -> "input"/"output" -> price
	for _, b := range raw.Baidu {
		perModel := modelPrices[b.Model]
		if perModel == nil {
			perModel = make(map[string]float64)
			modelPrices[b.Model] = perModel
		}
		isInput := strings.Contains(b.Type, "输入")
		isOutput := strings.Contains(b.Type, "输出")
		if b.InputPrice != nil {
			scaled := *b.InputPrice * 1000000 // Convert to per 1M
			if isInput {
				perModel["input"] = scaled
			}
			// An output-type row may carry its price in inputPrice
			// (presumably a single price column in the scraped table -
			// confirm against the data source).
			if isOutput {
				perModel["output"] = scaled
			}
		}
		// An explicit outputPrice on an output-type row takes precedence.
		if b.OutputPrice != nil && isOutput {
			perModel["output"] = *b.OutputPrice * 1000000
		}
	}

	for model, pricesMap := range modelPrices {
		// An empty map means no row supplied a usable price. Importing it
		// would record the model as a free tier at 0/0, which is not what
		// the data says, so leave it out.
		if len(pricesMap) == 0 {
			log.Printf("Skipping %s: no input/output price found", model)
			continue
		}
		prices = append(prices, ModelPricing{
			ModelID:         "baidu-" + strings.ToLower(strings.ReplaceAll(model, " ", "-")),
			ModelName:       model,
			ProviderName:    "Baidu",
			ProviderCountry: "CN",
			OperatorName:    "Baidu Qianfan",
			OperatorType:    "official",
			Region:          "CN",
			Currency:        "CNY",
			InputPrice:      pricesMap["input"],
			OutputPrice:     pricesMap["output"],
			IsFree:          pricesMap["input"] == 0 && pricesMap["output"] == 0,
			SourceURL:       "https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya",
			Modality:        "text",
		})
	}
	log.Printf("Parsed %d unique models from Baidu", len(prices))

	// Save to database
	imported := 0
	for _, p := range prices {
		providerID, err := findOrCreateID(db,
			"SELECT id FROM model_provider WHERE name = $1", p.ProviderName,
			"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
			p.ProviderName, p.ProviderCountry, "",
		)
		if err != nil {
			log.Printf("Provider error: %v", err)
			continue
		}

		operatorID, err := findOrCreateID(db,
			"SELECT id FROM operator WHERE name = $1", p.OperatorName,
			"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
			p.OperatorName, p.ProviderCountry,
		)
		if err != nil {
			log.Printf("Operator error: %v", err)
			continue
		}

		modelID, err := findOrCreateID(db,
			"SELECT id FROM models WHERE external_id = $1", p.ModelID,
			`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
			VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
			p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
		)
		if err != nil {
			log.Printf("Model error: %v", err)
			continue
		}

		// Insert pricing; free entries get a dedicated source type and
		// placeholder quota notes.
		sourceType := p.OperatorType
		freeQuota := ""
		freeLimitations := "[]"
		rateLimit := "{}"
		if p.IsFree {
			sourceType = "free_tier"
			freeQuota = "Imported free-tier pricing entry"
			freeLimitations = `["See source_url for current quota and policy"]`
		}
		_, err = db.Exec(
			`INSERT INTO region_pricing
			(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
			VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
			ON CONFLICT (model_id, operator_id, region, currency, effective_date)
			DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
			output_price_per_mtok = EXCLUDED.output_price_per_mtok,
			is_free = EXCLUDED.is_free,
			source_type = EXCLUDED.source_type,
			free_quota = EXCLUDED.free_quota,
			free_limitations = EXCLUDED.free_limitations,
			rate_limit = EXCLUDED.rate_limit,
			updated_at = CURRENT_TIMESTAMP`,
			modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
			sourceType, freeQuota, freeLimitations, rateLimit,
		)
		if err != nil {
			log.Printf("Pricing error for %s: %v", p.ModelID, err)
			continue
		}
		imported++
	}
	// Count only the records whose pricing row was written; records skipped
	// by an error above must not be reported as imported.
	log.Printf("Successfully imported %d of %d models into database", imported, len(prices))
}

// findOrCreateID returns the id produced by selectQuery for key. When that
// query matches no row, insertQuery is run with insertArgs and the id it
// returns is used instead, so insertQuery must yield the new id (for example
// via RETURNING id). Any other error is returned unchanged.
func findOrCreateID(db *sql.DB, selectQuery string, key interface{}, insertQuery string, insertArgs ...interface{}) (int64, error) {
	var id int64
	err := db.QueryRow(selectQuery, key).Scan(&id)
	if err == sql.ErrNoRows {
		err = db.QueryRow(insertQuery, insertArgs...).Scan(&id)
	}
	return id, err
}