//go:build llm_script package main import ( "database/sql" "encoding/json" "fmt" "log" "os" "strings" _ "github.com/lib/pq" ) type RawData struct { Zhipu []struct { Model string `json:"model"` Context string `json:"context"` InputPrice string `json:"inputPrice"` OutputPrice string `json:"outputPrice"` Operator string `json:"operator"` Region string `json:"region"` Currency string `json:"currency"` } `json:"zhipu"` Baidu []struct { Model string `json:"model"` Type string `json:"type"` InputPrice *float64 `json:"inputPrice"` OutputPrice *float64 `json:"outputPrice"` Operator string `json:"operator"` Region string `json:"region"` Currency string `json:"currency"` } `json:"baidu"` } type ModelPricing struct { ModelID string ModelName string ProviderName string ProviderCountry string OperatorName string OperatorType string Region string Currency string InputPrice float64 OutputPrice float64 ContextLength int IsFree bool SourceURL string Modality string SceneTags []string } func parseZhipuPrice(s string) float64 { // Extract price from strings like "6元", "免费", "限时免费" if strings.Contains(s, "免费") { return 0 } var f float64 fmt.Sscanf(s, "%f", &f) return f } func extractContextLength(context string) int { if strings.Contains(context, "1M") || strings.Contains(context, "1000K") { return 1000000 } if strings.Contains(context, "200K") { return 200000 } if strings.Contains(context, "128K") { return 128000 } if strings.Contains(context, "32K") { return 32000 } if strings.Contains(context, "8K") { return 8000 } if strings.Contains(context, "262144") || strings.Contains(context, "256K") { return 262144 } if strings.Contains(context, "8192") { return 8192 } return 0 } func main() { dsn := os.Getenv("DATABASE_URL") if dsn == "" { dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" } db, err := sql.Open("postgres", dsn) if err != nil { log.Fatal(err) } defer db.Close() // Read raw data data, err := os.ReadFile("/tmp/phase2_raw_data.json") if err != nil { log.Fatal("Failed to read raw data:", err) } var raw RawData if err := json.Unmarshal(data, &raw); err != nil { log.Fatal("Failed to parse raw data:", err) } var prices []ModelPricing batchID := "manual-seed" // Process Baidu data modelPrices := make(map[string]map[string]float64) // model -> type -> price for _, b := range raw.Baidu { if modelPrices[b.Model] == nil { modelPrices[b.Model] = make(map[string]float64) } if b.InputPrice != nil { if strings.Contains(b.Type, "输入") { modelPrices[b.Model]["input"] = *b.InputPrice * 1000000 // Convert to per 1M } if strings.Contains(b.Type, "输出") { modelPrices[b.Model]["output"] = *b.InputPrice * 1000000 } } if b.OutputPrice != nil { if strings.Contains(b.Type, "输出") { modelPrices[b.Model]["output"] = *b.OutputPrice * 1000000 } } } for model, pricesMap := range modelPrices { prices = append(prices, ModelPricing{ ModelID: "baidu-" + strings.ToLower(strings.ReplaceAll(model, " ", "-")), ModelName: model, ProviderName: "Baidu", ProviderCountry: "CN", OperatorName: "Baidu Qianfan", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: pricesMap["input"], OutputPrice: pricesMap["output"], IsFree: pricesMap["input"] == 0 && pricesMap["output"] == 0, SourceURL: "https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya", Modality: "text", }) } log.Printf("Parsed %d unique models from Baidu", len(prices)) // Save to database for _, p := range prices { // Find or create provider var providerID int64 err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID) if err == sql.ErrNoRows { err = db.QueryRow( "INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id", p.ProviderName, p.ProviderCountry, "", ).Scan(&providerID) } if err != nil { log.Printf("Provider error: %v", err) continue } // Find or create operator var operatorID int64 err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID) if err == sql.ErrNoRows { err = db.QueryRow( "INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id", p.OperatorName, p.ProviderCountry, ).Scan(&operatorID) } if err != nil { log.Printf("Operator error: %v", err) continue } // Find or create model var modelID int64 err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID) if err == sql.ErrNoRows { err = db.QueryRow( `INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id) VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`, p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID, ).Scan(&modelID) } if err != nil { log.Printf("Model error: %v", err) continue } // Insert pricing sourceType := p.OperatorType freeQuota := "" freeLimitations := "[]" rateLimit := "{}" if p.IsFree { sourceType = "free_tier" freeQuota = "Imported free-tier pricing entry" freeLimitations = `["See source_url for current quota and policy"]` } _, err = db.Exec( `INSERT INTO region_pricing (model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit) VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12) ON CONFLICT (model_id, operator_id, region, currency, effective_date) DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok, output_price_per_mtok = EXCLUDED.output_price_per_mtok, is_free = EXCLUDED.is_free, source_type = EXCLUDED.source_type, free_quota = EXCLUDED.free_quota, free_limitations = EXCLUDED.free_limitations, rate_limit = EXCLUDED.rate_limit, updated_at = CURRENT_TIMESTAMP`, modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL, sourceType, freeQuota, freeLimitations, rateLimit, ) if err != nil { log.Printf("Pricing error for %s: %v", p.ModelID, err) continue } } log.Printf("Successfully imported %d models into database", len(prices)) }