Files
llm-intelligence/scripts/import_bytedance_data.go

368 lines
11 KiB
Go
Raw Normal View History

//go:build llm_script
package main
import (
"database/sql"
"encoding/json"
"log"
"os"
"strings"
"time"
_ "github.com/lib/pq"
)
// ModelPricing is the flattened record the importer builds for one model and
// then writes to the models and region_pricing tables.
type ModelPricing struct {
	// Model identity. ModelID is stored as models.external_id.
	ModelID   string
	ModelName string

	// Provider and operator rows the model and its price are linked to
	// (model_provider and operator tables).
	ProviderName    string
	ProviderCountry string
	OperatorName    string
	OperatorType    string

	// Price point. The prices are written to the input_price_per_mtok and
	// output_price_per_mtok columns.
	Region        string
	Currency      string
	InputPrice    float64
	OutputPrice   float64
	ContextLength int
	IsFree        bool

	// SourceURL is recorded with the price row. ModelSourceURL, when set, is
	// preferred over SourceURL for models.source_url.
	SourceURL      string
	ModelSourceURL string

	// Release-date metadata. ReleaseDate is parsed with the "2006-01-02"
	// layout before it is stored.
	ReleaseDate    string
	DateConfidence string
	DateSourceKind string

	Modality string
}
// releaseDateValue converts a "YYYY-MM-DD" release date into a value that can
// be passed as a SQL date parameter.
//
// It returns nil (sent to the database as NULL) when raw is blank or cannot be
// parsed, and the parsed time.Time otherwise. Surrounding whitespace is
// ignored: hasExplicitModelMetadata already treats a padded date as present,
// so parsing it strictly would make the importer overwrite a stored release
// date with NULL.
func releaseDateValue(raw string) any {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil
	}
	parsed, err := time.Parse("2006-01-02", raw)
	if err != nil {
		// Best effort: an unparseable date is stored as NULL instead of
		// aborting the import.
		return nil
	}
	return parsed
}
// bytedanceModelMetadata is one release-metadata rule. A rule applies to a
// model whose lower-cased ID starts with Prefix; fields left empty do not
// overwrite anything on the model.
type bytedanceModelMetadata struct {
	// Prefix is matched against the lower-cased model ID.
	Prefix string
	// ReleaseDate is a "YYYY-MM-DD" date, or empty when no date is recorded.
	ReleaseDate string
	// ModelSourceURL is a model-specific URL copied onto the model.
	ModelSourceURL string
	// DateConfidence is a label such as "official_primary" or "unknown".
	DateConfidence string
	// DateSourceKind is a label such as "official_announcement" or
	// "catalog_backfill".
	DateSourceKind string
}
// Labels used for DateConfidence and DateSourceKind in the rule table.
const (
	bytedanceConfOfficial  = "official_primary"
	bytedanceConfSecondary = "secondary_authoritative"
	bytedanceConfUnknown   = "unknown"

	bytedanceKindAnnouncement = "official_announcement"
	bytedanceKindSecondary    = "secondary_authoritative_report"
	bytedanceKindBackfill     = "catalog_backfill"
)

// Release date and article URL shared by all rules of one model family.
const (
	bytedanceDoubao15Date = "2025-01-22"
	bytedanceDoubao15URL  = "https://developer.volcengine.com/articles/7462939272262189083"

	bytedanceSeed20Date = "2026-02-14"
	bytedanceSeed20URL  = "https://developer.volcengine.com/articles/7610285824933445675"
)

// bytedanceModelMetadataRules maps model ID prefixes to release metadata.
//
// Order matters: enrichBytedanceModelMetadata walks the slice from the top and
// applies the first rule whose Prefix matches.
var bytedanceModelMetadataRules = []bytedanceModelMetadata{
	// Doubao 1.5 family.
	{
		Prefix:         "bytedance-doubao-1.5-vision-pro",
		ReleaseDate:    bytedanceDoubao15Date,
		ModelSourceURL: bytedanceDoubao15URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-1.5-pro",
		ReleaseDate:    bytedanceDoubao15Date,
		ModelSourceURL: bytedanceDoubao15URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-1.5-lite",
		ReleaseDate:    bytedanceDoubao15Date,
		ModelSourceURL: bytedanceDoubao15URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-1.5-thinking",
		ReleaseDate:    "2025-04-17",
		ModelSourceURL: "https://developer.volcengine.com/articles/7496718897794039827",
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},

	// Doubao Seed 1.x.
	{
		Prefix:         "bytedance-doubao-seed-1.6",
		ReleaseDate:    "2025-06-11",
		ModelSourceURL: "https://developer.volcengine.com/articles/7517188354606104612",
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-seed-1.8",
		ReleaseDate:    "2025-12-18",
		ModelSourceURL: "https://developer.volcengine.com/articles/7601918680544641034",
		DateConfidence: bytedanceConfSecondary,
		DateSourceKind: bytedanceKindSecondary,
	},

	// Doubao Seed 2.0 family.
	{
		Prefix:         "bytedance-doubao-seed-2.0-code",
		ReleaseDate:    bytedanceSeed20Date,
		ModelSourceURL: bytedanceSeed20URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-seed-2.0-pro",
		ReleaseDate:    bytedanceSeed20Date,
		ModelSourceURL: bytedanceSeed20URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-seed-2.0-mini",
		ReleaseDate:    bytedanceSeed20Date,
		ModelSourceURL: bytedanceSeed20URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-seed-2.0-lite",
		ReleaseDate:    bytedanceSeed20Date,
		ModelSourceURL: bytedanceSeed20URL,
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},

	// Other Doubao models.
	{
		Prefix:         "bytedance-doubao-seed-code",
		ReleaseDate:    "2024-06-26",
		ModelSourceURL: "https://developer.volcengine.com/articles/7383101327527641125",
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
	{
		Prefix:         "bytedance-doubao-pro",
		ReleaseDate:    "2024-05-15",
		DateConfidence: bytedanceConfSecondary,
		DateSourceKind: bytedanceKindSecondary,
	},
	{
		Prefix:         "bytedance-doubao-seed-character",
		ReleaseDate:    "2024-05-15",
		DateConfidence: bytedanceConfSecondary,
		DateSourceKind: bytedanceKindSecondary,
	},

	// Entries with no release date recorded.
	{
		Prefix:         "bytedance-glm-4.7",
		DateConfidence: bytedanceConfUnknown,
		DateSourceKind: bytedanceKindBackfill,
	},
	{
		Prefix:         "bytedance-deepseek-",
		DateConfidence: bytedanceConfUnknown,
		DateSourceKind: bytedanceKindBackfill,
	},

	// Seedance.
	{
		Prefix:         "bytedance-seedance-1.0-lite",
		ReleaseDate:    "2025-05-13",
		ModelSourceURL: "https://developer.volcengine.com/articles/7504284064976502823",
		DateConfidence: bytedanceConfOfficial,
		DateSourceKind: bytedanceKindAnnouncement,
	},
}
// enrichBytedanceModelMetadata returns a copy of model with its release
// metadata filled in.
//
// The lower-cased ModelID is compared against bytedanceModelMetadataRules in
// slice order and the first rule whose Prefix matches is applied; only the
// rule's non-empty fields overwrite the model. Whatever is still empty
// afterwards gets a fallback: ModelSourceURL falls back to SourceURL, and
// DateConfidence and DateSourceKind fall back to "unknown".
//
// The fallbacks run for matched models too, so a rule that omits a field can
// never leave that field empty on the returned model.
func enrichBytedanceModelMetadata(model ModelPricing) ModelPricing {
	normalizedID := strings.ToLower(model.ModelID)
	for _, rule := range bytedanceModelMetadataRules {
		if !strings.HasPrefix(normalizedID, rule.Prefix) {
			continue
		}
		if rule.ReleaseDate != "" {
			model.ReleaseDate = rule.ReleaseDate
		}
		if rule.ModelSourceURL != "" {
			model.ModelSourceURL = rule.ModelSourceURL
		}
		if rule.DateConfidence != "" {
			model.DateConfidence = rule.DateConfidence
		}
		if rule.DateSourceKind != "" {
			model.DateSourceKind = rule.DateSourceKind
		}
		// First match wins; later rules are not consulted.
		break
	}

	if model.ModelSourceURL == "" {
		model.ModelSourceURL = model.SourceURL
	}
	if model.DateConfidence == "" {
		model.DateConfidence = "unknown"
	}
	if model.DateSourceKind == "" {
		model.DateSourceKind = "unknown"
	}
	return model
}
// hasExplicitModelMetadata reports whether model carries release metadata that
// goes beyond the generic fallbacks. It is true when any of these hold:
//   - ReleaseDate is non-blank;
//   - ModelSourceURL is set and differs from SourceURL;
//   - DateConfidence is non-blank and is not "unknown";
//   - DateSourceKind is non-blank and is not "unknown".
//
// main uses the result to decide whether stored model metadata is overwritten
// or only filled in where missing.
func hasExplicitModelMetadata(model ModelPricing) bool {
	if strings.TrimSpace(model.ReleaseDate) != "" {
		return true
	}
	if model.ModelSourceURL != "" && model.ModelSourceURL != model.SourceURL {
		return true
	}
	// The "unknown" comparison is made on the raw, untrimmed value.
	if confidence := model.DateConfidence; strings.TrimSpace(confidence) != "" && confidence != "unknown" {
		return true
	}
	kind := model.DateSourceKind
	return strings.TrimSpace(kind) != "" && kind != "unknown"
}
func main() {
dsn := os.Getenv("DATABASE_URL")
if dsn == "" {
dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
}
db, err := sql.Open("postgres", dsn)
if err != nil {
log.Fatal(err)
}
defer db.Close()
// Read raw data
data, err := os.ReadFile("/tmp/bytedance_raw.json")
if err != nil {
log.Fatal("Failed to read raw data:", err)
}
var raw struct {
Bytedance []struct {
Model string `json:"model"`
InputPrice float64 `json:"inputPrice"`
OutputPrice float64 `json:"outputPrice"`
ContextLength int `json:"contextLength"`
Operator string `json:"operator"`
Region string `json:"region"`
Currency string `json:"currency"`
} `json:"bytedance"`
}
if err := json.Unmarshal(data, &raw); err != nil {
log.Fatal("Failed to parse raw data:", err)
}
log.Printf("Importing %d ByteDance models...", len(raw.Bytedance))
batchID := "manual-seed"
for _, b := range raw.Bytedance {
p := enrichBytedanceModelMetadata(ModelPricing{
ModelID: "bytedance-" + b.Model,
ModelName: b.Model,
ProviderName: "ByteDance",
ProviderCountry: "CN",
OperatorName: "ByteDance Volcano",
OperatorType: "official",
Region: "CN",
Currency: "CNY",
InputPrice: b.InputPrice,
OutputPrice: b.OutputPrice,
ContextLength: b.ContextLength,
IsFree: b.InputPrice == 0,
SourceURL: "https://www.volcengine.com/docs/82379/1099320",
Modality: "text",
})
// Find or create provider
var providerID int64
err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID)
if err == sql.ErrNoRows {
err = db.QueryRow(
"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
p.ProviderName, p.ProviderCountry, "",
).Scan(&providerID)
}
if err != nil {
log.Printf("Provider error: %v", err)
continue
}
// Find or create operator
var operatorID int64
err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID)
if err == sql.ErrNoRows {
err = db.QueryRow(
"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
p.OperatorName, p.ProviderCountry,
).Scan(&operatorID)
}
if err != nil {
log.Printf("Operator error: %v", err)
continue
}
// Find or create model
var modelID int64
err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID)
if err == sql.ErrNoRows {
err = db.QueryRow(
`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id, source_url, release_date, date_confidence, date_source_kind)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7, $8, $9, $10, $11) RETURNING id`,
p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID, firstNonEmpty(p.ModelSourceURL, p.SourceURL), releaseDateValue(p.ReleaseDate), p.DateConfidence, p.DateSourceKind,
).Scan(&modelID)
}
if err != nil {
log.Printf("Model error for %s: %v", p.ModelID, err)
continue
}
if _, err := db.Exec(
`UPDATE models
SET source_url = CASE
WHEN $4 THEN $2
ELSE COALESCE(NULLIF(source_url, ''), $2)
END,
release_date = CASE
WHEN $4 THEN $3::date
ELSE COALESCE(release_date, $3::date)
END,
date_confidence = CASE
WHEN $4 THEN $5
ELSE COALESCE(NULLIF(date_confidence, ''), $5, 'unknown')
END,
date_source_kind = CASE
WHEN $4 THEN $6
ELSE COALESCE(NULLIF(date_source_kind, ''), $6, 'unknown')
END,
updated_at = CURRENT_TIMESTAMP
WHERE id = $1`,
modelID, firstNonEmpty(p.ModelSourceURL, p.SourceURL), releaseDateValue(p.ReleaseDate), hasExplicitModelMetadata(p), p.DateConfidence, p.DateSourceKind,
); err != nil {
log.Printf("Model metadata update error for %s: %v", p.ModelID, err)
}
// Insert pricing
sourceType := p.OperatorType
freeQuota := ""
freeLimitations := "[]"
rateLimit := "{}"
if p.IsFree {
sourceType = "free_tier"
freeQuota = "Imported free-tier pricing entry"
freeLimitations = `["See source_url for current quota and policy"]`
}
_, err = db.Exec(
`INSERT INTO region_pricing
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = CURRENT_TIMESTAMP`,
modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
sourceType, freeQuota, freeLimitations, rateLimit,
)
if err != nil {
log.Printf("Pricing error for %s: %v", p.ModelID, err)
continue
}
}
log.Printf("Successfully imported %d ByteDance models", len(raw.Bytedance))
}
// firstNonEmpty returns the first argument that is not the empty string. It
// returns "" when every argument is empty or when called with no arguments.
// Whitespace-only strings count as non-empty.
func firstNonEmpty(values ...string) string {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; len(candidate) > 0 {
			return candidate
		}
	}
	return ""
}