feat(pipeline): enhance verification scripts and pipeline

- verify_phase6.sh: improve phase 6 verification logic
- report_utils.sh: update report generation utilities
- run_daily.sh: harden daily pipeline execution
- run_intel_pipeline.sh: improve intel pipeline runner
- run_real_pipeline.sh: improve real pipeline runner
- generate_daily_report.go: enhance daily report generation
This commit is contained in:
phamnazage-jpg
2026-05-22 07:33:45 +08:00
parent 8d1312203f
commit 567d1f89ec
6 changed files with 185 additions and 91 deletions

View File

@@ -308,25 +308,25 @@ type SignatureAuditSourceSummary struct {
}
type SignatureAuditReportRow struct {
SourceKey string
SourceLabel string
RecentRank int
CheckedAt string
StructureState string
StructureChanged bool
Status string
DriftDetected bool
BaselineInitialized bool
StructureSHA256 string
SourceKey string
SourceLabel string
RecentRank int
CheckedAt string
StructureState string
StructureChanged bool
Status string
DriftDetected bool
BaselineInitialized bool
StructureSHA256 string
PreviousStructureSHA256 string
SnapshotPath string
SignaturePath string
ErrorMessage string
SnapshotPath string
SignaturePath string
ErrorMessage string
}
type SignatureAuditReportConfig struct {
Window int
ChangedRunsThreshold int
Window int
ChangedRunsThreshold int
}
type FreeSourceStat struct {
@@ -713,7 +713,7 @@ func generateReportDataV3(db *sql.DB, date string) (*ReportV3, error) {
func loadMaterializedDailySignalSnapshot(db *sql.DB, date string) (DailySignals, []ModelEvent, bool, error) {
var (
signals DailySignals
signals DailySignals
rawTopEvents string
)
err := db.QueryRow(`
@@ -3380,34 +3380,24 @@ func saveReportTrackingV3(db *sql.DB, r *ReportV3, mdPath string, runContext Rep
}
defer tx.Rollback()
if _, err := tx.Exec(`
INSERT INTO daily_report (report_date, status, model_count, new_models, free_models, summary_md, output_path, run_kind, trigger_source, is_official_daily, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, NOW())
ON CONFLICT (report_date) DO UPDATE SET
status = EXCLUDED.status,
model_count = EXCLUDED.model_count,
free_models = EXCLUDED.free_models,
summary_md = EXCLUDED.summary_md,
output_path = EXCLUDED.output_path,
run_kind = CASE
WHEN EXCLUDED.is_official_daily THEN EXCLUDED.run_kind
WHEN daily_report.trigger_source = 'legacy_backfill' THEN EXCLUDED.run_kind
ELSE daily_report.run_kind
END,
trigger_source = CASE
WHEN EXCLUDED.is_official_daily THEN EXCLUDED.trigger_source
WHEN daily_report.trigger_source = 'legacy_backfill' THEN EXCLUDED.trigger_source
ELSE daily_report.trigger_source
END,
is_official_daily = CASE
WHEN EXCLUDED.is_official_daily THEN TRUE
WHEN daily_report.trigger_source = 'legacy_backfill' THEN EXCLUDED.is_official_daily
ELSE daily_report.is_official_daily
END,
error_message = NULL,
updated_at = NOW()
`, r.Date, "generated", r.TotalModels, 0, len(r.FreeModels), summary, mdPath, runContext.RunKind, runContext.TriggerSource, runContext.IsOfficialDaily); err != nil {
return err
if runContext.IsOfficialDaily {
if _, err := tx.Exec(`
INSERT INTO daily_report (report_date, status, model_count, new_models, free_models, summary_md, output_path, run_kind, trigger_source, is_official_daily, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, NOW())
ON CONFLICT (report_date) DO UPDATE SET
status = EXCLUDED.status,
model_count = EXCLUDED.model_count,
free_models = EXCLUDED.free_models,
summary_md = EXCLUDED.summary_md,
output_path = EXCLUDED.output_path,
run_kind = EXCLUDED.run_kind,
trigger_source = EXCLUDED.trigger_source,
is_official_daily = TRUE,
error_message = NULL,
updated_at = NOW()
`, r.Date, "generated", r.TotalModels, 0, len(r.FreeModels), summary, mdPath, runContext.RunKind, runContext.TriggerSource, runContext.IsOfficialDaily); err != nil {
return err
}
}
if _, err := tx.Exec(`

View File

@@ -67,17 +67,18 @@ track_report_state() {
trigger_source="${9:-cli}"
is_official_daily="${10:-false}"
psql "$db_url" \
-v ON_ERROR_STOP=1 \
--set=report_date="$report_date" \
--set=status="$status" \
--set=model_count="$model_count" \
--set=summary_md="$summary_md" \
--set=output_path="$output_path" \
--set=error_message="$error_message" \
--set=run_kind="$run_kind" \
--set=trigger_source="$trigger_source" \
--set=is_official_daily="$is_official_daily" <<'SQL'
if [[ "$is_official_daily" == "true" ]]; then
psql "$db_url" \
-v ON_ERROR_STOP=1 \
--set=report_date="$report_date" \
--set=status="$status" \
--set=model_count="$model_count" \
--set=summary_md="$summary_md" \
--set=output_path="$output_path" \
--set=error_message="$error_message" \
--set=run_kind="$run_kind" \
--set=trigger_source="$trigger_source" \
--set=is_official_daily="$is_official_daily" <<'SQL'
INSERT INTO daily_report (
report_date,
status,
@@ -110,23 +111,23 @@ ON CONFLICT (report_date) DO UPDATE SET
summary_md = COALESCE(EXCLUDED.summary_md, daily_report.summary_md),
output_path = COALESCE(EXCLUDED.output_path, daily_report.output_path),
error_message = EXCLUDED.error_message,
run_kind = CASE
WHEN EXCLUDED.is_official_daily THEN EXCLUDED.run_kind
WHEN daily_report.trigger_source = 'legacy_backfill' THEN EXCLUDED.run_kind
ELSE daily_report.run_kind
END,
trigger_source = CASE
WHEN EXCLUDED.is_official_daily THEN EXCLUDED.trigger_source
WHEN daily_report.trigger_source = 'legacy_backfill' THEN EXCLUDED.trigger_source
ELSE daily_report.trigger_source
END,
is_official_daily = CASE
WHEN EXCLUDED.is_official_daily THEN TRUE
WHEN daily_report.trigger_source = 'legacy_backfill' THEN EXCLUDED.is_official_daily
ELSE daily_report.is_official_daily
END,
run_kind = EXCLUDED.run_kind,
trigger_source = EXCLUDED.trigger_source,
is_official_daily = TRUE,
updated_at = NOW();
SQL
fi
psql "$db_url" \
-v ON_ERROR_STOP=1 \
--set=report_date="$report_date" \
--set=status="$status" \
--set=summary_md="$summary_md" \
--set=output_path="$output_path" \
--set=error_message="$error_message" \
--set=run_kind="$run_kind" \
--set=trigger_source="$trigger_source" \
--set=is_official_daily="$is_official_daily" <<'SQL'
INSERT INTO report_runs (
source,
report_date,

View File

@@ -22,7 +22,7 @@ MODEL_COUNT=""
FETCH_OUT="${PROJECT_DIR}/models.json"
FETCH_TOTAL="0"
PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot,daily_report"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_FAILED_SOURCE_SET="none"
MULTI_SOURCE_AUDIT="multi_source_audit=unavailable"
PIPELINE_AUDIT_SUMMARY=""
@@ -285,6 +285,14 @@ if ! go run -tags llm_script \
merge_failed_source_keys "ucloud_pricing"
error_exit "UCloud 价格导入失败"
fi
if ! go run -tags llm_script \
scripts/subscription_import_common.go \
scripts/official_pricing_import_common.go \
scripts/coreshub_pricing_lib.go \
scripts/import_coreshub_pricing.go >> "$LOG_FILE" 2>&1; then
merge_failed_source_keys "coreshub_pricing"
error_exit "CoresHub 价格导入失败"
fi
if ! go run -tags llm_script \
scripts/subscription_import_common.go \
scripts/official_pricing_import_common.go \

View File

@@ -27,7 +27,7 @@ REPORT_DATE="${REPORT_DATE:-$(date +%F)}"
FETCH_OUT="$ROOT_DIR/models.json"
FETCH_TOTAL="0"
PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_FAILED_SOURCE_SET="none"
MULTI_SOURCE_AUDIT="multi_source_audit=unavailable"
PIPELINE_AUDIT_SUMMARY=""
@@ -152,6 +152,8 @@ run_or_fail "ppio_pricing" "PPIO 价格导入失败" \
go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/ppio_pricing_lib.go ./scripts/import_ppio_pricing.go
run_or_fail "ucloud_pricing" "UCloud 价格导入失败" \
go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/ucloud_pricing_lib.go ./scripts/import_ucloud_pricing.go
run_or_fail "coreshub_pricing" "CoresHub 价格导入失败" \
go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go
run_or_fail "cloudflare_pricing_signature" "Cloudflare Workers AI 价格页结构签名漂移" \
go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/cloudflare_pricing_snapshot_lib.go ./scripts/signature_guard_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/cloudflare_pricing_signature_guard_lib.go ./scripts/cloudflare_pricing_import_runner.go ./scripts/cloudflare_pricing_lib.go ./scripts/cloudflare_pricing_signature_guard.go
run_or_fail "cloudflare_pricing" "Cloudflare Workers AI 价格导入失败" \

View File

@@ -28,7 +28,7 @@ REPORT_DATE="$(report_date_value)"
FETCH_OUT="$ROOT_DIR/models.json"
FETCH_TOTAL="0"
PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot,daily_report"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance,aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription,huawei_package,zhipu_coding_plan,minimax_subscription,cucloud_catalog,mobile_cloud_catalog,youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing,ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing,vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_FAILED_SOURCE_SET="none"
MULTI_SOURCE_AUDIT="multi_source_audit=unavailable"
PIPELINE_AUDIT_SUMMARY=""
@@ -219,6 +219,11 @@ if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./script
record_failure "UCloud 价格导入失败"
exit 1
fi
if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/official_pricing_import_common.go" "./scripts/coreshub_pricing_lib.go" "./scripts/import_coreshub_pricing.go"; then
merge_failed_source_keys "coreshub_pricing"
record_failure "CoresHub 价格导入失败"
exit 1
fi
if ! go run -tags llm_script "./scripts/subscription_import_common.go" "./scripts/official_pricing_import_common.go" "./scripts/pricing_markdown_snapshot_lib.go" "./scripts/cloudflare_pricing_snapshot_lib.go" "./scripts/signature_guard_common.go" "./scripts/official_import_signature_audit_lib.go" "./scripts/cloudflare_pricing_signature_guard_lib.go" "./scripts/cloudflare_pricing_import_runner.go" "./scripts/cloudflare_pricing_lib.go" "./scripts/cloudflare_pricing_signature_guard.go"; then
merge_failed_source_keys "cloudflare_pricing_signature"
record_failure "Cloudflare Workers AI 价格页结构签名漂移"
@@ -281,13 +286,8 @@ if [[ ! -f "$(report_archive_markdown_path "$REPORT_DATE")" || ! -f "$(report_ar
exit 1
fi
if ! psql "$DATABASE_URL" -Atqc "select count(*) from daily_report where report_date = current_date and status = 'generated';" | awk '{ exit !($1 >= 1) }'; then
record_failure "daily_report 未写入 generated 记录"
exit 1
fi
if ! psql "$DATABASE_URL" -Atqc "select count(*) from report_runs where report_date = current_date and status = 'generated';" | awk '{ exit !($1 >= 1) }'; then
record_failure "report_runs 未写入 generated 记录"
if ! psql "$DATABASE_URL" -Atqc "select count(*) from report_runs where report_date = current_date and status = 'generated' and run_kind = 'manual' and trigger_source = 'pipeline';" | awk '{ exit !($1 >= 1) }'; then
record_failure "report_runs 未写入 pipeline generated 记录"
exit 1
fi

View File

@@ -55,26 +55,119 @@ start_server() {
return 1
}
# last_nonempty_line: read stdin and print the last line that has at least one
# whitespace-separated field. Prints an empty line when no such line exists.
last_nonempty_line() {
  awk '
    NF > 0 { last = $0 }
    END { print last }
  '
}
# last_meaningful_failure_line: read stdin and print the last non-blank line
# that is not exactly "exit status <N>", so the reported failure reason is the
# message printed before that trailer rather than the trailer itself.
# Prints an empty line when nothing qualifies.
last_meaningful_failure_line() {
  awk '
    NF == 0 { next }
    /^exit status [0-9]+$/ { next }
    { last = $0 }
    END { print last }
  '
}
# extract_window_metric NAME PAYLOAD
# Scan PAYLOAD for the first line containing "NAME=" and, on that line, the
# first whitespace-separated field whose text before "=" equals NAME exactly.
# Prints the text between the first and second "=" of that field and stops.
# Prints nothing when no field matches.
extract_window_metric() {
  local name="$1"
  local payload="$2"
  printf '%s\n' "$payload" | awk -v key="$name" '
    # Cheap line-level filter: skip lines that cannot contain the metric.
    $0 !~ key"=" { next }
    {
      for (i = 1; i <= NF; i++) {
        split($i, kv, "=")
        if (kv[1] != key) {
          continue
        }
        print kv[2]
        exit
      }
    }
  '
}
# classify_window_failure PAYLOAD
# Read the four failure counters out of PAYLOAD via extract_window_metric and
# print "precondition_missing_only" when precondition_missing is positive and
# the other three counters are zero; print "mixed" in every other case.
# Counters absent from PAYLOAD are treated as 0.
classify_window_failure() {
  local payload="$1"
  local missing provider runtime unknown
  local verdict="mixed"

  missing="$(extract_window_metric precondition_missing "$payload")"
  provider="$(extract_window_metric external_provider_failure "$payload")"
  runtime="$(extract_window_metric collector_runtime_failure "$payload")"
  unknown="$(extract_window_metric unknown_failure "$payload")"

  # Default every counter that was not found (or was empty) to zero.
  : "${missing:=0}"
  : "${provider:=0}"
  : "${runtime:=0}"
  : "${unknown:=0}"

  if [ "$missing" -gt 0 ]; then
    if [ "$provider" -eq 0 ] && [ "$runtime" -eq 0 ] && [ "$unknown" -eq 0 ]; then
      verdict="precondition_missing_only"
    fi
  fi

  echo "$verdict"
}
# run_live_pipeline_gate: run scripts/run_real_pipeline.sh, save its combined
# stdout/stderr to /tmp/llm_phase6_live_pipeline.out, and report the outcome
# through pass/fail. On failure the message carries the last meaningful output
# line, falling back to a pointer at the saved log when there is none.
run_live_pipeline_gate() {
  local pipeline_output pipeline_rc failure_hint

  # errexit is switched off so a failing pipeline can be reported instead of
  # aborting the script; the exit code must be captured right after the
  # assignment, before any other command overwrites $?.
  set +e
  pipeline_output="$(bash scripts/run_real_pipeline.sh 2>&1)"
  pipeline_rc=$?
  set -e

  printf '%s\n' "$pipeline_output" >/tmp/llm_phase6_live_pipeline.out
  failure_hint="$(printf '%s\n' "$pipeline_output" | last_meaningful_failure_line)"

  if [ "$pipeline_rc" -ne 0 ]; then
    fail "live_run_result=FAIL 真实采集并输出今日日报 (${failure_hint:-see /tmp/llm_phase6_live_pipeline.out})"
    return
  fi
  pass "live_run_result=PASS 真实采集并输出今日日报"
}
# run_importer_smoke_gate: run scripts/verify_importer_smoke.sh, echo its
# combined output, save a copy to /tmp/llm_phase6_importer_smoke.out, and
# report through pass/fail.
# Returns 0 when the smoke script exits 0 and 1 otherwise, so callers can use
# it as a condition for later gates.
run_importer_smoke_gate() {
  local gate_output gate_rc failure_hint

  # errexit is switched off so the failure can be reported; the exit code is
  # captured immediately after the assignment.
  set +e
  gate_output="$(bash scripts/verify_importer_smoke.sh 2>&1)"
  gate_rc=$?
  set -e

  printf '%s\n' "$gate_output"
  printf '%s\n' "$gate_output" >/tmp/llm_phase6_importer_smoke.out

  if [ "$gate_rc" -ne 0 ]; then
    failure_hint="$(printf '%s\n' "$gate_output" | last_meaningful_failure_line)"
    fail "importer_smoke_gate_result=FAIL 新增导入器 smoke gate 未通过 (${failure_hint:-see /tmp/llm_phase6_importer_smoke.out})"
    return 1
  fi

  pass "importer_smoke_gate_result=PASS 新增导入器 smoke gate 通过"
  return 0
}
# run_window_gate: run the collector stats window audit over the last 7 runs
# with a 95% success-rate assertion, echo its output, and report via pass/fail.
# On failure the message carries the class printed by classify_window_failure.
# NOTE(review): both failure messages have no separator between "95%" and
# "window_failure_class=" — confirm that is intended before anything parses
# these lines as key=value pairs.
run_window_gate() {
  local audit_output audit_rc failure_class

  # errexit is switched off so a failed assertion can be classified and
  # reported; the exit code is captured immediately after the assignment.
  set +e
  audit_output="$(bash scripts/collector_stats_window_audit.sh --db "$DB_URL" --limit 7 --assert-success-rate 95 2>&1)"
  audit_rc=$?
  set -e

  echo "$audit_output"

  if [ "$audit_rc" -eq 0 ]; then
    pass "window_gate_result=PASS 最近 7 次采集成功率达到 95%(已输出分类摘要)"
    return
  fi

  failure_class="$(classify_window_failure "$audit_output")"
  case "$failure_class" in
    precondition_missing_only)
      fail "window_gate_result=FAIL 最近 7 次采集成功率达到 95%window_failure_class=precondition_missing_only环境纪律问题"
      ;;
    *)
      fail "window_gate_result=FAIL 最近 7 次采集成功率达到 95%window_failure_class=${failure_class}"
      ;;
  esac
}
echo "=== Phase 6 综合验收检查 ==="
check_shell "Phase 1~5 总门禁通过" "bash scripts/verify_pre_phase6.sh"
check_shell "全仓 Go 测试通过" "go test ./..."
check_shell "脚本级采集器单测通过" "bash scripts/test.sh"
check_shell "真实采集并输出今日日报" "bash scripts/run_real_pipeline.sh"
if run_importer_smoke_gate; then
run_live_pipeline_gate
else
warn "live_run_result=SKIPPED 因 importer_smoke_gate_result=FAIL"
fi
check_shell "API Server 可构建" "go build -o /dev/null ./cmd/server"
check_shell "健康检查脚本通过" "DATABASE_URL='$DB_URL' bash healthcheck.sh"
check_shell "密钥未硬编码进源码" "grep -R -n 'sk-' cmd internal frontend/src scripts .github/workflows --include='*.go' --include='*.ts' --include='*.tsx' --include='*.sh' --include='*.yml' --include='*.yaml' --exclude='verify_phase6.sh' >/tmp/llm_phase6_secret_scan.out 2>/dev/null; test ! -s /tmp/llm_phase6_secret_scan.out"
set +e
collector_window_output="$(bash scripts/collector_stats_window_audit.sh --db "$DB_URL" --limit 7 --assert-success-rate 95 2>&1)"
collector_window_rc=$?
set -e
echo "$collector_window_output"
if [ "$collector_window_rc" -eq 0 ]; then
pass "最近 7 次采集成功率达到 95%(已输出分类摘要)"
else
fail "最近 7 次采集成功率达到 95%(见上方分类摘要)"
fi
run_window_gate
if go build -o "$SERVER_BIN" ./cmd/server >/tmp/llm_phase6_server_build.out 2>/tmp/llm_phase6_server_build.err; then
if reserve_server_port && start_server; then