feat(intraday): monitor DeepSeek official page drift
Some checks failed
CI / go-test (push) Has been cancelled
CI / scripts-regression (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / docker-build (push) Has been cancelled

This commit is contained in:
phamnazage-jpg
2026-05-27 22:01:20 +08:00
parent 475401bcbe
commit 88833fac8b
16 changed files with 1399 additions and 33 deletions

View File

@@ -55,6 +55,6 @@
## 下一步建议
1. `run_intraday_discovery_watch.sh` 补充生产级 provider adapter 和调度说明
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间”提示
1. 已补充 `run_intraday_discovery_watch.sh` 与 DeepSeek 官方新闻页结构签名 guard，可继续扩展到 DeepSeek pricing 页面
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间 / 最近一次官方页 drift 检查时间”提示
3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 或候选情报面板

View File

@@ -0,0 +1,51 @@
//go:build llm_script
package main
import (
"flag"
"fmt"
"os"
"time"
)
// main is the command-line entry point of the DeepSeek news signature guard.
//
// It parses the flags, runs the guard once, hands the outcome to the audit
// persistence helper, prints a one-line summary on stdout and exits with
// status 1 when either the guard or the audit step returned an error.
func main() {
	loadSubscriptionImportEnv()

	pageURL := flag.String("url", defaultDeepSeekNewsFetchURL, "DeepSeek 官方新闻页")
	fixturePath := flag.String("fixture", "", "DeepSeek 新闻页样例文件")
	outDir := flag.String("snapshot-dir", "", "DeepSeek news snapshot 输出目录")
	baseline := flag.String("baseline-path", "", "DeepSeek news 结构基线签名路径")
	timeoutSecs := flag.Int("timeout", 20, "请求超时(秒)")
	bootstrap := flag.Bool("allow-bootstrap", true, "当 baseline 缺失时自动初始化")
	flag.Parse()

	// One timestamp is shared by the guard run and the audit record.
	startedAt := time.Now()
	cfg := deepseekNewsSignatureGuardConfig{
		URL:            *pageURL,
		Fixture:        *fixturePath,
		SnapshotDir:    *outDir,
		BaselinePath:   *baseline,
		Timeout:        time.Duration(*timeoutSecs) * time.Second,
		AllowBootstrap: *bootstrap,
	}

	result, runErr := runDeepSeekNewsSignatureGuard(cfg, startedAt)

	// The audit record is written even when the guard failed; an audit failure
	// only becomes the exit error when the guard itself succeeded.
	auditErr := persistDeepSeekNewsSignatureAuditIfConfigured(cfg, result, startedAt, runErr)
	if auditErr != nil {
		fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard audit: %v\n", auditErr)
		if runErr == nil {
			runErr = auditErr
		}
	}

	fmt.Println(formatDeepSeekNewsSignatureGuardSummary(result))
	if runErr == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard: %v\n", runErr)
	os.Exit(1)
}

View File

@@ -0,0 +1,127 @@
//go:build llm_script
package main
import (
"fmt"
"net/http"
"os"
"path/filepath"
"strings"
"time"
)
// deepseekNewsSignatureGuardConfig carries the inputs for one run of
// runDeepSeekNewsSignatureGuard.
type deepseekNewsSignatureGuardConfig struct {
// URL is passed to fetchSubscriptionPage and recorded as the signature's source URL.
URL string
// Fixture is passed to fetchSubscriptionPage together with URL; presumably a
// local sample file read instead of a live request — confirm against that helper.
Fixture string
// SnapshotDir is the artifact output directory; "" falls back to
// logs/deepseek-news-snapshots.
SnapshotDir string
// BaselinePath is the baseline signature file; "" falls back to
// baseline.signature.json inside the snapshot directory.
BaselinePath string
// Timeout is used as the http.Client timeout for the fetch.
Timeout time.Duration
// AllowBootstrap lets a missing baseline be created from the current
// signature instead of failing the run.
AllowBootstrap bool
}
// deepseekNewsSignatureGuardResult describes the outcome of one guard run.
// runDeepSeekNewsSignatureGuard may return it partially filled alongside an
// error so the caller can still summarise and audit the run.
type deepseekNewsSignatureGuardResult struct {
// SnapshotPath is the file the fetched page was written to.
SnapshotPath string
// SignaturePath is the file the current signature JSON was written to.
SignaturePath string
// BaselinePath is the resolved baseline signature file.
BaselinePath string
// DriftDetected is true when baseline and current StructureSHA256 differ.
DriftDetected bool
// BaselineInitialized is true when the baseline was missing and was created
// from the current signature during this run.
BaselineInitialized bool
// PreviousBaselineHash is the StructureSHA256 read from the baseline; it stays
// empty when no baseline could be read.
PreviousBaselineHash string
// CurrentSignature is the signature computed from the page fetched in this run.
CurrentSignature deepseekNewsStructureSignature
}
// defaultDeepSeekNewsFetchURL is the DeepSeek news page used as the default
// value of the guard CLI's -url flag.
const defaultDeepSeekNewsFetchURL = "https://api-docs.deepseek.com/news/news250120"
// runDeepSeekNewsSignatureGuard fetches the DeepSeek news page, writes a
// snapshot plus its structure signature, and compares the signature against
// the stored baseline.
//
// Outcomes:
//   - baseline missing and cfg.AllowBootstrap: the current signature is copied
//     to the baseline path, BaselineInitialized is set, error is nil;
//   - baseline missing and bootstrap disabled: error;
//   - baseline present with a different StructureSHA256: DriftDetected is set
//     and an error describing both hashes and all paths is returned;
//   - baseline present and equal: nil error.
//
// When the fetch or the artifact write fails the returned result is empty.
// Later failures return the partially filled result so the caller can still
// print a summary and persist an audit record.
func runDeepSeekNewsSignatureGuard(cfg deepseekNewsSignatureGuardConfig, now time.Time) (deepseekNewsSignatureGuardResult, error) {
	// Treat whitespace-only values as "not set", exactly like
	// resolveDeepSeekNewsSnapshotPaths does. Comparing against "" only would
	// create a directory literally named " " for the baseline while the
	// resolver silently redirects the snapshots to the default directory.
	snapshotDir := cfg.SnapshotDir
	if strings.TrimSpace(snapshotDir) == "" {
		snapshotDir = filepath.Join("logs", "deepseek-news-snapshots")
	}
	if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
		return deepseekNewsSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err)
	}
	snapshotPath, signaturePath := resolveDeepSeekNewsSnapshotPaths("", "", snapshotDir, now)

	baselinePath := cfg.BaselinePath
	if strings.TrimSpace(baselinePath) == "" {
		baselinePath = filepath.Join(snapshotDir, "baseline.signature.json")
	}

	client := &http.Client{Timeout: cfg.Timeout}
	raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
	if err != nil {
		return deepseekNewsSignatureGuardResult{}, err
	}
	current, err := writeDeepSeekNewsSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now)
	if err != nil {
		return deepseekNewsSignatureGuardResult{}, err
	}

	result := deepseekNewsSignatureGuardResult{
		SnapshotPath:     snapshotPath,
		SignaturePath:    signaturePath,
		BaselinePath:     baselinePath,
		CurrentSignature: current,
	}

	previous, err := readDeepSeekNewsStructureSignature(baselinePath)
	if err != nil {
		if !os.IsNotExist(err) {
			// Unreadable or corrupt baseline: report it with context rather than
			// overwriting a file an operator may want to inspect.
			return result, fmt.Errorf("read baseline signature: %w", err)
		}
		if !cfg.AllowBootstrap {
			return result, fmt.Errorf("deepseek news baseline missing: %s", baselinePath)
		}
		// First run: the signature just written becomes the baseline.
		if err := copyFileCommon(signaturePath, baselinePath); err != nil {
			return result, fmt.Errorf("initialize baseline: %w", err)
		}
		result.BaselineInitialized = true
		return result, nil
	}

	result.PreviousBaselineHash = previous.StructureSHA256
	if previous.StructureSHA256 != current.StructureSHA256 {
		result.DriftDetected = true
		return result, fmt.Errorf(
			"deepseek news structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s",
			previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath,
		)
	}
	return result, nil
}
// formatDeepSeekNewsSignatureGuardSummary renders a guard result as a single
// line of space-separated key=value pairs. The previous baseline hash is passed
// through emptyIfBlank before being printed.
func formatDeepSeekNewsSignatureGuardSummary(result deepseekNewsSignatureGuardResult) string {
	const layout = "source=deepseek-news-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s"

	currentHash := result.CurrentSignature.StructureSHA256
	previousHash := emptyIfBlank(result.PreviousBaselineHash)

	return fmt.Sprintf(layout,
		result.DriftDetected, result.BaselineInitialized,
		currentHash, previousHash,
		result.SnapshotPath, result.SignaturePath, result.BaselinePath)
}
// buildDeepSeekNewsSignatureAuditRecord converts one guard run into an
// officialImportSignatureAuditRecord under the source key
// "deepseek_news_signature". Path and hash strings are whitespace-trimmed.
// The full signature is attached as payload only when
// hasDeepSeekNewsStructureSignature reports that it carries data.
func buildDeepSeekNewsSignatureAuditRecord(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord {
	trim := strings.TrimSpace
	current := result.CurrentSignature // local copy; its address may be stored below

	var record officialImportSignatureAuditRecord
	record.SourceKey = "deepseek_news_signature"
	record.CheckedAt = checkedAt
	record.Status = officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr)
	record.DriftDetected = result.DriftDetected
	record.BaselineInitialized = result.BaselineInitialized
	record.SourceURL = trim(cfg.URL)
	record.FixturePath = trim(cfg.Fixture)
	record.SnapshotPath = trim(result.SnapshotPath)
	record.SignaturePath = trim(result.SignaturePath)
	record.BaselinePath = trim(result.BaselinePath)
	record.StructureSHA256 = trim(current.StructureSHA256)
	record.PreviousStructureSHA256 = trim(result.PreviousBaselineHash)
	record.ByteSize = current.ByteSize
	record.ErrorMessage = errorMessageText(runErr)

	if !hasDeepSeekNewsStructureSignature(current) {
		return record
	}
	record.SignaturePayload = &current
	return record
}
// persistDeepSeekNewsSignatureAuditIfConfigured builds the audit record for a
// guard run and forwards it to persistOfficialImportSignatureAuditIfConfigured,
// returning that helper's error unchanged.
func persistDeepSeekNewsSignatureAuditIfConfigured(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) error {
	auditRecord := buildDeepSeekNewsSignatureAuditRecord(cfg, result, checkedAt, runErr)
	return persistOfficialImportSignatureAuditIfConfigured(auditRecord)
}

View File

@@ -0,0 +1,88 @@
//go:build llm_script
package main
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
)
func TestRunDeepSeekNewsSignatureGuardInitializesBaseline(t *testing.T) {
tempDir := t.TempDir()
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
URL: defaultDeepSeekNewsFetchURL,
Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"),
SnapshotDir: tempDir,
BaselinePath: baselinePath,
Timeout: time.Second,
AllowBootstrap: true,
}, time.Date(2026, 5, 27, 21, 0, 0, 0, time.FixedZone("CST", 8*3600)))
if err != nil {
t.Fatalf("runDeepSeekNewsSignatureGuard 返回错误: %v", err)
}
if !result.BaselineInitialized {
t.Fatal("期望初始化 baseline")
}
if _, err := os.Stat(baselinePath); err != nil {
t.Fatalf("baseline 未写入: %v", err)
}
}
func TestRunDeepSeekNewsSignatureGuardDetectsDrift(t *testing.T) {
tempDir := t.TempDir()
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
_, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
URL: defaultDeepSeekNewsFetchURL,
Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"),
SnapshotDir: tempDir,
BaselinePath: baselinePath,
Timeout: time.Second,
AllowBootstrap: true,
}, time.Date(2026, 5, 27, 21, 1, 0, 0, time.FixedZone("CST", 8*3600)))
if err != nil {
t.Fatalf("初始化 baseline 失败: %v", err)
}
driftFixture := filepath.Join(tempDir, "drift.html")
if err := os.WriteFile(driftFixture, []byte("<html><head><title>DeepSeek-V4 Release</title><meta name=\"description\" content=\"DeepSeek V4 pricing release\"></head><body><h1>DeepSeek V4 Release</h1></body></html>"), 0o644); err != nil {
t.Fatalf("写入 drift fixture 失败: %v", err)
}
result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
URL: defaultDeepSeekNewsFetchURL,
Fixture: driftFixture,
SnapshotDir: tempDir,
BaselinePath: baselinePath,
Timeout: time.Second,
AllowBootstrap: false,
}, time.Date(2026, 5, 27, 21, 2, 0, 0, time.FixedZone("CST", 8*3600)))
if err == nil {
t.Fatal("期望结构漂移时报错")
}
if !result.DriftDetected {
t.Fatal("期望 driftDetected=true")
}
if !strings.Contains(err.Error(), "deepseek news structure drift detected") {
t.Fatalf("期望返回 drift 错误,实际: %v", err)
}
}
// TestFormatDeepSeekNewsSignatureGuardSummary checks that the summary line
// contains the source tag, the bootstrap flag and the current structure hash.
func TestFormatDeepSeekNewsSignatureGuardSummary(t *testing.T) {
	var input deepseekNewsSignatureGuardResult
	input.SnapshotPath = "/tmp/deepseek-news.html"
	input.SignaturePath = "/tmp/deepseek-news.signature.json"
	input.BaselinePath = "/tmp/baseline.signature.json"
	input.BaselineInitialized = true
	input.CurrentSignature = deepseekNewsStructureSignature{StructureSHA256: "abc123"}

	line := formatDeepSeekNewsSignatureGuardSummary(input)

	expected := []string{
		"source=deepseek-news-signature-guard",
		"baseline_initialized=true",
		"structure_sha256=abc123",
	}
	for i := range expected {
		if strings.Contains(line, expected[i]) {
			continue
		}
		t.Fatalf("summary 缺少 %q实际: %q", expected[i], line)
	}
}

View File

@@ -0,0 +1,196 @@
//go:build llm_script
package main
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
)
// deepseekNewsStructureSignature is the JSON-serialised fingerprint of one
// fetched DeepSeek news page. Only Title, MetaDescription, Headings and
// Contains feed into StructureSHA256; the remaining fields are bookkeeping.
type deepseekNewsStructureSignature struct {
// ByteSize is the length of the raw page in bytes.
ByteSize int `json:"byte_size"`
// SHA256 is the hex SHA-256 of the raw page.
SHA256 string `json:"sha256"`
// StructureSHA256 is the hex SHA-256 of the structural digest payload; it is
// the value compared against the baseline.
StructureSHA256 string `json:"structure_sha256"`
// Title is the cleaned text of the first <title> element.
Title string `json:"title"`
// MetaDescription is the cleaned content of the description meta tag.
MetaDescription string `json:"meta_description"`
// Headings holds the distinct, non-empty <h1> texts in document order.
Headings []string `json:"headings"`
// Contains records, per key of deepseekNewsContainsNeedles, whether the
// needle occurs in the lowercased page.
Contains map[string]bool `json:"contains"`
// GeneratedAt is the RFC 3339 timestamp set when the artifacts are written.
GeneratedAt string `json:"generated_at,omitempty"`
// SourceURL is the URL the page was requested from.
SourceURL string `json:"source_url,omitempty"`
// SnapshotPath is the file the raw page was stored in.
SnapshotPath string `json:"snapshot_path,omitempty"`
}
// deepseekNewsContainsNeedles maps a key of the signature's Contains map to
// the substring looked up (case-insensitively) in the raw page. Because the
// results are part of the structure digest, changing this table changes every
// StructureSHA256 and therefore invalidates stored baselines.
var deepseekNewsContainsNeedles = map[string]string{
"deepseek": "deepseek",
"release": "release",
"news": "news",
"api_docs": "api docs",
}
// htmlTagRe matches any single HTML tag; it is used to strip markup from
// captured text.
var htmlTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
// titleRe captures the inner text of a <title> element (case-insensitive).
var titleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
// metaDescRe captures the content attribute of <meta name="description">. It
// requires name to appear before content.
// NOTE(review): the capture class [^"']+ stops at either quote character, so a
// double-quoted description containing an apostrophe is truncated there.
var metaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
// h1Re captures the inner HTML of each <h1> element (case-insensitive).
var h1Re = regexp.MustCompile(`(?is)<h1[^>]*>(.*?)</h1>`)
// buildDeepSeekNewsStructureSignature derives the structural fingerprint of a
// raw news page: title, meta description, distinct <h1> texts in document
// order, and one keyword flag per entry of deepseekNewsContainsNeedles.
// StructureSHA256 is computed from those fields only; ByteSize and SHA256
// describe the raw bytes.
func buildDeepSeekNewsStructureSignature(raw string) deepseekNewsStructureSignature {
	// Keyword flags are evaluated on the lowercased raw markup.
	lowered := strings.ToLower(raw)
	flags := make(map[string]bool, len(deepseekNewsContainsNeedles))
	for name, needle := range deepseekNewsContainsNeedles {
		flags[name] = strings.Contains(lowered, strings.ToLower(needle))
	}

	// Collect <h1> texts, dropping empty ones and repeats while keeping order.
	h1Groups := h1Re.FindAllStringSubmatch(raw, -1)
	distinct := make([]string, 0, len(h1Groups))
	known := map[string]struct{}{}
	for _, groups := range h1Groups {
		if len(groups) < 2 {
			continue
		}
		text := cleanHTMLText(groups[1])
		_, repeated := known[text]
		if text == "" || repeated {
			continue
		}
		known[text] = struct{}{}
		distinct = append(distinct, text)
	}

	out := deepseekNewsStructureSignature{
		ByteSize:        len(raw),
		SHA256:          deepseekNewsSHA256Hex(raw),
		Title:           firstHTMLMatch(titleRe, raw),
		MetaDescription: firstHTMLMatch(metaDescRe, raw),
		Headings:        distinct,
		Contains:        flags,
	}
	// The structure hash must be computed last: it reads the fields set above.
	out.StructureSHA256 = deepseekNewsSHA256Hex(deepseekNewsStructureDigestPayload(out))
	return out
}
// writeDeepSeekNewsSnapshotArtifacts stores the raw page at snapshotPath and
// its signature (as indented JSON) at signaturePath, creating parent
// directories as needed. The returned signature additionally carries
// GeneratedAt (now, RFC 3339), SourceURL and SnapshotPath. Both paths must be
// non-blank. On any error the zero signature is returned.
func writeDeepSeekNewsSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekNewsStructureSignature, error) {
	var none deepseekNewsStructureSignature

	switch {
	case strings.TrimSpace(snapshotPath) == "":
		return none, fmt.Errorf("snapshot path is required")
	case strings.TrimSpace(signaturePath) == "":
		return none, fmt.Errorf("signature path is required")
	}

	snapshotParent := filepath.Dir(snapshotPath)
	if mkErr := os.MkdirAll(snapshotParent, 0o755); mkErr != nil {
		return none, fmt.Errorf("mkdir snapshot dir: %w", mkErr)
	}
	signatureParent := filepath.Dir(signaturePath)
	if mkErr := os.MkdirAll(signatureParent, 0o755); mkErr != nil {
		return none, fmt.Errorf("mkdir signature dir: %w", mkErr)
	}

	// The snapshot is written before the signature that points at it.
	if writeErr := os.WriteFile(snapshotPath, []byte(raw), 0o644); writeErr != nil {
		return none, fmt.Errorf("write snapshot: %w", writeErr)
	}

	built := buildDeepSeekNewsStructureSignature(raw)
	built.GeneratedAt = now.Format(time.RFC3339)
	built.SourceURL = sourceURL
	built.SnapshotPath = snapshotPath

	encoded, encErr := json.MarshalIndent(built, "", " ")
	if encErr != nil {
		return none, fmt.Errorf("marshal signature: %w", encErr)
	}
	if writeErr := os.WriteFile(signaturePath, encoded, 0o644); writeErr != nil {
		return none, fmt.Errorf("write signature: %w", writeErr)
	}
	return built, nil
}
// resolveDeepSeekNewsSnapshotPaths fills in whichever of the snapshot and
// signature paths is blank (empty or whitespace-only).
//
//   - Explicit snapshot path: it is kept, and a blank signature path becomes
//     the snapshot path with its extension replaced by ".signature.json".
//   - Blank snapshot path: a timestamped stem
//     "<snapshotDir>/deepseek-news-YYYYMMDD-HHMMSS" is built (snapshotDir
//     defaults to logs/deepseek-news-snapshots), giving "<stem>.html" and,
//     unless a signature path was supplied, "<stem>.signature.json".
func resolveDeepSeekNewsSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, now time.Time) (string, string) {
	blank := func(value string) bool { return strings.TrimSpace(value) == "" }

	if !blank(snapshotPath) {
		if blank(signaturePath) {
			withoutExt := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
			signaturePath = withoutExt + ".signature.json"
		}
		return snapshotPath, signaturePath
	}

	if blank(snapshotDir) {
		snapshotDir = filepath.Join("logs", "deepseek-news-snapshots")
	}
	stem := filepath.Join(snapshotDir, fmt.Sprintf("deepseek-news-%s", now.Format("20060102-150405")))
	if blank(signaturePath) {
		signaturePath = stem + ".signature.json"
	}
	return stem + ".html", signaturePath
}
// readDeepSeekNewsStructureSignature loads a signature JSON file. The read
// error is returned unwrapped so callers can test it with os.IsNotExist; a
// decode failure is wrapped with the offending path. The zero signature is
// returned on any error.
func readDeepSeekNewsStructureSignature(path string) (deepseekNewsStructureSignature, error) {
	content, readErr := os.ReadFile(path)
	if readErr != nil {
		return deepseekNewsStructureSignature{}, readErr
	}

	var decoded deepseekNewsStructureSignature
	if decodeErr := json.Unmarshal(content, &decoded); decodeErr != nil {
		// Discard any partially decoded fields.
		return deepseekNewsStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, decodeErr)
	}
	return decoded, nil
}
// hasDeepSeekNewsStructureSignature reports whether the signature carries any
// data: a positive byte size, at least one heading or keyword flag, or a
// non-blank structure hash, raw hash or title.
func hasDeepSeekNewsStructureSignature(signature deepseekNewsStructureSignature) bool {
	if signature.ByteSize > 0 || len(signature.Headings) > 0 || len(signature.Contains) > 0 {
		return true
	}
	for _, text := range []string{signature.StructureSHA256, signature.SHA256, signature.Title} {
		if strings.TrimSpace(text) != "" {
			return true
		}
	}
	return false
}
// deepseekNewsStructureDigestPayload serialises the structural fields of a
// signature (title, meta description, headings, keyword flags) to a JSON
// string. Keyword flags are emitted as a list sorted by name so that map
// iteration order cannot influence the hash computed over the result.
func deepseekNewsStructureDigestPayload(signature deepseekNewsStructureSignature) string {
	type containsEntry struct {
		Name  string `json:"name"`
		Value bool   `json:"value"`
	}
	type digestPayload struct {
		Title           string          `json:"title"`
		MetaDescription string          `json:"meta_description"`
		Headings        []string        `json:"headings"`
		Contains        []containsEntry `json:"contains"`
	}

	names := make([]string, 0, len(signature.Contains))
	for name := range signature.Contains {
		names = append(names, name)
	}
	sort.Strings(names)

	flags := make([]containsEntry, len(names))
	for i, name := range names {
		flags[i] = containsEntry{Name: name, Value: signature.Contains[name]}
	}

	// The marshal error is discarded, as in the original: the payload holds
	// only strings, string slices and booleans.
	encoded, _ := json.Marshal(digestPayload{
		Title:           signature.Title,
		MetaDescription: signature.MetaDescription,
		Headings:        signature.Headings,
		Contains:        flags,
	})
	return string(encoded)
}
// deepseekNewsSHA256Hex returns the lowercase hexadecimal SHA-256 digest of raw.
func deepseekNewsSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// firstHTMLMatch returns the cleaned text of the first capture group of the
// first match of re in raw, or "" when re does not match or has no capture
// group.
func firstHTMLMatch(re *regexp.Regexp, raw string) string {
	if groups := re.FindStringSubmatch(raw); len(groups) >= 2 {
		return cleanHTMLText(groups[1])
	}
	return ""
}
// deepseekNewsEntityReplacer decodes the two HTML entities the signature code
// cares about in a single left-to-right pass, so the output of one replacement
// is never re-scanned by the other.
var deepseekNewsEntityReplacer = strings.NewReplacer("&nbsp;", " ", "&amp;", "&")

// cleanHTMLText turns an HTML fragment into plain text: tags are replaced by
// spaces, "&amp;" and "&nbsp;" are decoded, and runs of whitespace are
// collapsed to single spaces with leading/trailing whitespace removed.
//
// Entities are decoded in one pass. The previous sequential ReplaceAll calls
// double-decoded the escaped literal "&amp;nbsp;" (first to "&nbsp;", then to
// a space), silently dropping text from titles and headings.
func cleanHTMLText(raw string) string {
	text := htmlTagRe.ReplaceAllString(raw, " ")
	text = deepseekNewsEntityReplacer.Replace(text)
	// strings.Fields already discards leading and trailing whitespace.
	return strings.Join(strings.Fields(text), " ")
}

View File

@@ -0,0 +1,57 @@
//go:build llm_script
package main
import (
"flag"
"fmt"
"os"
"time"
)
// main is the command-line entry point of the DeepSeek pricing signature
// guard.
//
// It parses the flags, runs the guard once, hands the outcome to the audit
// persistence helper, prints a one-line summary on stdout and exits with
// status 1 when either the guard or the audit step returned an error.
func main() {
	loadSubscriptionImportEnv()

	auditKey := flag.String("source-key", "deepseek_pricing_signature", "审计 source_key")
	filePrefix := flag.String("snapshot-base", "deepseek-pricing", "snapshot 文件名前缀")
	pageURL := flag.String("url", defaultDeepSeekPricingFetchURL, "DeepSeek 官方价格页")
	fixturePath := flag.String("fixture", "", "DeepSeek 价格页样例文件")
	outDir := flag.String("snapshot-dir", "", "DeepSeek pricing snapshot 输出目录")
	baseline := flag.String("baseline-path", "", "DeepSeek pricing 结构基线签名路径")
	timeoutSecs := flag.Int("timeout", 20, "请求超时(秒)")
	bootstrap := flag.Bool("allow-bootstrap", true, "当 baseline 缺失时自动初始化")
	flag.Parse()

	// One timestamp is shared by the guard run and the audit record.
	startedAt := time.Now()
	cfg := deepseekPricingSignatureGuardConfig{
		SourceKey:      *auditKey,
		URL:            *pageURL,
		Fixture:        *fixturePath,
		SnapshotDir:    *outDir,
		BaselinePath:   *baseline,
		Timeout:        time.Duration(*timeoutSecs) * time.Second,
		AllowBootstrap: *bootstrap,
		SnapshotBase:   *filePrefix,
	}

	result, runErr := runDeepSeekPricingSignatureGuard(cfg, startedAt)

	// The audit record is written even when the guard failed; an audit failure
	// only becomes the exit error when the guard itself succeeded.
	auditErr := persistDeepSeekPricingSignatureAuditIfConfigured(cfg, result, startedAt, runErr)
	if auditErr != nil {
		fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard audit: %v\n", auditErr)
		if runErr == nil {
			runErr = auditErr
		}
	}

	fmt.Println(formatDeepSeekPricingSignatureGuardSummary(*auditKey, result))
	if runErr == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard: %v\n", runErr)
	os.Exit(1)
}

View File

@@ -0,0 +1,132 @@
//go:build llm_script
package main
import (
"fmt"
"net/http"
"os"
"path/filepath"
"strings"
"time"
)
// deepseekPricingSignatureGuardConfig carries the inputs for one run of
// runDeepSeekPricingSignatureGuard.
type deepseekPricingSignatureGuardConfig struct {
// SourceKey labels the audit record and prefixes the guard's error messages.
SourceKey string
// URL is passed to fetchSubscriptionPage and recorded as the signature's source URL.
URL string
// Fixture is passed to fetchSubscriptionPage together with URL; presumably a
// local sample file read instead of a live request — confirm against that helper.
Fixture string
// SnapshotDir is the artifact output directory; "" falls back to
// logs/<SnapshotBase>-snapshots.
SnapshotDir string
// BaselinePath is the baseline signature file; "" falls back to
// baseline.signature.json inside the snapshot directory.
BaselinePath string
// Timeout is used as the http.Client timeout for the fetch.
Timeout time.Duration
// AllowBootstrap lets a missing baseline be created from the current
// signature instead of failing the run.
AllowBootstrap bool
// SnapshotBase is the file-name prefix of the snapshot artifacts.
SnapshotBase string
// SourceKindLabel is not read by any code visible in this file.
SourceKindLabel string
}
// deepseekPricingSignatureGuardResult describes the outcome of one guard run.
// runDeepSeekPricingSignatureGuard may return it partially filled alongside an
// error so the caller can still summarise and audit the run.
type deepseekPricingSignatureGuardResult struct {
// SnapshotPath is the file the fetched page was written to.
SnapshotPath string
// SignaturePath is the file the current signature JSON was written to.
SignaturePath string
// BaselinePath is the resolved baseline signature file.
BaselinePath string
// DriftDetected is true when baseline and current StructureSHA256 differ.
DriftDetected bool
// BaselineInitialized is true when the baseline was missing and was created
// from the current signature during this run.
BaselineInitialized bool
// PreviousBaselineHash is the StructureSHA256 read from the baseline; it stays
// empty when no baseline could be read.
PreviousBaselineHash string
// CurrentSignature is the signature computed from the page fetched in this run.
CurrentSignature deepseekPricingStructureSignature
}
// defaultDeepSeekPricingFetchURL is the default value of the pricing guard
// CLI's -url flag.
const defaultDeepSeekPricingFetchURL = "https://platform.deepseek.com/pricing"
// defaultDeepSeekAPIPricingFetchURL is an alternative pricing page URL; it is
// not referenced by the code visible here.
const defaultDeepSeekAPIPricingFetchURL = "https://platform.deepseek.com/docs/api-pricing"
// runDeepSeekPricingSignatureGuard fetches the DeepSeek pricing page, writes a
// snapshot plus its structure signature, and compares the signature against
// the stored baseline.
//
// Outcomes:
//   - baseline missing and cfg.AllowBootstrap: the current signature is copied
//     to the baseline path, BaselineInitialized is set, error is nil;
//   - baseline missing and bootstrap disabled: error;
//   - baseline present with a different StructureSHA256: DriftDetected is set
//     and an error describing both hashes and all paths is returned;
//   - baseline present and equal: nil error.
//
// When the fetch or the artifact write fails the returned result is empty.
// Later failures return the partially filled result so the caller can still
// print a summary and persist an audit record.
func runDeepSeekPricingSignatureGuard(cfg deepseekPricingSignatureGuardConfig, now time.Time) (deepseekPricingSignatureGuardResult, error) {
	// A blank prefix used to yield "logs/-snapshots" and file names starting
	// with "-"; fall back to the CLI's default prefix instead.
	snapshotBase := cfg.SnapshotBase
	if strings.TrimSpace(snapshotBase) == "" {
		snapshotBase = "deepseek-pricing"
	}

	// Treat whitespace-only values as "not set", exactly like
	// resolveDeepSeekPricingSnapshotPaths does, so the baseline and the
	// snapshots always end up in the same directory.
	snapshotDir := cfg.SnapshotDir
	if strings.TrimSpace(snapshotDir) == "" {
		snapshotDir = filepath.Join("logs", snapshotBase+"-snapshots")
	}
	if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
		return deepseekPricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err)
	}
	snapshotPath, signaturePath := resolveDeepSeekPricingSnapshotPaths("", "", snapshotDir, snapshotBase, now)

	baselinePath := cfg.BaselinePath
	if strings.TrimSpace(baselinePath) == "" {
		baselinePath = filepath.Join(snapshotDir, "baseline.signature.json")
	}

	client := &http.Client{Timeout: cfg.Timeout}
	raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
	if err != nil {
		return deepseekPricingSignatureGuardResult{}, err
	}
	current, err := writeDeepSeekPricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now)
	if err != nil {
		return deepseekPricingSignatureGuardResult{}, err
	}

	result := deepseekPricingSignatureGuardResult{
		SnapshotPath:     snapshotPath,
		SignaturePath:    signaturePath,
		BaselinePath:     baselinePath,
		CurrentSignature: current,
	}

	previous, err := readDeepSeekPricingStructureSignature(baselinePath)
	if err != nil {
		if !os.IsNotExist(err) {
			// Unreadable or corrupt baseline: report it with context rather than
			// overwriting a file an operator may want to inspect.
			return result, fmt.Errorf("read baseline signature: %w", err)
		}
		if !cfg.AllowBootstrap {
			return result, fmt.Errorf("%s baseline missing: %s", cfg.SourceKey, baselinePath)
		}
		// First run: the signature just written becomes the baseline.
		if err := copyFileCommon(signaturePath, baselinePath); err != nil {
			return result, fmt.Errorf("initialize baseline: %w", err)
		}
		result.BaselineInitialized = true
		return result, nil
	}

	result.PreviousBaselineHash = previous.StructureSHA256
	if previous.StructureSHA256 != current.StructureSHA256 {
		result.DriftDetected = true
		return result, fmt.Errorf(
			"%s structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s",
			cfg.SourceKey, previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath,
		)
	}
	return result, nil
}
// formatDeepSeekPricingSignatureGuardSummary renders a guard result as a
// single line of space-separated key=value pairs, tagged with sourceKey. The
// previous baseline hash is passed through emptyIfBlank before being printed.
func formatDeepSeekPricingSignatureGuardSummary(sourceKey string, result deepseekPricingSignatureGuardResult) string {
	const layout = "source=%s drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s"

	currentHash := result.CurrentSignature.StructureSHA256
	previousHash := emptyIfBlank(result.PreviousBaselineHash)

	return fmt.Sprintf(layout,
		sourceKey,
		result.DriftDetected, result.BaselineInitialized,
		currentHash, previousHash,
		result.SnapshotPath, result.SignaturePath, result.BaselinePath)
}
// buildDeepSeekPricingSignatureAuditRecord converts one guard run into an
// officialImportSignatureAuditRecord keyed by cfg.SourceKey. Path and hash
// strings are whitespace-trimmed. The full signature is attached as payload
// only when hasDeepSeekPricingStructureSignature reports that it carries data.
func buildDeepSeekPricingSignatureAuditRecord(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord {
	trim := strings.TrimSpace
	current := result.CurrentSignature // local copy; its address may be stored below

	var record officialImportSignatureAuditRecord
	record.SourceKey = cfg.SourceKey
	record.CheckedAt = checkedAt
	record.Status = officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr)
	record.DriftDetected = result.DriftDetected
	record.BaselineInitialized = result.BaselineInitialized
	record.SourceURL = trim(cfg.URL)
	record.FixturePath = trim(cfg.Fixture)
	record.SnapshotPath = trim(result.SnapshotPath)
	record.SignaturePath = trim(result.SignaturePath)
	record.BaselinePath = trim(result.BaselinePath)
	record.StructureSHA256 = trim(current.StructureSHA256)
	record.PreviousStructureSHA256 = trim(result.PreviousBaselineHash)
	record.ByteSize = current.ByteSize
	record.ErrorMessage = errorMessageText(runErr)

	if !hasDeepSeekPricingStructureSignature(current) {
		return record
	}
	record.SignaturePayload = &current
	return record
}
// persistDeepSeekPricingSignatureAuditIfConfigured builds the audit record for
// a guard run and forwards it to persistOfficialImportSignatureAuditIfConfigured,
// returning that helper's error unchanged.
func persistDeepSeekPricingSignatureAuditIfConfigured(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) error {
	auditRecord := buildDeepSeekPricingSignatureAuditRecord(cfg, result, checkedAt, runErr)
	return persistOfficialImportSignatureAuditIfConfigured(auditRecord)
}

View File

@@ -0,0 +1,96 @@
//go:build llm_script
package main
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
)
func TestRunDeepSeekPricingSignatureGuardInitializesBaseline(t *testing.T) {
tempDir := t.TempDir()
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
fixture := filepath.Join(tempDir, "pricing.html")
if err := os.WriteFile(fixture, []byte(`<html><head><title>DeepSeek</title><meta name="description" content="Join DeepSeek API platform"><meta name="commit-id" content="abc123"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing</body></html>`), 0o644); err != nil {
t.Fatalf("写入 fixture 失败: %v", err)
}
result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
SourceKey: "deepseek_pricing_signature",
URL: defaultDeepSeekPricingFetchURL,
Fixture: fixture,
SnapshotDir: tempDir,
BaselinePath: baselinePath,
Timeout: time.Second,
AllowBootstrap: true,
SnapshotBase: "deepseek-pricing",
}, time.Date(2026, 5, 27, 22, 0, 0, 0, time.FixedZone("CST", 8*3600)))
if err != nil {
t.Fatalf("runDeepSeekPricingSignatureGuard 返回错误: %v", err)
}
if !result.BaselineInitialized {
t.Fatal("期望初始化 baseline")
}
}
func TestRunDeepSeekPricingSignatureGuardDetectsDrift(t *testing.T) {
tempDir := t.TempDir()
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
fixture := filepath.Join(tempDir, "pricing.html")
if err := os.WriteFile(fixture, []byte(`<html><head><title>DeepSeek</title><meta name="description" content="Join DeepSeek API platform"><meta name="commit-id" content="abc123"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing</body></html>`), 0o644); err != nil {
t.Fatalf("写入 fixture 失败: %v", err)
}
_, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
SourceKey: "deepseek_pricing_signature",
URL: defaultDeepSeekPricingFetchURL,
Fixture: fixture,
SnapshotDir: tempDir,
BaselinePath: baselinePath,
Timeout: time.Second,
AllowBootstrap: true,
SnapshotBase: "deepseek-pricing",
}, time.Date(2026, 5, 27, 22, 1, 0, 0, time.FixedZone("CST", 8*3600)))
if err != nil {
t.Fatalf("初始化 baseline 失败: %v", err)
}
driftFixture := filepath.Join(tempDir, "pricing-drift.html")
if err := os.WriteFile(driftFixture, []byte(`<html><head><title>DeepSeek Pricing</title><meta name="description" content="Updated DeepSeek pricing"><meta name="commit-id" content="def456"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing update</body></html>`), 0o644); err != nil {
t.Fatalf("写入 drift fixture 失败: %v", err)
}
result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
SourceKey: "deepseek_pricing_signature",
URL: defaultDeepSeekPricingFetchURL,
Fixture: driftFixture,
SnapshotDir: tempDir,
BaselinePath: baselinePath,
Timeout: time.Second,
AllowBootstrap: false,
SnapshotBase: "deepseek-pricing",
}, time.Date(2026, 5, 27, 22, 2, 0, 0, time.FixedZone("CST", 8*3600)))
if err == nil {
t.Fatal("期望结构漂移时报错")
}
if !result.DriftDetected {
t.Fatal("期望 driftDetected=true")
}
}
// TestFormatDeepSeekPricingSignatureGuardSummary checks that the summary line
// contains the source key, the bootstrap flag and the current structure hash.
func TestFormatDeepSeekPricingSignatureGuardSummary(t *testing.T) {
	var input deepseekPricingSignatureGuardResult
	input.SnapshotPath = "/tmp/deepseek-pricing.html"
	input.SignaturePath = "/tmp/deepseek-pricing.signature.json"
	input.BaselinePath = "/tmp/baseline.signature.json"
	input.BaselineInitialized = true
	input.CurrentSignature = deepseekPricingStructureSignature{StructureSHA256: "abc123"}

	line := formatDeepSeekPricingSignatureGuardSummary("deepseek_pricing_signature", input)

	expected := []string{
		"source=deepseek_pricing_signature",
		"baseline_initialized=true",
		"structure_sha256=abc123",
	}
	for i := range expected {
		if strings.Contains(line, expected[i]) {
			continue
		}
		t.Fatalf("summary 缺少 %q实际: %q", expected[i], line)
	}
}

View File

@@ -0,0 +1,183 @@
//go:build llm_script
package main
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
)
// deepseekPricingStructureSignature is the JSON-serialised fingerprint of one
// fetched DeepSeek pricing page. Only Title, MetaDescription, CommitID,
// CanonicalURL and Contains feed into StructureSHA256; the remaining fields
// are bookkeeping.
type deepseekPricingStructureSignature struct {
// ByteSize is the length of the raw page in bytes.
ByteSize int `json:"byte_size"`
// SHA256 is the hex SHA-256 of the raw page.
SHA256 string `json:"sha256"`
// StructureSHA256 is the hex SHA-256 of the structural digest payload; it is
// the value compared against the baseline.
StructureSHA256 string `json:"structure_sha256"`
// Title is the cleaned text of the first <title> element.
Title string `json:"title"`
// MetaDescription is the cleaned content of the description meta tag.
MetaDescription string `json:"meta_description"`
// CommitID is the content of the <meta name="commit-id"> tag.
CommitID string `json:"commit_id"`
// CanonicalURL is the content of the <meta property="og:url"> tag.
CanonicalURL string `json:"canonical_url"`
// Contains records, per key of deepseekPricingContainsNeedles, whether the
// needle occurs in the lowercased page.
Contains map[string]bool `json:"contains"`
// GeneratedAt is the RFC 3339 timestamp set when the artifacts are written.
GeneratedAt string `json:"generated_at,omitempty"`
// SourceURL is the URL the page was requested from.
SourceURL string `json:"source_url,omitempty"`
// SnapshotPath is the file the raw page was stored in.
SnapshotPath string `json:"snapshot_path,omitempty"`
}
// deepseekPricingContainsNeedles maps a key of the signature's Contains map to
// the substring looked up (case-insensitively) in the raw page. Because the
// results are part of the structure digest, changing this table changes every
// StructureSHA256 and therefore invalidates stored baselines.
// NOTE(review): the "api_docs" key searches for plain "api" here, whereas the
// news guard's "api_docs" key searches for "api docs" — confirm this is intended.
var deepseekPricingContainsNeedles = map[string]string{
"deepseek": "deepseek",
"platform": "platform",
"pricing": "pricing",
"api_docs": "api",
"developer": "developer resources",
}
// deepseekPricingTitleRe captures the inner text of a <title> element.
var deepseekPricingTitleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
// The three meta patterns below capture the content attribute of a specific
// <meta> tag. Each requires the identifying attribute to appear before content.
// NOTE(review): the capture class [^"']+ stops at either quote character, so a
// double-quoted value containing an apostrophe is truncated there.
var deepseekPricingMetaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
var deepseekPricingCommitRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']commit-id["'][^>]+content=["']([^"']+)["']`)
var deepseekPricingCanonicalRe = regexp.MustCompile(`(?is)<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+)["']`)
// deepseekPricingHTMLTagRe matches any single HTML tag; it is used to strip
// markup from captured text.
var deepseekPricingHTMLTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
// buildDeepSeekPricingStructureSignature derives the structural fingerprint of
// a raw pricing page: title, meta description, commit id, og:url, and one
// keyword flag per entry of deepseekPricingContainsNeedles. StructureSHA256 is
// computed from those fields only; ByteSize and SHA256 describe the raw bytes.
func buildDeepSeekPricingStructureSignature(raw string) deepseekPricingStructureSignature {
	// Keyword flags are evaluated on the lowercased raw markup.
	lowered := strings.ToLower(raw)
	flags := make(map[string]bool, len(deepseekPricingContainsNeedles))
	for name, needle := range deepseekPricingContainsNeedles {
		flags[name] = strings.Contains(lowered, strings.ToLower(needle))
	}

	var out deepseekPricingStructureSignature
	out.ByteSize = len(raw)
	out.SHA256 = deepseekPricingSHA256Hex(raw)
	out.Title = firstDeepSeekPricingHTMLMatch(deepseekPricingTitleRe, raw)
	out.MetaDescription = firstDeepSeekPricingHTMLMatch(deepseekPricingMetaDescRe, raw)
	out.CommitID = firstDeepSeekPricingHTMLMatch(deepseekPricingCommitRe, raw)
	out.CanonicalURL = firstDeepSeekPricingHTMLMatch(deepseekPricingCanonicalRe, raw)
	out.Contains = flags

	// The structure hash must be computed last: it reads the fields set above.
	out.StructureSHA256 = deepseekPricingSHA256Hex(deepseekPricingStructureDigestPayload(out))
	return out
}
// writeDeepSeekPricingSnapshotArtifacts stores the raw page at snapshotPath
// and its signature (as indented JSON) at signaturePath, creating parent
// directories as needed. The returned signature additionally carries
// GeneratedAt (now, RFC 3339), SourceURL and SnapshotPath. Both paths must be
// non-blank. On any error the zero signature is returned.
func writeDeepSeekPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekPricingStructureSignature, error) {
	var none deepseekPricingStructureSignature

	switch {
	case strings.TrimSpace(snapshotPath) == "":
		return none, fmt.Errorf("snapshot path is required")
	case strings.TrimSpace(signaturePath) == "":
		return none, fmt.Errorf("signature path is required")
	}

	snapshotParent := filepath.Dir(snapshotPath)
	if mkErr := os.MkdirAll(snapshotParent, 0o755); mkErr != nil {
		return none, fmt.Errorf("mkdir snapshot dir: %w", mkErr)
	}
	signatureParent := filepath.Dir(signaturePath)
	if mkErr := os.MkdirAll(signatureParent, 0o755); mkErr != nil {
		return none, fmt.Errorf("mkdir signature dir: %w", mkErr)
	}

	// The snapshot is written before the signature that points at it.
	if writeErr := os.WriteFile(snapshotPath, []byte(raw), 0o644); writeErr != nil {
		return none, fmt.Errorf("write snapshot: %w", writeErr)
	}

	built := buildDeepSeekPricingStructureSignature(raw)
	built.GeneratedAt = now.Format(time.RFC3339)
	built.SourceURL = sourceURL
	built.SnapshotPath = snapshotPath

	encoded, encErr := json.MarshalIndent(built, "", " ")
	if encErr != nil {
		return none, fmt.Errorf("marshal signature: %w", encErr)
	}
	if writeErr := os.WriteFile(signaturePath, encoded, 0o644); writeErr != nil {
		return none, fmt.Errorf("write signature: %w", writeErr)
	}
	return built, nil
}
// resolveDeepSeekPricingSnapshotPaths fills in whichever of the snapshot and
// signature paths is blank (empty or whitespace-only).
//
//   - Explicit snapshot path: it is kept, and a blank signature path becomes
//     the snapshot path with its extension replaced by ".signature.json".
//   - Blank snapshot path: a timestamped stem
//     "<snapshotDir>/<baseName>-YYYYMMDD-HHMMSS" is built (snapshotDir defaults
//     to logs/<baseName>-snapshots), giving "<stem>.html" and, unless a
//     signature path was supplied, "<stem>.signature.json".
//
// A blank baseName falls back to "deepseek-pricing", the CLI default.
// Previously it produced the directory "logs/-snapshots" and file names
// beginning with "-", which are awkward to handle on the command line.
func resolveDeepSeekPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	if strings.TrimSpace(baseName) == "" {
		baseName = "deepseek-pricing"
	}
	if strings.TrimSpace(snapshotDir) == "" {
		snapshotDir = filepath.Join("logs", baseName+"-snapshots")
	}
	if strings.TrimSpace(snapshotPath) == "" {
		base := filepath.Join(snapshotDir, fmt.Sprintf("%s-%s", baseName, now.Format("20060102-150405")))
		snapshotPath = base + ".html"
		if strings.TrimSpace(signaturePath) == "" {
			signaturePath = base + ".signature.json"
		}
	}
	// Only reachable with a blank signature path when the snapshot path was
	// supplied explicitly: derive the signature file from it.
	if strings.TrimSpace(signaturePath) == "" {
		signaturePath = strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath)) + ".signature.json"
	}
	return snapshotPath, signaturePath
}
// readDeepSeekPricingStructureSignature loads a signature JSON file. The read
// error is returned unwrapped so callers can test it with os.IsNotExist; a
// decode failure is wrapped with the offending path. The zero signature is
// returned on any error.
func readDeepSeekPricingStructureSignature(path string) (deepseekPricingStructureSignature, error) {
	content, readErr := os.ReadFile(path)
	if readErr != nil {
		return deepseekPricingStructureSignature{}, readErr
	}

	var decoded deepseekPricingStructureSignature
	if decodeErr := json.Unmarshal(content, &decoded); decodeErr != nil {
		// Discard any partially decoded fields.
		return deepseekPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, decodeErr)
	}
	return decoded, nil
}
// hasDeepSeekPricingStructureSignature reports whether the signature carries
// any data: a positive byte size, at least one keyword flag, or a non-blank
// structure hash, raw hash, title or commit id.
func hasDeepSeekPricingStructureSignature(signature deepseekPricingStructureSignature) bool {
	if signature.ByteSize > 0 || len(signature.Contains) > 0 {
		return true
	}
	textFields := []string{
		signature.StructureSHA256,
		signature.SHA256,
		signature.Title,
		signature.CommitID,
	}
	for _, text := range textFields {
		if strings.TrimSpace(text) != "" {
			return true
		}
	}
	return false
}
// deepseekPricingStructureDigestPayload serialises the structural fields of a
// signature (title, meta description, commit id, canonical URL, keyword flags)
// to a JSON string. Keyword flags are emitted as a list sorted by name so that
// map iteration order cannot influence the hash computed over the result.
func deepseekPricingStructureDigestPayload(signature deepseekPricingStructureSignature) string {
	type containsEntry struct {
		Name  string `json:"name"`
		Value bool   `json:"value"`
	}
	type digestPayload struct {
		Title           string          `json:"title"`
		MetaDescription string          `json:"meta_description"`
		CommitID        string          `json:"commit_id"`
		CanonicalURL    string          `json:"canonical_url"`
		Contains        []containsEntry `json:"contains"`
	}

	names := make([]string, 0, len(signature.Contains))
	for name := range signature.Contains {
		names = append(names, name)
	}
	sort.Strings(names)

	flags := make([]containsEntry, len(names))
	for i, name := range names {
		flags[i] = containsEntry{Name: name, Value: signature.Contains[name]}
	}

	// The marshal error is discarded, as in the original: the payload holds
	// only strings and booleans.
	encoded, _ := json.Marshal(digestPayload{
		Title:           signature.Title,
		MetaDescription: signature.MetaDescription,
		CommitID:        signature.CommitID,
		CanonicalURL:    signature.CanonicalURL,
		Contains:        flags,
	})
	return string(encoded)
}
// deepseekPricingSHA256Hex returns the lowercase hexadecimal SHA-256 digest of
// raw.
func deepseekPricingSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash.Write is documented to never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// firstDeepSeekPricingHTMLMatch returns the first capture group of re's first
// match in raw, reduced to plain text. It returns "" when re does not match or
// has no capture group.
//
// Clean-up steps, in order: everything matched by deepseekPricingHTMLTagRe is
// replaced with a space, "&amp;" then "&nbsp;" are decoded, and runs of
// whitespace are collapsed to single spaces.
func firstDeepSeekPricingHTMLMatch(re *regexp.Regexp, raw string) string {
	groups := re.FindStringSubmatch(raw)
	if len(groups) < 2 {
		return ""
	}

	cleaned := deepseekPricingHTMLTagRe.ReplaceAllString(groups[1], " ")
	// Entities are decoded one after another (not in a single pass) so that
	// "&amp;nbsp;" still ends up as a space.
	for _, entity := range [][2]string{{"&amp;", "&"}, {"&nbsp;", " "}} {
		cleaned = strings.ReplaceAll(cleaned, entity[0], entity[1])
	}
	words := strings.Fields(cleaned)
	return strings.TrimSpace(strings.Join(words, " "))
}

View File

@@ -154,19 +154,14 @@ func validateIntradayProviderConfig(name string, cfg intradayProviderConfig) err
}
// buildIntradayQueries returns the search queries issued by intraday discovery.
// When providerLimit is positive and smaller than the number of entries, only
// the first providerLimit entries are kept.
//
// NOTE(review): this span appears to show the removed provider×keyword
// implementation interleaved with its replacement (a fixed list of
// site-scoped queries), as rendered by a diff view without +/- markers. The
// replacement list never references date — confirm that dropping the date
// from the queries is intended.
func buildIntradayQueries(date string, providerLimit int) []string {
providers := []string{
"OpenAI", "Anthropic", "Google Gemini", "xAI", "DeepSeek",
"DashScope", "Qwen", "智谱", "百度文心", "腾讯混元", "火山方舟", "MiniMax",
}
keywords := []string{"pricing release announcement", "模型 降价 发布 活动"}
if providerLimit > 0 && providerLimit < len(providers) {
providers = providers[:providerLimit]
}
queries := make([]string, 0, len(providers)*len(keywords))
for _, provider := range providers {
for _, keyword := range keywords {
queries = append(queries, strings.TrimSpace(date+" "+provider+" "+keyword))
queries := []string{
"site:platform.deepseek.com DeepSeek pricing",
"site:api-docs.deepseek.com DeepSeek release news",
"site:docs.anthropic.com Claude Sonnet 4 announcement",
"site:openrouter.ai OpenRouter models",
}
if providerLimit > 0 && providerLimit < len(queries) {
return queries[:providerLimit]
}
return queries
}
@@ -217,8 +212,18 @@ func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex m
Status: "candidate",
VerificationConfidence: "candidate",
}
matchedSearch := false
filteredURLs := make([]string, 0, len(candidate.CandidateURLs))
for _, url := range candidate.CandidateURLs {
if searchRecord, ok := searchIndex[url]; ok {
searchRecord, ok := searchIndex[url]
if !ok {
continue
}
if !searchRecordMatchesDate(searchRecord, date) {
continue
}
matchedSearch = true
filteredURLs = append(filteredURLs, url)
candidate.DiscoverySource = "web_search+llm"
candidate.DiscoveryQuery = searchRecord.Title
candidate.DiscoveryEvidence["search_record"] = searchRecord
@@ -232,7 +237,11 @@ func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex m
candidate.Summary = strings.TrimSpace(searchRecord.Summary)
}
}
if !matchedSearch {
candidate.CandidateURLs = nil
return candidate
}
candidate.CandidateURLs = dedupeStrings(filteredURLs)
return candidate
}
@@ -294,6 +303,36 @@ func buildIntradayNormalizedKey(candidate intradayNewsCandidate) string {
}, "|")
}
// searchRecordMatchesDate reports whether record was published on date
// (formatted YYYY-MM-DD).
//
// A record without a publication timestamp never matches. When the timestamp
// can be parsed by parseSearchPublishedAt its calendar day must equal date;
// otherwise the raw timestamp text only needs to contain date.
func searchRecordMatchesDate(record intradaySearchRecord, date string) bool {
	raw := strings.TrimSpace(record.PublishedAt)
	if raw == "" {
		return false
	}
	day, parsed := parseSearchPublishedAt(raw)
	if !parsed {
		// Unknown format: fall back to a plain substring check.
		return strings.Contains(raw, date)
	}
	return day == date
}
// parseSearchPublishedAt extracts the calendar day (YYYY-MM-DD) from a search
// result's publication timestamp.
//
// Accepted inputs, after trimming surrounding whitespace:
//   - RFC 3339 timestamps and bare YYYY-MM-DD dates;
//   - RFC 1123 style dates with either a zone abbreviation ("GMT") or a
//     numeric offset ("+0800"), with one- or two-digit days;
//   - the same RFC 1123 shapes with Chinese weekday/month names, as emitted
//     by localized RSS feeds ("周一, 25 5月 2026 14:08:00 GMT").
//
// The day is taken in the timestamp's own zone; no conversion to local time is
// performed. The second result is false when no layout matches, in which case
// the first result is "".
func parseSearchPublishedAt(value string) (string, bool) {
	value = strings.TrimSpace(value)
	if value == "" {
		return "", false
	}

	layouts := []string{
		time.RFC3339,
		"2006-01-02",
		time.RFC1123,
		time.RFC1123Z,
		// Single-digit day variants ("Mon, 5 May 2026 ...").
		"Mon, 2 Jan 2006 15:04:05 MST",
		"Mon, 2 Jan 2006 15:04:05 -0700",
	}

	// Map Chinese weekday and month names onto the English abbreviations that
	// time.Parse understands. "1月" cannot swallow "10月".."12月" because a
	// match must start at the first digit and be followed directly by "月".
	localized := strings.NewReplacer(
		"周一", "Mon", "周二", "Tue", "周三", "Wed", "周四", "Thu", "周五", "Fri", "周六", "Sat", "周日", "Sun",
		"1月", "Jan", "2月", "Feb", "3月", "Mar", "4月", "Apr", "5月", "May", "6月", "Jun",
		"7月", "Jul", "8月", "Aug", "9月", "Sep", "10月", "Oct", "11月", "Nov", "12月", "Dec",
	).Replace(value)

	candidates := []string{value}
	if localized != value {
		candidates = append(candidates, localized)
	}
	for _, candidate := range candidates {
		for _, layout := range layouts {
			if ts, err := time.Parse(layout, candidate); err == nil {
				return ts.Format("2006-01-02"), true
			}
		}
	}
	return "", false
}
func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary {
eventTypeCounts := make(map[string]int)

View File

@@ -50,6 +50,7 @@ func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
Summary: "Search summary",
URL: "https://openai.example.com/news/gpt-5-6-pricing",
Provider: "OpenAI",
PublishedAt: "2026-05-25",
}}
llmRecords := []intradayLLMRecord{
{
@@ -80,6 +81,29 @@ func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
}
}
// TestNormalizeIntradayCandidatesDropsOutdatedSearchMatches checks that an LLM
// record whose only supporting search result was published the day before the
// target date produces no candidate.
func TestNormalizeIntradayCandidatesDropsOutdatedSearchMatches(t *testing.T) {
	staleSearch := intradaySearchRecord{
		Title:       "Old DeepSeek pricing article",
		Summary:     "Yesterday record",
		URL:         "https://deepseek.example.com/pricing",
		Provider:    "DeepSeek",
		PublishedAt: "2026-05-24",
	}
	llmClaim := intradayLLMRecord{
		EventType:       "price_cut",
		ProviderName:    "DeepSeek",
		ModelName:       "DeepSeek-V4-Flash",
		ProviderCountry: "CN",
		Title:           "DeepSeek V4 Flash price cut",
		Summary:         "Should be dropped because search evidence is stale",
		CandidateURLs:   []string{"https://deepseek.example.com/pricing"},
	}

	got := normalizeIntradayCandidates(
		"2026-05-25",
		[]intradaySearchRecord{staleSearch},
		[]intradayLLMRecord{llmClaim},
	)
	if count := len(got); count != 0 {
		t.Fatalf("旧闻搜索结果不应进入候选池, got=%d", count)
	}
}
func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
llmRecords := []intradayLLMRecord{{
EventType: "promo_campaign",
@@ -94,6 +118,13 @@ func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
}
}
// TestSearchRecordMatchesLocalizedBingDate checks that a pubDate written with
// Chinese weekday and month names is recognised as belonging to the target day.
func TestSearchRecordMatchesLocalizedBingDate(t *testing.T) {
	localized := intradaySearchRecord{PublishedAt: "周一, 25 5月 2026 14:08:00 GMT"}
	if matched := searchRecordMatchesDate(localized, "2026-05-25"); matched {
		return
	}
	t.Fatal("应识别本地化 Bing pubDate 为当天")
}
func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) {
if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "command_json"}); err == nil {
t.Fatal("缺少 command 时应报错")

View File

@@ -95,6 +95,10 @@ type materializeDailySignalsConfig struct {
// signalLogger is the package-level structured logger; init assigns it a JSON
// handler that writes to stderr at info level.
var signalLogger *slog.Logger
// signalUSDToCNY is a fixed conversion factor.
// NOTE(review): presumably the USD→CNY rate used when normalising prices —
// confirm against its call sites and decide how it should be kept current.
const signalUSDToCNY = 7.25
// Default official DeepSeek page URLs attached to signature-drift signal
// events (they are used as the PrimaryURL of the drift event configs).
// NOTE(review): the news URL targets one dated article path (news250120)
// rather than an index page — confirm this is the page the guard monitors.
const defaultDeepSeekNewsSignalURL = "https://api-docs.deepseek.com/news/news250120"
const defaultDeepSeekPricingSignalURL = "https://platform.deepseek.com/pricing"
const defaultDeepSeekAPIPricingSignalURL = "https://platform.deepseek.com/docs/api-pricing"
func init() {
signalLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
@@ -373,6 +377,11 @@ func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error)
return nil, err
}
events = mergeVerifiedDiscoveryEvents(events, discoveryEvents)
deepseekDriftEvents, err := loadDeepSeekNewsDriftSignalEvents(db)
if err != nil {
return nil, err
}
events = mergeVerifiedDiscoveryEvents(events, deepseekDriftEvents)
sort.Slice(events, func(i, j int) bool {
if events[i].Priority != events[j].Priority {
@@ -954,6 +963,110 @@ func firstString(values []string) string {
return ""
}
// loadDeepSeekNewsDriftSignalEvents builds signal events for structure drift on
// the three monitored DeepSeek pages: the official news page, the pricing page
// and the API pricing page. Each page has its own audit source key, wording and
// priority; the actual lookup is delegated to loadDeepSeekSignatureSignalEvents.
func loadDeepSeekNewsDriftSignalEvents(db *sql.DB) ([]signalModelEvent, error) {
	newsPage := deepseekSignatureEventConfig{
		SourceKey:        "deepseek_news_signature",
		ModelName:        "DeepSeek 官方新闻页",
		SourceKindLabel:  "官方新闻页结构变化",
		PrimaryURL:       defaultDeepSeekNewsSignalURL,
		Audience:         "适合需要尽快复查 DeepSeek 路线图与默认选型的团队",
		EvidenceTemplate: "DeepSeek 官方新闻页结构签名发生变化sha=%s previous=%s",
		Baseline:         "官方新闻页结构漂移",
		Summary:          "DeepSeek 官方新闻页结构发生变化,需优先确认是否出现新发布或路线图更新。",
		Priority:         117,
	}
	pricingPage := deepseekSignatureEventConfig{
		SourceKey:        "deepseek_pricing_signature",
		ModelName:        "DeepSeek 官方价格页",
		SourceKindLabel:  "官方价格页结构变化",
		PrimaryURL:       defaultDeepSeekPricingSignalURL,
		Audience:         "适合需要尽快复查 DeepSeek 价格策略与默认成本模型的团队",
		EvidenceTemplate: "DeepSeek 官方价格页结构签名发生变化sha=%s previous=%s",
		Baseline:         "官方价格页结构漂移",
		Summary:          "DeepSeek 官方价格页结构发生变化,需优先确认是否出现价格策略更新。",
		Priority:         116,
	}
	apiPricingPage := deepseekSignatureEventConfig{
		SourceKey:        "deepseek_api_pricing_signature",
		ModelName:        "DeepSeek API 定价页",
		SourceKindLabel:  "官方 API 定价页结构变化",
		PrimaryURL:       defaultDeepSeekAPIPricingSignalURL,
		Audience:         "适合需要尽快复查 DeepSeek API 定价与预算预期的团队",
		EvidenceTemplate: "DeepSeek API 定价页结构签名发生变化sha=%s previous=%s",
		Baseline:         "官方 API 定价页结构漂移",
		Summary:          "DeepSeek API 定价页结构发生变化,需优先确认是否出现定价或套餐更新。",
		Priority:         115,
	}

	pages := []deepseekSignatureEventConfig{newsPage, pricingPage, apiPricingPage}
	return loadDeepSeekSignatureSignalEvents(db, pages)
}
// deepseekSignatureEventConfig describes how drift on one monitored DeepSeek
// page is turned into a signal event.
type deepseekSignatureEventConfig struct {
	// SourceKey selects the signature-audit rows for this page and must equal
	// the row's own source key for an event to be built.
	SourceKey string

	// PrimaryURL is reported as the event's source URL and is also used as its
	// primary source when the audit row has no snapshot path.
	PrimaryURL string

	// EvidenceTemplate is a fmt format string that receives two arguments: the
	// current structure SHA-256 and the previously observed one.
	EvidenceTemplate string

	// The remaining fields are copied verbatim into the generated event.
	ModelName       string
	SourceKindLabel string
	Audience        string
	Baseline        string
	Summary         string
	Priority        int
}
// loadDeepSeekSignatureSignalEvents queries the recent signature-audit window
// for every config and emits at most one event per config, based on the row
// ranked most recent (RecentRank == 1).
//
// If the audit table or its recent view does not exist, the function reports
// no events and no error, so the feature stays inert on databases that lack
// those relations. Any other query error aborts the load.
//
// NOTE(review): unlike the date-scoped loaders it is merged with, this lookup
// takes no date argument — confirm that the latest drift row should surface
// regardless of the snapshot date being materialised.
func loadDeepSeekSignatureSignalEvents(db *sql.DB, configs []deepseekSignatureEventConfig) ([]signalModelEvent, error) {
	if len(configs) == 0 {
		return nil, nil
	}

	missingRelationMarkers := [...]string{
		`relation "official_import_signature_audit_recent_view" does not exist`,
		`relation "official_import_signature_audit" does not exist`,
	}

	var events []signalModelEvent
	for _, cfg := range configs {
		_, auditRows, queryErr := queryOfficialImportSignatureAuditWindow(db, 5, cfg.SourceKey, false)
		if queryErr != nil {
			message := queryErr.Error()
			for _, marker := range missingRelationMarkers {
				if strings.Contains(message, marker) {
					return nil, nil
				}
			}
			return nil, queryErr
		}

		for _, auditRow := range auditRows {
			if auditRow.RecentRank != 1 {
				continue
			}
			// Only the first top-ranked row is considered, whether or not it
			// yields an event.
			event, ok := buildDeepSeekSignatureSignalEvent(auditRow, cfg)
			if ok {
				events = append(events, event)
			}
			break
		}
	}
	return events, nil
}
// buildDeepSeekSignatureSignalEvent converts one signature-audit row into an
// "official_release" signal event described by cfg.
//
// The second result is false, and no event is produced, when the row belongs
// to a different source key or did not detect drift. The event's primary
// source is the row's snapshot path when one is recorded, otherwise cfg's
// primary URL.
func buildDeepSeekSignatureSignalEvent(row officialImportSignatureAuditViewRow, cfg deepseekSignatureEventConfig) (signalModelEvent, bool) {
	if row.SourceKey != cfg.SourceKey {
		return signalModelEvent{}, false
	}
	if !row.DriftDetected {
		return signalModelEvent{}, false
	}

	source := cfg.PrimaryURL
	// nullStringOrNone yields "none" for a missing snapshot path.
	if snapshot := nullStringOrNone(row.SnapshotPath); snapshot != "none" {
		source = snapshot
	}
	previousSHA := nullStringOrNone(row.PreviousObservedSHA256)

	event := signalModelEvent{
		EventType:       "official_release",
		ModelName:       cfg.ModelName,
		ProviderName:    "DeepSeek",
		OperatorName:    "DeepSeek",
		Audience:        cfg.Audience,
		TrustLabel:      "官方来源 / 结构漂移告警",
		SourceKindLabel: cfg.SourceKindLabel,
		PrimarySource:   source,
		SourceURL:       cfg.PrimaryURL,
		UpdatedAt:       row.CheckedAt.Format("2006-01-02 15:04"),
		EvidenceDetail:  fmt.Sprintf(cfg.EvidenceTemplate, row.StructureSHA256, previousSHA),
		Baseline:        cfg.Baseline,
		Summary:         cfg.Summary,
		Priority:        cfg.Priority,
	}
	return event, true
}
func signalNormalizeIntradayEventType(value string) string {
switch strings.TrimSpace(strings.ToLower(value)) {
case "price_cut":

View File

@@ -92,3 +92,108 @@ func TestMergeVerifiedDiscoveryEventsDropsUnverifiedPriceNarrative(t *testing.T)
t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged)
}
}
func TestBuildDeepSeekNewsDriftEvent(t *testing.T) {
row := officialImportSignatureAuditViewRow{
SourceKey: "deepseek_news_signature",
Status: "drift_detected",
StructureState: "changed",
StructureChanged: true,
DriftDetected: true,
BaselineInitialized: false,
StructureSHA256: "abc123",
}
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
SourceKey: "deepseek_news_signature",
ModelName: "DeepSeek 官方新闻页",
SourceKindLabel: "官方新闻页结构变化",
PrimaryURL: defaultDeepSeekNewsSignalURL,
Audience: "a",
EvidenceTemplate: "news drift %s %s",
Baseline: "官方新闻页结构漂移",
Summary: "summary",
Priority: 117,
})
if !ok {
t.Fatal("期望为 drift 行生成正式信号事件")
}
if event.EventType != "official_release" {
t.Fatalf("DeepSeek drift 应映射为 official_release, got=%q", event.EventType)
}
if event.ProviderName != "DeepSeek" || event.ModelName != "DeepSeek 官方新闻页" {
t.Fatalf("DeepSeek drift 事件主体错误: %+v", event)
}
}
func TestBuildDeepSeekPricingDriftEvent(t *testing.T) {
row := officialImportSignatureAuditViewRow{
SourceKey: "deepseek_pricing_signature",
Status: "drift_detected",
StructureState: "changed",
StructureChanged: true,
DriftDetected: true,
BaselineInitialized: false,
StructureSHA256: "pricing123",
}
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
SourceKey: "deepseek_pricing_signature",
ModelName: "DeepSeek 官方价格页",
SourceKindLabel: "官方价格页结构变化",
PrimaryURL: defaultDeepSeekPricingSignalURL,
Audience: "a",
EvidenceTemplate: "pricing drift %s %s",
Baseline: "官方价格页结构漂移",
Summary: "pricing summary",
Priority: 116,
})
if !ok {
t.Fatal("期望为 pricing drift 行生成正式信号事件")
}
if event.ModelName != "DeepSeek 官方价格页" || event.SourceKindLabel != "官方价格页结构变化" {
t.Fatalf("pricing drift 事件映射错误: %+v", event)
}
}
func TestBuildDeepSeekAPIPricingDriftEvent(t *testing.T) {
row := officialImportSignatureAuditViewRow{
SourceKey: "deepseek_api_pricing_signature",
Status: "drift_detected",
StructureState: "changed",
StructureChanged: true,
DriftDetected: true,
BaselineInitialized: false,
StructureSHA256: "api123",
}
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
SourceKey: "deepseek_api_pricing_signature",
ModelName: "DeepSeek API 定价页",
SourceKindLabel: "官方 API 定价页结构变化",
PrimaryURL: defaultDeepSeekAPIPricingSignalURL,
Audience: "a",
EvidenceTemplate: "api drift %s %s",
Baseline: "官方 API 定价页结构漂移",
Summary: "api pricing summary",
Priority: 115,
})
if !ok {
t.Fatal("期望为 api pricing drift 行生成正式信号事件")
}
if event.ModelName != "DeepSeek API 定价页" || event.SourceKindLabel != "官方 API 定价页结构变化" {
t.Fatalf("api pricing drift 事件映射错误: %+v", event)
}
}
// TestBuildDeepSeekNewsDriftEventSkipsBaselineOnly checks that a row which only
// records baseline initialisation (no drift detected) produces no event.
func TestBuildDeepSeekNewsDriftEventSkipsBaselineOnly(t *testing.T) {
	baselineRow := officialImportSignatureAuditViewRow{
		SourceKey:           "deepseek_news_signature",
		Status:              "baseline_initialized",
		StructureState:      "initial",
		StructureChanged:    false,
		DriftDetected:       false,
		BaselineInitialized: true,
		StructureSHA256:     "abc123",
	}
	newsCfg := deepseekSignatureEventConfig{SourceKey: "deepseek_news_signature"}

	_, produced := buildDeepSeekSignatureSignalEvent(baselineRow, newsCfg)
	if produced {
		t.Fatal("baseline 初始化不应直接进入正式信号")
	}
}

View File

@@ -0,0 +1,59 @@
#!/usr/bin/env python3
"""LLM provider adapter for intraday discovery, backed by OpenRouter.

Reads a JSON request from stdin (keys ``search_results`` and ``date``), asks
the chat-completions endpoint to turn the search results into candidate
records, and prints a JSON array of records on stdout.

Exits with status 1 and a message on stderr when OPENROUTER_API_KEY is not
set. Network errors and undecodable model output are not swallowed; they
terminate the script with a traceback and a non-zero status.
"""
import json
import os
import sys
import urllib.request

API_URL = 'https://openrouter.ai/api/v1/chat/completions'
# Wrapper keys tried first, in this order, when the model returns an object.
WRAPPER_KEYS = ('items', 'candidates')


def extract_records(parsed):
    """Normalise decoded model output to a list of candidate records.

    The request sets ``response_format`` to ``json_object`` while the prompt
    asks for an array, so the model typically wraps the array in an object
    under a key of its own choosing. Accepted shapes:

    * a list -> returned unchanged;
    * an object with a list under ``items`` or ``candidates`` -> that list;
    * an object that is itself one record (has ``event_type``) -> ``[obj]``;
    * an object with some other field holding a list of objects -> that list.

    Anything else yields an empty list.
    """
    if isinstance(parsed, list):
        return parsed
    if not isinstance(parsed, dict):
        return []
    for key in WRAPPER_KEYS:
        if isinstance(parsed.get(key), list):
            return parsed[key]
    if 'event_type' in parsed:
        return [parsed]
    for value in parsed.values():
        # Require objects so a stray list of URL strings is never mistaken
        # for the record list.
        if isinstance(value, list) and value and all(isinstance(entry, dict) for entry in value):
            return value
    return []


def build_prompt(date, search_results):
    """Build the single user message sent to the model."""
    # NOTE(review): the prompt asks for a bare JSON array although the request
    # forces an object response; extract_records compensates for the wrapper.
    return {
        "role": "user",
        "content": (
            "你是大模型情报候选发现器。根据给定搜索结果,只输出 JSON 数组,不要输出 markdown。"
            "每项字段必须包含 event_type, provider_name, model_name, provider_country, title, summary, candidate_urls。"
            "event_type 只能是 price_cut, price_increase, official_release, promo_campaign, leak_or_rumor, unknown。"
            "只有当搜索结果明确像是当天消息时才输出。没有 URL 的候选不要输出。"
            f"\n日期: {date}\n搜索结果:\n" + json.dumps(search_results, ensure_ascii=False)
        )
    }


def main() -> int:
    """Run one discovery request; return the process exit status."""
    api_key = os.environ.get('OPENROUTER_API_KEY', '').strip()
    if not api_key:
        print('OPENROUTER_API_KEY missing', file=sys.stderr)
        return 1

    # Treat empty or whitespace-only stdin as an empty request.
    payload = sys.stdin.read()
    request_data = json.loads(payload.strip() or '{}')
    search_results = request_data.get('search_results', [])
    date = request_data.get('date', '')

    req_body = json.dumps({
        "model": "deepseek/deepseek-v4-flash",
        "messages": [build_prompt(date, search_results)],
        "temperature": 0,
        "max_tokens": 1200,
        "response_format": {"type": "json_object"}
    }).encode('utf-8')
    req = urllib.request.Request(
        API_URL,
        data=req_body,
        headers={
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
            'HTTP-Referer': 'https://github.com/phamnazage-jpg/llm-intelligence',
            'X-Title': 'llm-intelligence intraday discovery'
        },
        method='POST'
    )
    with urllib.request.urlopen(req, timeout=60) as resp:
        data = json.loads(resp.read().decode('utf-8'))

    content = data['choices'][0]['message']['content']
    records = extract_records(json.loads(content))
    print(json.dumps(records, ensure_ascii=False))
    return 0


if __name__ == '__main__':
    raise SystemExit(main())

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
import email.utils
import json
import os
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
def normalize_pubdate(value: str) -> str:
    """Reduce an RSS pubDate to ``YYYY-MM-DD`` when it can be parsed.

    Returns '' for an empty value. A value that the e-mail date parser cannot
    handle is passed through unchanged so a later stage can still inspect it.
    """
    if value:
        try:
            return email.utils.parsedate_to_datetime(value).strftime('%Y-%m-%d')
        except Exception:
            # Unparseable (for example a localized date): keep the raw text.
            return value
    return ''
def infer_provider(title: str, link: str) -> str:
    """Guess the provider a search hit is about from its title and URL.

    Matching is a case-insensitive substring test against a fixed needle
    list; the first needle found wins, so earlier entries take precedence.
    Returns '' when nothing matches.

    Chinese brand names are matched as well as their Latin spellings, because
    the search request asks for zh-CN results and a Chinese title usually
    carries only the Chinese name.
    """
    haystack = f'{title} {link}'.lower()
    needles = (
        ('openai', 'OpenAI'), ('anthropic', 'Anthropic'), ('claude', 'Anthropic'),
        ('gemini', 'Google'), ('google', 'Google'),
        ('deepseek', 'DeepSeek'), ('qwen', 'Qwen'), ('dashscope', 'DashScope'),
        ('zhipu', '智谱'), ('baidu', '百度'), ('tencent', '腾讯'),
        ('minimax', 'MiniMax'), ('x.ai', 'xAI'), ('xai', 'xAI'),
        # Chinese spellings go last so every previous match is unaffected.
        ('智谱', '智谱'), ('百度', '百度'), ('腾讯', '腾讯'),
    )
    for needle, provider in needles:
        if needle in haystack:
            return provider
    return ''
# Script body: run one Bing RSS search for the query given in
# INTRADAY_DISCOVERY_QUERY and print the hits as a JSON array on stdout.
search_query = os.environ.get("INTRADAY_DISCOVERY_QUERY", "").strip()
if not search_query:
    # No query configured: report "no results" and exit successfully.
    print("[]")
    raise SystemExit(0)

rss_url = "https://www.bing.com/search?format=rss&q=" + urllib.parse.quote(search_query)
rss_request = urllib.request.Request(rss_url, headers={
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
})
with urllib.request.urlopen(rss_request, timeout=20) as response:
    rss_text = response.read().decode("utf-8", errors="ignore")


def _child_text(node, tag):
    """Return the stripped text of node's child ``tag``, or '' if absent."""
    return (node.findtext(tag) or '').strip()


def _origin_of(link):
    """Return ``scheme://host`` for link, or '' when either part is missing."""
    if not link:
        return ''
    parts = urllib.parse.urlparse(link)
    if parts.scheme and parts.netloc:
        return f"{parts.scheme}://{parts.netloc}"
    return ''


records = []
for entry in ET.fromstring(rss_text).findall('./channel/item'):
    entry_title = _child_text(entry, 'title')
    entry_link = _child_text(entry, 'link')
    records.append({
        "title": entry_title,
        "summary": _child_text(entry, 'description'),
        "url": entry_link,
        "provider": infer_provider(entry_title, entry_link),
        "provider_url": _origin_of(entry_link),
        "published_at": normalize_pubdate(_child_text(entry, 'pubDate')),
    })
print(json.dumps(records, ensure_ascii=False))

View File

@@ -41,6 +41,30 @@ if [[ "$DRY_RUN" == "true" ]]; then
materialize_args+=(--dry-run)
fi
go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"
go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"
# Extra flags shared by the three DeepSeek signature guards below. In dry-run
# mode the guards are told not to bootstrap a missing baseline.
# NOTE(review): if this script enables `set -u`, expanding an empty array as
# "${deepseek_guard_args[@]}" fails on bash older than 4.4 — confirm the
# minimum bash version.
deepseek_guard_args=()
if [[ "$DRY_RUN" == "true" ]]; then
deepseek_guard_args+=(--allow-bootstrap=false)
fi
# Discovery and verification are mandatory: any failure aborts the run.
if ! go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"; then
exit 1
fi
if ! go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"; then
exit 1
fi
# Signature guards (news page, pricing page, API pricing page). A guard
# failure aborts the run only outside dry-run mode; in dry-run it is tolerated
# and the script continues.
if ! go run -tags llm_script ./scripts/deepseek_news_signature_guard.go ./scripts/deepseek_news_signature_guard_lib.go ./scripts/deepseek_news_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go "${deepseek_guard_args[@]}"; then
if [[ "$DRY_RUN" != "true" ]]; then
exit 1
fi
fi
# The two pricing guards share one program and differ only in source key,
# snapshot base name and URL.
if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_pricing_signature --snapshot-base deepseek-pricing --url https://platform.deepseek.com/pricing "${deepseek_guard_args[@]}"; then
if [[ "$DRY_RUN" != "true" ]]; then
exit 1
fi
fi
if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_api_pricing_signature --snapshot-base deepseek-api-pricing --url https://platform.deepseek.com/docs/api-pricing "${deepseek_guard_args[@]}"; then
if [[ "$DRY_RUN" != "true" ]]; then
exit 1
fi
fi
REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script ./scripts/materialize_daily_signals.go "${materialize_args[@]}"