chore: initial public snapshot for github upload
This commit is contained in:
2
.serena/.gitignore
vendored
Normal file
2
.serena/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
/cache
|
||||||
|
/project.local.yml
|
||||||
152
.serena/project.yml
Normal file
152
.serena/project.yml
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
# the name by which the project can be referenced within Serena
|
||||||
|
project_name: "立交桥"
|
||||||
|
|
||||||
|
|
||||||
|
# list of languages for which language servers are started; choose from:
|
||||||
|
# al bash clojure cpp csharp
|
||||||
|
# csharp_omnisharp dart elixir elm erlang
|
||||||
|
# fortran fsharp go groovy haskell
|
||||||
|
# java julia kotlin lua markdown
|
||||||
|
# matlab nix pascal perl php
|
||||||
|
# php_phpactor powershell python python_jedi r
|
||||||
|
# rego ruby ruby_solargraph rust scala
|
||||||
|
# swift terraform toml typescript typescript_vts
|
||||||
|
# vue yaml zig
|
||||||
|
# (This list may be outdated. For the current list, see values of Language enum here:
|
||||||
|
# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
|
||||||
|
# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
|
||||||
|
# Note:
|
||||||
|
# - For C, use cpp
|
||||||
|
# - For JavaScript, use typescript
|
||||||
|
# - For Free Pascal/Lazarus, use pascal
|
||||||
|
# Special requirements:
|
||||||
|
# Some languages require additional setup/installations.
|
||||||
|
# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
|
||||||
|
# When using multiple languages, the first language server that supports a given file will be used for that file.
|
||||||
|
# The first language is the default language and the respective language server will be used as a fallback.
|
||||||
|
# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
|
||||||
|
languages:
|
||||||
|
- python
|
||||||
|
|
||||||
|
# the encoding used by text files in the project
|
||||||
|
# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
|
||||||
|
encoding: "utf-8"
|
||||||
|
|
||||||
|
# line ending convention to use when writing source files.
|
||||||
|
# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
|
||||||
|
# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
|
||||||
|
line_ending:
|
||||||
|
|
||||||
|
# The language backend to use for this project.
|
||||||
|
# If not set, the global setting from serena_config.yml is used.
|
||||||
|
# Valid values: LSP, JetBrains
|
||||||
|
# Note: the backend is fixed at startup. If a project with a different backend
|
||||||
|
# is activated post-init, an error will be returned.
|
||||||
|
language_backend:
|
||||||
|
|
||||||
|
# whether to use project's .gitignore files to ignore files
|
||||||
|
ignore_all_files_in_gitignore: true
|
||||||
|
|
||||||
|
# list of additional paths to ignore in this project.
|
||||||
|
# Same syntax as gitignore, so you can use * and **.
|
||||||
|
# Note: global ignored_paths from serena_config.yml are also applied additively.
|
||||||
|
ignored_paths: []
|
||||||
|
|
||||||
|
# whether the project is in read-only mode
|
||||||
|
# If set to true, all editing tools will be disabled and attempts to use them will result in an error
|
||||||
|
# Added on 2025-04-18
|
||||||
|
read_only: false
|
||||||
|
|
||||||
|
# list of tool names to exclude.
|
||||||
|
# This extends the existing exclusions (e.g. from the global configuration)
|
||||||
|
#
|
||||||
|
# Below is the complete list of tools for convenience.
|
||||||
|
# To make sure you have the latest list of tools, and to view their descriptions,
|
||||||
|
# execute `uv run scripts/print_tool_overview.py`.
|
||||||
|
#
|
||||||
|
# * `activate_project`: Activates a project by name.
|
||||||
|
# * `check_onboarding_performed`: Checks whether project onboarding was already performed.
|
||||||
|
# * `create_text_file`: Creates/overwrites a file in the project directory.
|
||||||
|
# * `delete_lines`: Deletes a range of lines within a file.
|
||||||
|
# * `delete_memory`: Deletes a memory from Serena's project-specific memory store.
|
||||||
|
# * `execute_shell_command`: Executes a shell command.
|
||||||
|
# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced.
|
||||||
|
# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type).
|
||||||
|
# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type).
|
||||||
|
# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes.
|
||||||
|
# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file.
|
||||||
|
# * `initial_instructions`: Gets the initial instructions for the current project.
|
||||||
|
# Should only be used in settings where the system prompt cannot be set,
|
||||||
|
# e.g. in clients you have no control over, like Claude Desktop.
|
||||||
|
# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol.
|
||||||
|
# * `insert_at_line`: Inserts content at a given line in a file.
|
||||||
|
# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol.
|
||||||
|
# * `list_dir`: Lists files and directories in the given directory (optionally with recursion).
|
||||||
|
# * `list_memories`: Lists memories in Serena's project-specific memory store.
|
||||||
|
# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building).
|
||||||
|
# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context).
|
||||||
|
# * `read_file`: Reads a file within the project directory.
|
||||||
|
# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store.
|
||||||
|
# * `remove_project`: Removes a project from the Serena configuration.
|
||||||
|
# * `replace_lines`: Replaces a range of lines within a file with new content.
|
||||||
|
# * `replace_symbol_body`: Replaces the full definition of a symbol.
|
||||||
|
# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen.
|
||||||
|
# * `search_for_pattern`: Performs a search for a pattern in the project.
|
||||||
|
# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase.
|
||||||
|
# * `switch_modes`: Activates modes by providing a list of their names
|
||||||
|
# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information.
|
||||||
|
# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task.
|
||||||
|
# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed.
|
||||||
|
# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store.
|
||||||
|
excluded_tools: []
|
||||||
|
|
||||||
|
# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
|
||||||
|
# This extends the existing inclusions (e.g. from the global configuration).
|
||||||
|
included_optional_tools: []
|
||||||
|
|
||||||
|
# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
|
||||||
|
# This cannot be combined with non-empty excluded_tools or included_optional_tools.
|
||||||
|
fixed_tools: []
|
||||||
|
|
||||||
|
# list of mode names to that are always to be included in the set of active modes
|
||||||
|
# The full set of modes to be activated is base_modes + default_modes.
|
||||||
|
# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply.
|
||||||
|
# Otherwise, this setting overrides the global configuration.
|
||||||
|
# Set this to [] to disable base modes for this project.
|
||||||
|
# Set this to a list of mode names to always include the respective modes for this project.
|
||||||
|
base_modes:
|
||||||
|
|
||||||
|
# list of mode names that are to be activated by default.
|
||||||
|
# The full set of modes to be activated is base_modes + default_modes.
|
||||||
|
# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply.
|
||||||
|
# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
|
||||||
|
# This setting can, in turn, be overridden by CLI parameters (--mode).
|
||||||
|
default_modes:
|
||||||
|
|
||||||
|
# initial prompt for the project. It will always be given to the LLM upon activating the project
|
||||||
|
# (contrary to the memories, which are loaded on demand).
|
||||||
|
initial_prompt: ""
|
||||||
|
|
||||||
|
# time budget (seconds) per tool call for the retrieval of additional symbol information
|
||||||
|
# such as docstrings or parameter information.
|
||||||
|
# This overrides the corresponding setting in the global configuration; see the documentation there.
|
||||||
|
# If null or missing, use the setting from the global configuration.
|
||||||
|
symbol_info_budget:
|
||||||
|
|
||||||
|
# list of regex patterns which, when matched, mark a memory entry as read‑only.
|
||||||
|
# Extends the list from the global configuration, merging the two lists.
|
||||||
|
read_only_memory_patterns: []
|
||||||
|
|
||||||
|
# list of regex patterns for memories to completely ignore.
|
||||||
|
# Matching memories will not appear in list_memories or activate_project output
|
||||||
|
# and cannot be accessed via read_memory or write_memory.
|
||||||
|
# To access ignored memory files, use the read_file tool on the raw file path.
|
||||||
|
# Extends the list from the global configuration, merging the two lists.
|
||||||
|
# Example: ["_archive/.*", "_episodes/.*"]
|
||||||
|
ignored_memory_patterns: []
|
||||||
|
|
||||||
|
# advanced configuration option allowing to configure language server-specific options.
|
||||||
|
# Maps the language key to the options.
|
||||||
|
# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
|
||||||
|
# No documentation on options means no options are available.
|
||||||
|
ls_specific_settings: {}
|
||||||
111
docs/acceptance_gate_single_source_v1_2026-03-18.md
Normal file
111
docs/acceptance_gate_single_source_v1_2026-03-18.md
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
# 唯一验收门禁表(Single Source of Truth)
|
||||||
|
|
||||||
|
- 版本:v1.1
|
||||||
|
- 日期:2026-03-24
|
||||||
|
- 状态:生效
|
||||||
|
- 用途:统一 S0/S1/S2 的验收阈值、判定逻辑与阻断动作,消除多文档阈值漂移。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 适用范围与优先级
|
||||||
|
|
||||||
|
1. 本文档是阶段验收与升波决策的唯一门禁来源。
|
||||||
|
2. 若其他文档阈值与本文冲突,以本文为准。
|
||||||
|
3. 其他文档仅保留解释性描述,不再重复定义最终阈值。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 指标字典(唯一口径)
|
||||||
|
|
||||||
|
| 指标ID | 指标名 | 定义/公式 | 目标值 | 阻断阈值 | 数据来源 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| M-001 | `gateway_availability` | 成功请求/总请求 | >=99.9%(S1);>=99.95%(S2) | <99.5%(连续3天) | 网关指标/APM |
|
||||||
|
| M-002 | `gateway_added_latency_p95_ms` | 网关附加时延 P95 | <=60ms | >100ms(连续3天) | APM/系统指标 |
|
||||||
|
| M-003 | `gateway_added_latency_p99_ms` | 网关附加时延 P99 | <=100ms(S2-B及后) | >150ms | APM/系统指标 |
|
||||||
|
| M-004 | `billing_error_rate_pct` | 账务差错请求/计费请求 | <=0.1% | >0.1% | 对账报表 |
|
||||||
|
| M-005 | `billing_conflict_rate_pct` | 幂等冲突/计费请求 | <=0.01% | >0.01%(即触发阻断) | 幂等审计计数器 |
|
||||||
|
| M-006 | `overall_takeover_pct` | 自研主路径请求/全部主路径请求 | S2终验>=60% | Wave-Global-3期间<60% | 验收SQL |
|
||||||
|
| M-007 | `cn_takeover_pct` | 自研国内供应商请求/国内供应商请求 | =100% | <100% 持续5分钟 | 验收SQL |
|
||||||
|
| M-008 | `route_mark_coverage_pct` | 路由标记覆盖请求/主路径请求 | >=99.9% | <99.9% | 验收SQL |
|
||||||
|
| M-009 | `fallback_success_rate` | fallback 成功次数/fallback 总次数 | >=95%(A);>=97%(B/C) | <90% | 运行指标 |
|
||||||
|
| M-010 | `routing_accuracy_pct` | 路由正确请求/可判定请求 | >=99%(A);>=99.5%(C) | <98% | 路由审计 |
|
||||||
|
| M-011 | `provider_validation_success_pct` | 套餐验证成功数/验证总数 | >=90% | <85% | 供应侧验证服务 |
|
||||||
|
| M-012 | `supplier_onboarded_count` | 入驻供应方数量 | >=10(S0) | <8(S0收尾) | 运营台账 |
|
||||||
|
| M-013 | `supplier_credential_exposure_events` | 供应方上游凭证泄露事件数 | =0 | >0 即 P0 | 安全审计/脱敏扫描 |
|
||||||
|
| M-014 | `platform_credential_ingress_coverage_pct` | 使用平台凭证入站请求/总入站请求 | =100% | <100% | 鉴权日志/网关审计 |
|
||||||
|
| M-015 | `direct_supplier_call_by_consumer_events` | 需求方绕过平台直连供应方事件数 | =0 | >0 即 P0 | 出网审计/安全事件中心 |
|
||||||
|
| M-016 | `query_key_external_reject_rate_pct` | 外部 query key 被拒绝数/外部 query key 请求总数 | =100% | <100% | 网关拦截日志 |
|
||||||
|
|
||||||
|
口径补充:
|
||||||
|
1. 主路径端点集合固定为 `/v1/chat/completions`、`/v1/messages`、`/v1/responses`、`/v1beta/*`。
|
||||||
|
2. `/responses` 等 alias 入口必须在 Ingress 归一后再统计。
|
||||||
|
3. 国内平台分类来源固定为配置表 `gateway_cn_platforms`,禁止 SQL 硬编码。
|
||||||
|
4. 需求方仅可使用平台签发凭证访问平台入口,禁止获取供应方上游凭证。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 分阶段门禁表
|
||||||
|
|
||||||
|
### 3.1 S0 阶段门禁
|
||||||
|
|
||||||
|
| Gate ID | 场景 | 必达条件 | 不通过动作 | 责任人 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| G-S0-1 | 供应侧MVP验收 | M-012>=10 且 M-011>=90% | 延长S0 1-2周并冻结对外承诺 | 产品+运营 |
|
||||||
|
| G-S0-2 | Key安全专项验收 | API Key专项安全验收通过(对应WBS C0.1.4) | 阻断S0验收,先修安全缺陷 | 安全+测试 |
|
||||||
|
|
||||||
|
### 3.2 S1 阶段门禁
|
||||||
|
|
||||||
|
| Gate ID | 场景 | 必达条件 | 不通过动作 | 责任人 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| G-S1-1 | 灰度7天上线门禁 | M-001>=99.9%,M-004<=0.1%,30分钟内回滚演练通过 | 不得升至全量;维持灰度并整改 | 平台+SRE |
|
||||||
|
| G-S1-2 | 发布前兼容门禁 | Schema/Behavior/Performance 三重Gate全部通过 | 阻断发布 | 架构+QA |
|
||||||
|
| G-S1-3 | 凭证边界门禁 | M-013=0,M-014=100%,M-016=100% | 阻断发布并触发安全复盘 | 安全+平台 |
|
||||||
|
|
||||||
|
### 3.3 S2 阶段门禁(替换核心)
|
||||||
|
|
||||||
|
| Gate ID | 阶段 | 必达条件 | 升波条件 | 阻断条件 | 不通过动作 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| G-S2-A | 10% | M-001>=99.9%,M-002<=60,M-010>=99%,M-004<=0.1%,M-009>=95%,M-014=100% | 连续2周达标 | 任一红线触发 | 回切 subapi,修复后重试 |
|
||||||
|
| G-S2-B | 30% | M-001>=99.95%,M-003<=100,M-009>=97%,M-014=100% | 连续2周达标 | M-004>0.1% 或 P0事故 | 暂停升波,补救后复核 |
|
||||||
|
| G-S2-C1 | 40%中间检查点 | M-001>=99.95%,M-002<=60,M-003<=100,M-010>=99.5%,M-004<=0.1%,M-009>=97%,M-013=0,M-014=100%,M-015=0 | GO/CONDITIONAL GO | 任一红灯阈值 | 决策会:继续/附条件继续/回滚 |
|
||||||
|
| G-S2-C2 | 60%终验 | M-006>=60%,M-007=100%,M-004<=0.1%,M-005<=0.01%,M-008>=99.9%,M-001>=99.95%,M-013=0,M-014=100%,M-015=0,M-016=100% | 通过后S2完成 | 任一硬门槛不满足 | 延长S2并冻结升波,不降终验目标 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 红线规则(跨阶段)
|
||||||
|
|
||||||
|
任一条命中即触发 `P0`:
|
||||||
|
|
||||||
|
1. `cn_takeover_pct < 100%` 持续 5 分钟。
|
||||||
|
2. `billing_conflict_rate_pct > 0.01%`。
|
||||||
|
3. `billing_error_rate_pct > 0.1%` 持续 30 分钟。
|
||||||
|
4. 流式 replay/双流拼接被验证复现。
|
||||||
|
5. 主路径口径缺失:`route_mark_coverage_pct < 99.9%`。
|
||||||
|
6. `supplier_credential_exposure_events > 0`。
|
||||||
|
7. `direct_supplier_call_by_consumer_events > 0`。
|
||||||
|
8. `platform_credential_ingress_coverage_pct < 100%` 或 `query_key_external_reject_rate_pct < 100%`。
|
||||||
|
|
||||||
|
处理动作:
|
||||||
|
1. 立即停止升波。
|
||||||
|
2. 触发自动回切(目标 10 分钟内触发、30 分钟内恢复)。
|
||||||
|
3. 24小时内提交复盘与修复计划。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 决策与证据包要求
|
||||||
|
|
||||||
|
1. 每个 Gate 必须有“通过/有条件通过/不通过”结论。
|
||||||
|
2. 每次升波必须提交证据包:
|
||||||
|
- 原始执行日志
|
||||||
|
- SQL 结果快照
|
||||||
|
- 关键指标截图
|
||||||
|
- 风险说明与责任人签字
|
||||||
|
3. 无证据包视为未通过。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 变更治理
|
||||||
|
|
||||||
|
1. 本文档阈值变更属于高风险变更,必须通过评审会批准。
|
||||||
|
2. 变更记录必须包含:变更原因、影响评估、回滚条件、生效日期。
|
||||||
|
3. 未完成变更审议前,不得在周报/看板/执行文档中使用新阈值。
|
||||||
452
docs/api_solution_v1_2026-03-18.md
Normal file
452
docs/api_solution_v1_2026-03-18.md
Normal file
@@ -0,0 +1,452 @@
|
|||||||
|
# API设计解决方案(P0问题修复)
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:系统性解决评审发现的API设计P0问题
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. API版本管理策略
|
||||||
|
|
||||||
|
### 1.1 当前问题
|
||||||
|
|
||||||
|
- 无版本管理策略
|
||||||
|
- breaking change 无法处理
|
||||||
|
- 旧版本无法废弃
|
||||||
|
|
||||||
|
### 1.2 解决方案
|
||||||
|
|
||||||
|
#### 1.2.1 版本策略:URL Path
|
||||||
|
|
||||||
|
```python
|
||||||
|
# API 版本配置
|
||||||
|
API_VERSION_CONFIG = {
|
||||||
|
'v1': {
|
||||||
|
'status': 'deprecated',
|
||||||
|
'sunset_date': '2027-06-01', # 废弃日期
|
||||||
|
'migration_guide': '/docs/v1-migration',
|
||||||
|
'features': ['basic_chat', 'embeddings']
|
||||||
|
},
|
||||||
|
'v2': {
|
||||||
|
'status': 'active',
|
||||||
|
'features': ['basic_chat', 'embeddings', 'streaming', 'tools']
|
||||||
|
},
|
||||||
|
'v3': {
|
||||||
|
'status': 'beta',
|
||||||
|
'features': ['basic_chat', 'embeddings', 'streaming', 'tools', 'batch']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# 版本检查中间件
|
||||||
|
class APIVersionMiddleware:
|
||||||
|
def process_request(self, request, handler):
|
||||||
|
# 1. 提取版本
|
||||||
|
path_parts = request.path.split('/')
|
||||||
|
version = path_parts[1] if len(path_parts) > 1 else 'v1'
|
||||||
|
|
||||||
|
# 2. 验证版本存在
|
||||||
|
if version not in API_VERSION_CONFIG:
|
||||||
|
return ErrorResponse(
|
||||||
|
status=404,
|
||||||
|
error={
|
||||||
|
'code': 'API_VERSION_NOT_FOUND',
|
||||||
|
'message': f'API version {version} not found',
|
||||||
|
'available_versions': list(API_VERSION_CONFIG.keys())
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3. 检查废弃状态
|
||||||
|
config = API_VERSION_CONFIG[version]
|
||||||
|
if config['status'] == 'deprecated':
|
||||||
|
# 添加废弃警告头
|
||||||
|
request.headers['Deprecation'] = f'="{config["sunset_date"]}"'
|
||||||
|
request.headers['Link'] = f'<{config["migration_guide"]}>; rel="migration"'
|
||||||
|
|
||||||
|
# 4. 存储版本信息
|
||||||
|
request.api_version = version
|
||||||
|
|
||||||
|
return handler(request)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 1.2.2 废弃流程
|
||||||
|
|
||||||
|
```python
|
||||||
|
class APIDeprecationManager:
|
||||||
|
def __init__(self):
|
||||||
|
self.timeline = {
|
||||||
|
'v1': {
|
||||||
|
'announced': '2026-03-01',
|
||||||
|
'deprecated': '2026-06-01',
|
||||||
|
'sunset': '2027-06-01',
|
||||||
|
'migration_guide': '/docs/v1-migration'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def handle_request(self, request):
|
||||||
|
"""处理废弃版本请求"""
|
||||||
|
version = request.api_version
|
||||||
|
config = API_VERSION_CONFIG[version]
|
||||||
|
|
||||||
|
if config['status'] == 'deprecated':
|
||||||
|
# 1. 添加警告响应头
|
||||||
|
response.headers['Deprecation'] = 'true'
|
||||||
|
response.headers['Sunset'] = config['sunset_date']
|
||||||
|
|
||||||
|
# 2. 记录废弃版本使用
|
||||||
|
metrics.increment('api.deprecated_version.used', tags={
|
||||||
|
'version': version
|
||||||
|
})
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
def get_migration_guide(self, from_version, to_version):
|
||||||
|
"""获取迁移指南"""
|
||||||
|
return {
|
||||||
|
'from': from_version,
|
||||||
|
'to': to_version,
|
||||||
|
'breaking_changes': [
|
||||||
|
{
|
||||||
|
'endpoint': '/v1/chat/completions',
|
||||||
|
'change': 'Response format changed',
|
||||||
|
'migration': 'Use response_format v2 compatibility mode'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
'tools': [
|
||||||
|
{
|
||||||
|
'name': 'Migration SDK',
|
||||||
|
'description': 'Auto-convert requests to new format',
|
||||||
|
'install': 'pip install lgw-migration'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 完整错误码体系
|
||||||
|
|
||||||
|
### 2.1 当前问题
|
||||||
|
|
||||||
|
- 只有HTTP状态码
|
||||||
|
- 无业务错误码
|
||||||
|
- 错误信息不完整
|
||||||
|
|
||||||
|
### 2.2 解决方案
|
||||||
|
|
||||||
|
#### 2.2.1 错误码定义
|
||||||
|
|
||||||
|
```python
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
class ErrorCode(Enum):
|
||||||
|
# 认证授权 (AUTH_*)
|
||||||
|
AUTH_INVALID_TOKEN = ('AUTH_001', 'Invalid or expired token', 401, False)
|
||||||
|
AUTH_INSUFFICIENT_PERMISSION = ('AUTH_002', 'Insufficient permissions', 403, False)
|
||||||
|
AUTH_MFA_REQUIRED = ('AUTH_003', 'MFA verification required', 403, False)
|
||||||
|
|
||||||
|
# 计费 (BILLING_*)
|
||||||
|
BILLING_INSUFFICIENT_BALANCE = ('BILLING_001', 'Insufficient balance', 402, False)
|
||||||
|
BILLING_CHARGE_FAILED = ('BILLING_002', 'Charge failed', 500, True)
|
||||||
|
BILLING_REFUND_FAILED = ('BILLING_003', 'Refund failed', 500, True)
|
||||||
|
BILLING_DISCREPANCY = ('BILLING_004', 'Billing discrepancy detected', 500, True)
|
||||||
|
|
||||||
|
# 路由 (ROUTER_*)
|
||||||
|
ROUTER_NO_PROVIDER_AVAILABLE = ('ROUTER_001', 'No provider available', 503, True)
|
||||||
|
ROUTER_ALL_PROVIDERS_FAILED = ('ROUTER_002', 'All providers failed', 503, True)
|
||||||
|
ROUTER_TIMEOUT = ('ROUTER_003', 'Request timeout', 504, True)
|
||||||
|
|
||||||
|
# 供应商 (PROVIDER_*)
|
||||||
|
PROVIDER_INVALID_KEY = ('PROVIDER_001', 'Invalid API key', 401, False)
|
||||||
|
PROVIDER_RATE_LIMIT = ('PROVIDER_002', 'Rate limit exceeded', 429, False)
|
||||||
|
PROVIDER_QUOTA_EXCEEDED = ('PROVIDER_003', 'Quota exceeded', 402, False)
|
||||||
|
PROVIDER_MODEL_NOT_FOUND = ('PROVIDER_004', 'Model not found', 404, False)
|
||||||
|
PROVIDER_ERROR = ('PROVIDER_005', 'Provider error', 502, True)
|
||||||
|
|
||||||
|
# 限流 (RATE_LIMIT_*)
|
||||||
|
RATE_LIMIT_EXCEEDED = ('RATE_LIMIT_001', 'Rate limit exceeded', 429, False)
|
||||||
|
RATE_LIMIT_TOKEN_EXCEEDED = ('RATE_LIMIT_002', 'Token limit exceeded', 429, False)
|
||||||
|
RATE_LIMIT_BURST_EXCEEDED = ('RATE_LIMIT_003', 'Burst limit exceeded', 429, False)
|
||||||
|
|
||||||
|
# 通用 (COMMON_*)
|
||||||
|
COMMON_INVALID_REQUEST = ('COMMON_001', 'Invalid request', 400, False)
|
||||||
|
COMMON_RESOURCE_NOT_FOUND = ('COMMON_002', 'Resource not found', 404, False)
|
||||||
|
COMMON_INTERNAL_ERROR = ('COMMON_003', 'Internal error', 500, True)
|
||||||
|
COMMON_SERVICE_UNAVAILABLE = ('COMMON_004', 'Service unavailable', 503, True)
|
||||||
|
|
||||||
|
def __init__(self, code, message, status_code, retryable):
|
||||||
|
self.code = code
|
||||||
|
self.message = message
|
||||||
|
self.status_code = status_code
|
||||||
|
self.retryable = retryable
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.2 错误响应格式
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ErrorResponse:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
error_code: ErrorCode,
|
||||||
|
message: str = None,
|
||||||
|
details: dict = None,
|
||||||
|
request_id: str = None,
|
||||||
|
doc_url: str = None
|
||||||
|
):
|
||||||
|
self.error = {
|
||||||
|
'code': error_code.code,
|
||||||
|
'message': message or error_code.message,
|
||||||
|
'details': details or {},
|
||||||
|
'request_id': request_id,
|
||||||
|
'doc_url': doc_url or f'/docs/errors/{error_code.code.lower()}',
|
||||||
|
'retryable': error_code.retryable
|
||||||
|
}
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return self.error
|
||||||
|
|
||||||
|
def to_json(self):
|
||||||
|
return json.dumps(self.error)
|
||||||
|
|
||||||
|
# 使用示例
|
||||||
|
raise ErrorResponse(
|
||||||
|
error_code=ErrorCode.BILLING_INSUFFICIENT_BALANCE,
|
||||||
|
details={
|
||||||
|
'required': 100.00,
|
||||||
|
'available': 50.00,
|
||||||
|
'currency': 'USD',
|
||||||
|
'top_up_url': '/api/v1/billing/top-up'
|
||||||
|
},
|
||||||
|
request_id=get_request_id()
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.3 错误码文档生成
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# openapi.yaml 部分
|
||||||
|
components:
|
||||||
|
ErrorCode:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
code:
|
||||||
|
type: string
|
||||||
|
example: BILLING_001
|
||||||
|
message:
|
||||||
|
type: string
|
||||||
|
example: Insufficient balance
|
||||||
|
details:
|
||||||
|
type: object
|
||||||
|
request_id:
|
||||||
|
type: string
|
||||||
|
doc_url:
|
||||||
|
type: string
|
||||||
|
retryable:
|
||||||
|
type: boolean
|
||||||
|
|
||||||
|
errors:
|
||||||
|
BILLING_INSUFFICIENT_BALANCE:
|
||||||
|
status: 402
|
||||||
|
message: "余额不足"
|
||||||
|
details:
|
||||||
|
required:
|
||||||
|
type: number
|
||||||
|
description: "所需金额"
|
||||||
|
available:
|
||||||
|
type: number
|
||||||
|
description: "可用余额"
|
||||||
|
top_up_url:
|
||||||
|
type: string
|
||||||
|
description: "充值链接"
|
||||||
|
retryable: false
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. SDK 规划
|
||||||
|
|
||||||
|
### 3.1 当前问题
|
||||||
|
|
||||||
|
- 无官方SDK
|
||||||
|
- 开发者体验差
|
||||||
|
|
||||||
|
### 3.2 解决方案
|
||||||
|
|
||||||
|
#### 3.2.1 SDK 路线图
|
||||||
|
|
||||||
|
```
|
||||||
|
Phase 1 (S1): 兼容层
|
||||||
|
├── Python SDK (OpenAI兼容)
|
||||||
|
├── Node.js SDK (OpenAI兼容)
|
||||||
|
└── 透明迁移工具
|
||||||
|
|
||||||
|
Phase 2 (S2): 自有SDK
|
||||||
|
├── Python SDK (自有API)
|
||||||
|
├── Node.js SDK (自有API)
|
||||||
|
└── Go SDK
|
||||||
|
|
||||||
|
Phase 3 (S3): 高级功能
|
||||||
|
├── 重试中间件
|
||||||
|
├── 缓存中间件
|
||||||
|
├── 指标中间件
|
||||||
|
└── 框架集成 (LangChain, LlamaIndex)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.2 Python SDK 设计
|
||||||
|
|
||||||
|
```python
|
||||||
|
# lgw-sdk-python
|
||||||
|
class LLMGateway:
|
||||||
|
"""LLM Gateway Python SDK"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
api_key: str,
|
||||||
|
base_url: str = "https://api.lgateway.com",
|
||||||
|
timeout: float = 60.0,
|
||||||
|
max_retries: int = 3
|
||||||
|
):
|
||||||
|
self.api_key = api_key
|
||||||
|
self.base_url = base_url
|
||||||
|
self.timeout = timeout
|
||||||
|
self.max_retries = max_retries
|
||||||
|
self._session = requests.Session()
|
||||||
|
|
||||||
|
# 默认配置
|
||||||
|
self.default_headers = {
|
||||||
|
'Authorization': f'Bearer {api_key}',
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}
|
||||||
|
|
||||||
|
def chat.completions(
|
||||||
|
self,
|
||||||
|
model: str,
|
||||||
|
messages: List[Dict],
|
||||||
|
**kwargs
|
||||||
|
) -> ChatCompletion:
|
||||||
|
"""聊天完成"""
|
||||||
|
response = self._request(
|
||||||
|
method='POST',
|
||||||
|
path='/v1/chat/completions',
|
||||||
|
json={
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
**kwargs
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return ChatCompletion(**response)
|
||||||
|
|
||||||
|
def _request(self, method, path, **kwargs):
|
||||||
|
"""发送请求(带重试)"""
|
||||||
|
url = f"{self.base_url}{path}"
|
||||||
|
headers = {**self.default_headers, **kwargs.pop('headers', {})}
|
||||||
|
|
||||||
|
for attempt in range(self.max_retries):
|
||||||
|
try:
|
||||||
|
response = self._session.request(
|
||||||
|
method=method,
|
||||||
|
url=url,
|
||||||
|
headers=headers,
|
||||||
|
timeout=self.timeout,
|
||||||
|
**kwargs
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.json()
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
if attempt == self.max_retries - 1:
|
||||||
|
raise
|
||||||
|
# 指数退避
|
||||||
|
time.sleep(2 ** attempt)
|
||||||
|
|
||||||
|
# 使用示例
|
||||||
|
client = LLMGateway(api_key="lgw-xxx")
|
||||||
|
response = client.chat.completions(
|
||||||
|
model="gpt-4",
|
||||||
|
messages=[{"role": "user", "content": "Hello"}]
|
||||||
|
)
|
||||||
|
print(response.choices[0].message.content)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.3 Node.js SDK 设计
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// lgw-sdk-node
|
||||||
|
export class LLMGateway {
|
||||||
|
private apiKey: string;
|
||||||
|
private baseURL: string;
|
||||||
|
private maxRetries: number;
|
||||||
|
|
||||||
|
constructor(config: LLMGatewayConfig) {
|
||||||
|
this.apiKey = config.apiKey;
|
||||||
|
this.baseURL = config.baseURL || 'https://api.lgateway.com';
|
||||||
|
this.maxRetries = config.maxRetries || 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
async chat.completions(
|
||||||
|
params: ChatCompletionParams
|
||||||
|
): Promise<ChatCompletion> {
|
||||||
|
const response = await this.request(
|
||||||
|
'POST',
|
||||||
|
'/v1/chat/completions',
|
||||||
|
params
|
||||||
|
);
|
||||||
|
return response as ChatCompletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async request<T>(
|
||||||
|
method: string,
|
||||||
|
path: string,
|
||||||
|
body?: any,
|
||||||
|
retries: number = 0
|
||||||
|
): Promise<T> {
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${this.baseURL}${path}`, {
|
||||||
|
method,
|
||||||
|
headers: {
|
||||||
|
'Authorization': `Bearer ${this.apiKey}`,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: body ? JSON.stringify(body) : undefined,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new LLMGatewayError(await response.json());
|
||||||
|
}
|
||||||
|
|
||||||
|
return response.json();
|
||||||
|
} catch (error) {
|
||||||
|
if (retries < this.maxRetries) {
|
||||||
|
await this.sleep(Math.pow(2, retries));
|
||||||
|
return this.request(method, path, body, retries + 1);
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 实施计划
|
||||||
|
|
||||||
|
### 4.1 任务分解
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 截止 | 依赖 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| API版本管理中间件 | 架构 | S0-M1 | - |
|
||||||
|
| 错误码体系定义 | 后端 | S0-M1 | - |
|
||||||
|
| 错误响应格式统一 | 后端 | S0-M1 | - |
|
||||||
|
| Python SDK开发 | 前端 | S1 | - |
|
||||||
|
| Node.js SDK开发 | 前端 | S1 | - |
|
||||||
|
|
||||||
|
### 4.2 验证标准
|
||||||
|
|
||||||
|
- API版本可管理、可废弃
|
||||||
|
- 所有错误都有完整错误码
|
||||||
|
- SDK可通过pip/npm安装
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:API设计解决方案
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_prd_v0_2026-03-16.md`
|
||||||
527
docs/architecture_solution_v1_2026-03-18.md
Normal file
527
docs/architecture_solution_v1_2026-03-18.md
Normal file
@@ -0,0 +1,527 @@
|
|||||||
|
# 架构解决方案(P0问题修复)
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:系统性解决评审发现的架构P0问题
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Router Core 自研风险控制
|
||||||
|
|
||||||
|
### 1.1 当前问题
|
||||||
|
|
||||||
|
- S2目标60%接管率激进
|
||||||
|
- 首次自研缺乏经验
|
||||||
|
- 时间只有13周
|
||||||
|
|
||||||
|
### 1.2 解决方案
|
||||||
|
|
||||||
|
#### 1.2.1 终验目标不变,增加过程缓冲
|
||||||
|
|
||||||
|
| 指标 | 原目标 | v4.1收敛目标 | 理由 |
|
||||||
|
|------|--------|-------------|------|
|
||||||
|
| 全供应商接管率(终验) | >=60% | **>=60%(不降档)** | 保持与主基线一致 |
|
||||||
|
| 全供应商接管率(过程) | - | **40%中间检查点** | 降低推进风险 |
|
||||||
|
| 国内供应商接管率 | 100% | **100%** | 保持不变 |
|
||||||
|
| 验收时间 | S2结束 | **S2结束(13周)** | 与S2主基线一致 |
|
||||||
|
|
||||||
|
#### 1.2.2 分阶段验证
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RouterTakeoverPlan:
|
||||||
|
STAGES = [
|
||||||
|
{
|
||||||
|
'name': 'S2-A',
|
||||||
|
'target_rate': 0.10,
|
||||||
|
'duration_weeks': 4,
|
||||||
|
'goal': '验证稳定性',
|
||||||
|
'success_criteria': {
|
||||||
|
'availability': 0.999,
|
||||||
|
'latency_p99': 200,
|
||||||
|
'error_rate': 0.001
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'name': 'S2-B',
|
||||||
|
'target_rate': 0.30,
|
||||||
|
'duration_weeks': 4,
|
||||||
|
'goal': '优化性能',
|
||||||
|
'success_criteria': {
|
||||||
|
'availability': 0.9995,
|
||||||
|
'latency_p99': 150,
|
||||||
|
'error_rate': 0.0005
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'name': 'S2-C1',
|
||||||
|
'target_rate': 0.40,
|
||||||
|
'duration_weeks': 2,
|
||||||
|
'goal': '中间检查点',
|
||||||
|
'success_criteria': {
|
||||||
|
'availability': 0.9999,
|
||||||
|
'latency_p99': 120,
|
||||||
|
'error_rate': 0.0001
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'name': 'S2-C2',
|
||||||
|
'target_rate': 0.60, # 终验目标保持60%
|
||||||
|
'duration_weeks': 4,
|
||||||
|
'goal': '达成终验目标',
|
||||||
|
'success_criteria': {
|
||||||
|
'availability': 0.9999,
|
||||||
|
'latency_p99': 100,
|
||||||
|
'error_rate': 0.0001
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 1.2.3 原型提前启动
|
||||||
|
|
||||||
|
```
|
||||||
|
时间线:
|
||||||
|
|
||||||
|
W1-W4: Router Core 原型开发(提前开始)
|
||||||
|
│
|
||||||
|
W5-W8: S0 阶段
|
||||||
|
│
|
||||||
|
W9-S2: 继续开发 + 集成测试
|
||||||
|
│
|
||||||
|
S2-A: 10% 流量验证
|
||||||
|
S2-B: 30% 流量验证
|
||||||
|
S2-C: 40% 中间检查点(终验目标仍为60%)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Subapi 耦合解耦
|
||||||
|
|
||||||
|
### 2.1 当前问题
|
||||||
|
|
||||||
|
- 直接依赖 subapi
|
||||||
|
- 升级可能破坏兼容
|
||||||
|
- 定制困难
|
||||||
|
|
||||||
|
### 2.2 解决方案
|
||||||
|
|
||||||
|
#### 2.2.1 Provider Adapter 抽象层
|
||||||
|
|
||||||
|
```python
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
class ProviderAdapter(ABC):
|
||||||
|
"""供应商适配器抽象基类"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def chat_completion(
|
||||||
|
self,
|
||||||
|
model: str,
|
||||||
|
messages: List[Message],
|
||||||
|
options: CompletionOptions
|
||||||
|
) -> CompletionResponse:
|
||||||
|
"""发送聊天完成请求"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def get_usage(self, response: Response) -> Usage:
|
||||||
|
"""获取使用量"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def map_error(self, error: Exception) -> ProviderError:
|
||||||
|
"""错误码映射"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def health_check(self) -> bool:
|
||||||
|
"""健康检查"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@property
|
||||||
|
@abstractmethod
|
||||||
|
def provider_name(self) -> str:
|
||||||
|
"""供应商名称"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
# ==================== Subapi 适配器 ====================
|
||||||
|
class SubapiAdapter(ProviderAdapter):
|
||||||
|
"""Subapi 适配器"""
|
||||||
|
|
||||||
|
def __init__(self, config: SubapiConfig):
|
||||||
|
self.client = SubapiClient(config)
|
||||||
|
self.retry_config = RetryConfig(
|
||||||
|
max_attempts=3,
|
||||||
|
backoff_factor=2,
|
||||||
|
retry_on_status=[429, 500, 502, 503, 504]
|
||||||
|
)
|
||||||
|
|
||||||
|
async def chat_completion(self, model, messages, options):
|
||||||
|
# 1. 构建请求
|
||||||
|
request = self.build_request(model, messages, options)
|
||||||
|
|
||||||
|
# 2. 发送请求(带重试)
|
||||||
|
response = await self.retry_with_backoff(
|
||||||
|
lambda: self.client.post('/v1/chat/completions', request)
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3. 转换响应
|
||||||
|
return self.transform_response(response)
|
||||||
|
|
||||||
|
def map_error(self, error: Exception) -> ProviderError:
|
||||||
|
# Subapi 错误码 -> 统一错误码
|
||||||
|
error_mapping = {
|
||||||
|
'invalid_api_key': ProviderError.INVALID_KEY,
|
||||||
|
'rate_limit_exceeded': ProviderError.RATE_LIMIT,
|
||||||
|
'insufficient_quota': ProviderError.INSUFFICIENT_QUOTA,
|
||||||
|
'model_not_found': ProviderError.MODEL_NOT_FOUND,
|
||||||
|
}
|
||||||
|
return error_mapping.get(error.code, ProviderError.UNKNOWN)
|
||||||
|
|
||||||
|
# ==================== 自研 Router Core 适配器 ====================
|
||||||
|
class RouterCoreAdapter(ProviderAdapter):
|
||||||
|
"""自研 Router Core 适配器"""
|
||||||
|
|
||||||
|
def __init__(self, config: RouterCoreConfig):
|
||||||
|
self.client = RouterCoreClient(config)
|
||||||
|
|
||||||
|
async def chat_completion(self, model, messages, options):
|
||||||
|
# 直接调用内部服务
|
||||||
|
response = await self.client.chat_complete(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
**options.to_dict()
|
||||||
|
)
|
||||||
|
return self.transform_response(response)
|
||||||
|
|
||||||
|
def map_error(self, error: Exception) -> ProviderError:
|
||||||
|
# Router Core 错误码 -> 统一错误码
|
||||||
|
return RouterCoreErrorMapper.map(error)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.2 适配器注册中心
|
||||||
|
|
||||||
|
```python
|
||||||
|
class AdapterRegistry:
|
||||||
|
"""适配器注册中心"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._adapters: Dict[str, ProviderAdapter] = {}
|
||||||
|
self._fallbacks: Dict[str, str] = {}
|
||||||
|
|
||||||
|
def register(self, provider: str, adapter: ProviderAdapter,
|
||||||
|
fallback: str = None):
|
||||||
|
"""注册适配器"""
|
||||||
|
self._adapters[provider] = adapter
|
||||||
|
if fallback:
|
||||||
|
self._fallbacks[provider] = fallback
|
||||||
|
|
||||||
|
def get(self, provider: str) -> ProviderAdapter:
|
||||||
|
"""获取适配器"""
|
||||||
|
if provider not in self._adapters:
|
||||||
|
raise AdapterNotFoundError(f"No adapter for {provider}")
|
||||||
|
return self._adapters[provider]
|
||||||
|
|
||||||
|
def get_with_fallback(self, provider: str) -> ProviderAdapter:
|
||||||
|
"""获取适配器(带降级)- 修复版"""
|
||||||
|
adapter = self.get(provider)
|
||||||
|
|
||||||
|
# 修复A-D-01: 使用异步心跳,避免同步阻塞
|
||||||
|
# 从缓存获取健康状态,不实时调用
|
||||||
|
health_status = self._health_cache.get(provider)
|
||||||
|
|
||||||
|
if not health_status or not health_status.is_healthy:
|
||||||
|
# 异步更新健康状态,不阻塞请求
|
||||||
|
asyncio.create_task(self._update_health_async(provider))
|
||||||
|
|
||||||
|
# 降级到备用
|
||||||
|
if provider in self._fallbacks:
|
||||||
|
fallback_adapter = self.get(self._fallbacks[provider])
|
||||||
|
fallback_health = self._health_cache.get(self._fallbacks[provider])
|
||||||
|
if fallback_health and fallback_health.is_healthy:
|
||||||
|
return fallback_adapter
|
||||||
|
|
||||||
|
return adapter
|
||||||
|
|
||||||
|
async def _update_health_async(self, provider: str):
|
||||||
|
"""异步更新健康状态"""
|
||||||
|
try:
|
||||||
|
adapter = self.get(provider)
|
||||||
|
is_healthy = await adapter.health_check()
|
||||||
|
self._health_cache[provider] = HealthStatus(
|
||||||
|
is_healthy=is_healthy,
|
||||||
|
checked_at=datetime.now()
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"健康检查失败: {provider}", e)
|
||||||
|
self._health_cache[provider] = HealthStatus(is_healthy=False)
|
||||||
|
|
||||||
|
# 使用示例
|
||||||
|
registry = AdapterRegistry()
|
||||||
|
registry.register('openai', SubapiAdapter(subapi_config), fallback='azure')
|
||||||
|
registry.register('anthropic', SubapiAdapter(subapi_config))
|
||||||
|
registry.register('domestic', RouterCoreAdapter(router_config))
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.3 契约测试
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
class TestProviderContract:
|
||||||
|
"""供应商适配器契约测试"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_completion_response_structure(self, adapter: ProviderAdapter):
|
||||||
|
"""测试响应结构"""
|
||||||
|
response = await adapter.chat_completion(
|
||||||
|
model='gpt-4',
|
||||||
|
messages=[{'role': 'user', 'content': 'Hello'}]
|
||||||
|
)
|
||||||
|
|
||||||
|
# 验证必需字段
|
||||||
|
assert response.id is not None
|
||||||
|
assert response.model is not None
|
||||||
|
assert response.choices is not None
|
||||||
|
assert response.usage is not None
|
||||||
|
assert response.usage.prompt_tokens >= 0
|
||||||
|
assert response.usage.completion_tokens >= 0
|
||||||
|
assert response.usage.total_tokens >= 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_error_mapping(self, adapter: ProviderAdapter):
|
||||||
|
"""测试错误码映射"""
|
||||||
|
# 测试各种错误情况
|
||||||
|
error_cases = [
|
||||||
|
(InvalidKeyError(), ProviderError.INVALID_KEY),
|
||||||
|
(RateLimitError(), ProviderError.RATE_LIMIT),
|
||||||
|
(QuotaExceededError(), ProviderError.INSUFFICIENT_QUOTA),
|
||||||
|
]
|
||||||
|
|
||||||
|
for original, expected in error_cases:
|
||||||
|
result = adapter.map_error(original)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_streaming_response(self, adapter: ProviderAdapter):
|
||||||
|
"""测试流式响应"""
|
||||||
|
response = await adapter.chat_completion(
|
||||||
|
model='gpt-4',
|
||||||
|
messages=[{'role': 'user', 'content': 'Count to 5'}],
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# 验证流式响应
|
||||||
|
chunks = []
|
||||||
|
async for chunk in response.stream():
|
||||||
|
chunks.append(chunk)
|
||||||
|
if len(chunks) >= 5:
|
||||||
|
break
|
||||||
|
|
||||||
|
assert len(chunks) > 0
|
||||||
|
assert all(hasattr(c, 'delta') for c in chunks)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 数据一致性保证
|
||||||
|
|
||||||
|
### 3.1 当前问题
|
||||||
|
|
||||||
|
- 异步写入可能失败
|
||||||
|
- 进程崩溃可能导致数据丢失
|
||||||
|
- 分布式事务未处理
|
||||||
|
|
||||||
|
### 3.2 解决方案
|
||||||
|
|
||||||
|
#### 3.2.1 同步预扣 + 异步确认
|
||||||
|
|
||||||
|
```python
|
||||||
|
class BillingService:
|
||||||
|
async def handle_request(self, request: LLMRequest) -> Response:
|
||||||
|
# 1. 同步预扣额度(乐观锁)
|
||||||
|
estimated_cost = self.estimate_cost(request)
|
||||||
|
success = await self.reserve_balance(
|
||||||
|
user_id=request.user_id,
|
||||||
|
amount=estimated_cost,
|
||||||
|
request_id=request.request_id
|
||||||
|
)
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
raise InsufficientBalanceError()
|
||||||
|
|
||||||
|
# 2. 处理请求
|
||||||
|
try:
|
||||||
|
response = await self.router.route(request)
|
||||||
|
|
||||||
|
# 3. 同步计算实际费用
|
||||||
|
actual_cost = self.calculate_actual_cost(response)
|
||||||
|
|
||||||
|
# 4. 同步扣减余额(补偿事务)
|
||||||
|
await self.charge(
|
||||||
|
user_id=request.user_id,
|
||||||
|
amount=actual_cost,
|
||||||
|
request_id=request.request_id,
|
||||||
|
# 记录预扣信息用于对账
|
||||||
|
reserved_amount=estimated_cost,
|
||||||
|
final_amount=actual_cost
|
||||||
|
)
|
||||||
|
|
||||||
|
# 5. 记录使用量(异步,可重试)
|
||||||
|
asyncio.create_task(
|
||||||
|
self.record_usage_async(
|
||||||
|
user_id=request.user_id,
|
||||||
|
usage=response.usage,
|
||||||
|
request_id=request.request_id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# 6. 请求失败,释放预扣额度
|
||||||
|
await self.release_reservation(
|
||||||
|
user_id=request.user_id,
|
||||||
|
amount=estimated_cost,
|
||||||
|
request_id=request.request_id
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.2 补偿事务队列
|
||||||
|
|
||||||
|
```python
|
||||||
|
class CompensationQueue:
|
||||||
|
"""补偿事务队列 - 修复版"""
|
||||||
|
|
||||||
|
# 修复A-D-02: 增加最大重试时间和指数退避
|
||||||
|
MAX_RETRY_COUNT = 5 # 增加到5次
|
||||||
|
MAX_RETRY_SECONDS = 3600 # 最大重试时间1小时
|
||||||
|
BASE_DELAY = 1 # 基础延迟1秒
|
||||||
|
|
||||||
|
def __init__(self, redis: Redis, db: Database):
|
||||||
|
self.redis = redis
|
||||||
|
self.db = db
|
||||||
|
|
||||||
|
async def enqueue_compensation(self, transaction: CompensationTransaction):
|
||||||
|
"""加入补偿队列"""
|
||||||
|
await self.redis.lpush(
|
||||||
|
'compensation_queue',
|
||||||
|
json.dumps({
|
||||||
|
'type': transaction.type,
|
||||||
|
'data': transaction.data,
|
||||||
|
'retry_count': 0,
|
||||||
|
'created_at': datetime.now().isoformat(),
|
||||||
|
'first_retry_at': None
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
async def process_compensations(self):
|
||||||
|
"""处理补偿队列(后台任务)- 修复版"""
|
||||||
|
while True:
|
||||||
|
# 1. 获取待处理项
|
||||||
|
item = await self.redis.lpop('compensation_queue')
|
||||||
|
if not item:
|
||||||
|
await asyncio.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
|
transaction = json.loads(item)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 2. 执行补偿
|
||||||
|
await self.execute_compensation(transaction)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# 3. 指数退避重试逻辑
|
||||||
|
retry_count = transaction['retry_count']
|
||||||
|
created_at = datetime.fromisoformat(transaction['created_at'])
|
||||||
|
elapsed = (datetime.now() - created_at).total_seconds()
|
||||||
|
|
||||||
|
# 检查是否超过最大重试时间
|
||||||
|
if elapsed > self.MAX_RETRY_SECONDS:
|
||||||
|
# 超过最大时间,告警人工处理
|
||||||
|
await self.alert_manual_intervention(transaction, e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 指数退避:1s, 2s, 4s, 8s, 16s
|
||||||
|
if retry_count < self.MAX_RETRY_COUNT:
|
||||||
|
transaction['retry_count'] += 1
|
||||||
|
transaction['first_retry_at'] = datetime.now().isoformat()
|
||||||
|
|
||||||
|
delay = min(self.BASE_DELAY * (2 ** retry_count), 60) # 最大延迟60秒
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
await self.redis.lpush('compensation_queue', json.dumps(transaction))
|
||||||
|
else:
|
||||||
|
# 4. 超过重试次数,告警人工处理
|
||||||
|
await self.alert_manual_intervention(transaction, e)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.3 实时对账
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RealTimeReconciliation:
|
||||||
|
"""实时对账"""
|
||||||
|
|
||||||
|
async def verify_billing(self, request_id: str):
|
||||||
|
"""验证单笔计费"""
|
||||||
|
# 1. 获取预扣记录
|
||||||
|
reservation = await self.get_reservation(request_id)
|
||||||
|
|
||||||
|
# 2. 获取实际扣费记录
|
||||||
|
charge = await self.get_charge(request_id)
|
||||||
|
|
||||||
|
# 3. 获取使用量记录
|
||||||
|
usage = await self.get_usage(request_id)
|
||||||
|
|
||||||
|
# 4. 验证一致性
|
||||||
|
if reservation and charge:
|
||||||
|
# 预扣 vs 实扣
|
||||||
|
diff = abs(reservation.amount - charge.amount)
|
||||||
|
# 修复A-D-03: 对账精度提高到0.001元
|
||||||
|
if diff > 0.001: # 允许0.1分误差
|
||||||
|
await self.alert('billing_discrepancy', {
|
||||||
|
'request_id': request_id,
|
||||||
|
'reserved': reservation.amount,
|
||||||
|
'charged': charge.amount
|
||||||
|
})
|
||||||
|
|
||||||
|
if charge and usage:
|
||||||
|
# 扣费 vs 使用量
|
||||||
|
expected = self.calculate_cost(usage)
|
||||||
|
if abs(charge.amount - expected) > expected * 0.001:
|
||||||
|
await self.alert('usage_charge_mismatch', {
|
||||||
|
'request_id': request_id,
|
||||||
|
'usage': usage,
|
||||||
|
'charged': charge.amount,
|
||||||
|
'expected': expected
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 实施计划
|
||||||
|
|
||||||
|
### 4.1 任务分解
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 截止 | 依赖 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| Router Core 目标调整 | 产品 | 立即 | - |
|
||||||
|
| Provider Adapter 抽象层 | 架构 | S0-M1 | - |
|
||||||
|
| 适配器注册中心 | 后端 | S0-M1 | - |
|
||||||
|
| 契约测试框架 | 测试 | S0-M2 | - |
|
||||||
|
| 同步预扣机制 | 后端 | S1前 | - |
|
||||||
|
| 补偿队列 | 后端 | S1前 | - |
|
||||||
|
| 实时对账 | 后端 | S1前 | - |
|
||||||
|
|
||||||
|
### 4.2 验证标准
|
||||||
|
|
||||||
|
- Router Core 60%接管率稳定运行(40%仅为中间检查点)
|
||||||
|
- 任意时刻可切换 subapi / 自研
|
||||||
|
- 计费数据0误差
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:架构解决方案
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_product_technical_blueprint_v1_2026-03-16.md`
|
||||||
|
- `s2_takeover_buffer_strategy_v1_2026-03-18.md`
|
||||||
256
docs/business_model_profitability_design_v1_2026-03-18.md
Normal file
256
docs/business_model_profitability_design_v1_2026-03-18.md
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
# 商业模式与盈利能力设计(评审建议新增章节)
|
||||||
|
|
||||||
|
> 本章节补充各阶段的ROI测算和Enterprise版定价策略,基于评审建议和已确认的商业参数。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. 术语字典
|
||||||
|
|
||||||
|
| 术语 | 定义 | 备注 |
|
||||||
|
|------|------|------|
|
||||||
|
| **采购折扣系数** | 供应方获得官方价格的折扣比例 | 本方案定义为 **60%** |
|
||||||
|
| **毛利率** | 平台销售收入与采购成本的差额比例 | 目标 **15-50%** |
|
||||||
|
| **供应方** | 在平台挂载多余LLM配额的个人或企业 | 平台的用户角色 |
|
||||||
|
| **供应商** | LLM 服务提供商(OpenAI、百度等) | 上游账号来源 |
|
||||||
|
| **统购统销** | 平台买断供应方配额→加价出售平台服务给需求方 | 核心商业模式(不转售上游凭证) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 商业参数确认
|
||||||
|
|
||||||
|
| 参数 | 值 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 采购折扣系数 | 60% | 供应方获得官方价格的60% |
|
||||||
|
| 毛利率目标区间 | 15-50% | 根据场景动态调整 |
|
||||||
|
| 套餐结构 | Free / Growth / Enterprise | 全生命周期覆盖 |
|
||||||
|
| 核心模式 | 统购统销 | 平台买断→加价出售平台服务(不向需求方外发供应方上游凭证) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 毛利率区间设计
|
||||||
|
|
||||||
|
### 2.1 毛利率分层策略
|
||||||
|
|
||||||
|
| 客户类型 | 模型类型 | 毛利率区间 | 定价策略 |
|
||||||
|
|----------|----------|-----------|----------|
|
||||||
|
| **Growth** | 热门模型(GPT-4o, Claude-3) | 15-25% | 低价引流 |
|
||||||
|
| **Growth** | 长尾模型 | 30-50% | 高毛利补充 |
|
||||||
|
| **Enterprise** | 全模型 | 35-50% | 高毛利+服务溢价 |
|
||||||
|
| **供应方补贴期** | 全部 | 10-15% | 吸引供应方入驻 |
|
||||||
|
|
||||||
|
### 2.2 毛利率调整机制
|
||||||
|
|
||||||
|
```
|
||||||
|
毛利率 = 基础毛利率 × 供需系数 × 竞争系数
|
||||||
|
|
||||||
|
- 供需系数:供不应求时上调,供过于求时下调
|
||||||
|
- 竞争系数:竞品降价时跟进,不主动打价格战
|
||||||
|
- 调整频率:月度评估,季度调整
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 客户价值模型(客户ROI)
|
||||||
|
|
||||||
|
### 3.1 客户收益计算
|
||||||
|
|
||||||
|
**客户使用平台vs直接购买的收益对比**:
|
||||||
|
|
||||||
|
```
|
||||||
|
客户收益 = 节省成本 + 便捷性价值 + 治理价值
|
||||||
|
|
||||||
|
节省成本 = (官方价格 - 平台售价) × 使用量
|
||||||
|
= (官方价格 × (1 - 采购折扣 × (1+毛利率))) × 使用量
|
||||||
|
|
||||||
|
便捷性价值 = 统一API + 多模型切换 + 免账号管理
|
||||||
|
治理价值 = 预算控制 + 成本归因 + 审计追溯
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.2 客户ROI示例
|
||||||
|
|
||||||
|
| 场景 | 客户月用量 | 官方价格 | 平台售价 | 年节省 |
|
||||||
|
|------|-----------|----------|---------|--------|
|
||||||
|
| 初创团队 | 50M tokens | $150 | $120 | $360 |
|
||||||
|
| 成长团队 | 500M tokens | $1,500 | $1,200 | $3,600 |
|
||||||
|
| 中型企业 | 5B tokens | $15,000 | $12,000 | $36,000 |
|
||||||
|
| 大型企业 | 50B tokens | $150,000 | $120,000 | $360,000 |
|
||||||
|
|
||||||
|
> 假设:平台毛利率约33%,采购折扣60%
|
||||||
|
> 平台售价 = 官方价格 × 60% × (1+33%) ≈ 官方价格 × 80%(与上表 $150 → $120 一致)
|
||||||
|
|
||||||
|
### 3.3 客户ROI率
|
||||||
|
|
||||||
|
| 客户类型 | 年节省(中位数) | 平台年费 | ROI率 |
|
||||||
|
|----------|---------------|---------|-------|
|
||||||
|
| 初创团队 | $360 | $0 (Free) | ∞ |
|
||||||
|
| 成长团队 | $3,600 | $1,200 | 200% |
|
||||||
|
| 中型企业 | $36,000 | $8,000 | 350% |
|
||||||
|
| 大型企业 | $360,000 | $50,000 | 620% |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 套餐定价设计
|
||||||
|
|
||||||
|
### 4.1 Free 套餐(获客)
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
|------|------|
|
||||||
|
| **目标** | 降低上手门槛,获取潜在客户 |
|
||||||
|
| **定价** | 免费 |
|
||||||
|
| **功能限制** | 请求量限制、模型限制、功能限制 |
|
||||||
|
| **使用量上限** | 100万 tokens/月 |
|
||||||
|
| **期限** | 永久 |
|
||||||
|
| **转化目标** | 30天内引导至Growth |
|
||||||
|
|
||||||
|
### 4.2 Growth 套餐(核心收入)
|
||||||
|
|
||||||
|
| 层级 | Starter | Pro | Business |
|
||||||
|
|------|---------|-----|----------|
|
||||||
|
| **月费** | ¥299 | ¥999 | ¥2,999 |
|
||||||
|
| **年费(8折)** | ¥2,871 | ¥9,590 | ¥28,790 |
|
||||||
|
| **月请求量** | 10万 | 50万 | 200万 |
|
||||||
|
| **月tokens额度** | 100M | 500M | 2B |
|
||||||
|
| **团队成员** | 3人 | 10人 | 50人 |
|
||||||
|
| **模型** | 基础 | 主流 | 全部 |
|
||||||
|
| **功能** | 基础路由 | 高级路由 | 全部 |
|
||||||
|
| **支持** | 邮件 | 邮件+工单 | 专属客服 |
|
||||||
|
|
||||||
|
### 4.3 Enterprise 套餐(高毛利)
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
|------|------|
|
||||||
|
| **定价** | 定制化,年约 ¥10万起 |
|
||||||
|
| **适用** | 500+人团队,有合规要求 |
|
||||||
|
| **功能** | 全部功能 + 供应链增强 |
|
||||||
|
| **SLA** | 99.95% 可用性保证 |
|
||||||
|
| **支持** | 专属客户经理 + 7×24支持 |
|
||||||
|
| **部署** | 公有云/私有化/混合部署 |
|
||||||
|
| **账期** | 月付/季付/年付 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 盈利能力预测(1年期 - 激进场景)
|
||||||
|
|
||||||
|
### 5.1 客户增长预测
|
||||||
|
|
||||||
|
| 季度 | Free用户 | Growth付费 | Enterprise | 总客户数 |
|
||||||
|
|------|----------|------------|------------|----------|
|
||||||
|
| Q1 | 200 | 20 | 0 | 220 |
|
||||||
|
| Q2 | 500 | 60 | 2 | 562 |
|
||||||
|
| Q3 | 1,000 | 150 | 5 | 1,155 |
|
||||||
|
| Q4 | 2,000 | 300 | 10 | 2,310 |
|
||||||
|
|
||||||
|
### 5.2 收入预测
|
||||||
|
|
||||||
|
| 收入来源 | Q1 | Q2 | Q3 | Q4 | 全年 |
|
||||||
|
|----------|-----|-----|-----|-----|------|
|
||||||
|
| **Growth订阅** | ¥30,000 | ¥120,000 | ¥300,000 | ¥600,000 | ¥1,050,000 |
|
||||||
|
| **Enterprise** | ¥0 | ¥100,000 | ¥250,000 | ¥500,000 | ¥850,000 |
|
||||||
|
| **Token差价** | ¥50,000 | ¥200,000 | ¥500,000 | ¥1,000,000 | ¥1,750,000 |
|
||||||
|
| **总计** | ¥80,000 | ¥420,000 | ¥1,050,000 | ¥2,100,000 | **¥3,650,000** |
|
||||||
|
|
||||||
|
### 5.3 成本预测
|
||||||
|
|
||||||
|
| 成本项 | Q1 | Q2 | Q3 | Q4 | 全年 |
|
||||||
|
|--------|-----|-----|-----|-----|------|
|
||||||
|
| **LLM采购成本** | ¥40,000 | ¥160,000 | ¥400,000 | ¥800,000 | ¥1,400,000 |
|
||||||
|
| **云服务/运维** | ¥30,000 | ¥50,000 | ¥80,000 | ¥120,000 | ¥280,000 |
|
||||||
|
| **研发人力** | ¥150,000 | ¥150,000 | ¥180,000 | ¥200,000 | ¥680,000 |
|
||||||
|
| **市场/销售** | ¥20,000 | ¥40,000 | ¥60,000 | ¥100,000 | ¥220,000 |
|
||||||
|
| **总计** | ¥240,000 | ¥400,000 | ¥720,000 | ¥1,220,000 | **¥2,580,000** |
|
||||||
|
|
||||||
|
### 5.4 利润预测
|
||||||
|
|
||||||
|
| 指标 | Q1 | Q2 | Q3 | Q4 | 全年 |
|
||||||
|
|------|-----|-----|-----|-----|------|
|
||||||
|
| **总收入** | ¥80,000 | ¥420,000 | ¥1,050,000 | ¥2,100,000 | ¥3,650,000 |
|
||||||
|
| **总成本** | ¥240,000 | ¥400,000 | ¥720,000 | ¥1,220,000 | ¥2,580,000 |
|
||||||
|
| **毛利** | -¥160,000 | ¥20,000 | ¥330,000 | ¥880,000 | **¥1,070,000** |
|
||||||
|
| **毛利率** | -200% | 5% | 31% | 42% | **29%** |
|
||||||
|
| **累计毛利** | -¥160,000 | -¥140,000 | ¥190,000 | ¥1,070,000 | - |
|
||||||
|
|
||||||
|
> **盈亏平衡点:Q2末/Q3初**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 供应侧商业模式
|
||||||
|
|
||||||
|
### 6.1 供应方收益
|
||||||
|
|
||||||
|
| 供应方类型 | 月供 tokens | 月收益(60%折扣) |
|
||||||
|
|------------|-------------|-----------------|
|
||||||
|
| 个人 | 10M | $150 |
|
||||||
|
| 小团队 | 100M | $1,500 |
|
||||||
|
| 企业 | 1B | $15,000 |
|
||||||
|
|
||||||
|
### 6.2 平台收益
|
||||||
|
|
||||||
|
| 收入来源 | 收费方式 | 示例 |
|
||||||
|
|----------|----------|------|
|
||||||
|
| **Token差价** | 毛利率15-50% | $150K/月 → $22.5K-$75K |
|
||||||
|
| **交易服务费** | 交易额1-2% | $150K/月 → $1.5K-$3K |
|
||||||
|
| **提现服务费** | 提现额1% | - |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 竞品定价参考
|
||||||
|
|
||||||
|
| 竞品 | Growth定价 | Enterprise定价 | 差异化 |
|
||||||
|
|------|-----------|----------------|--------|
|
||||||
|
| **One-API** | 开源免费 | 定制收费 | 基础功能 |
|
||||||
|
| **New-API** | ¥99/月起 | 定制 | 国内模型 |
|
||||||
|
| **LiteLLM** | 开源/Pro $50/月 | 定制 | 多模型 |
|
||||||
|
| **我方平台** | **¥299/月起** | **¥10万起** | **治理+供应侧** |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 定价原则
|
||||||
|
|
||||||
|
### 8.1 核心原则
|
||||||
|
|
||||||
|
1. **价值定价**:定价基于客户获得的节省和价值,而非成本
|
||||||
|
2. **差异化**:不与开源工具比免费,与商业方案比价值
|
||||||
|
3. **透明**:价格公开,无隐藏费用
|
||||||
|
4. **灵活**:支持月度/年度,支持扩容
|
||||||
|
|
||||||
|
### 8.2 定价禁区
|
||||||
|
|
||||||
|
1. ❌ 不打价格战
|
||||||
|
2. ❌ 不低于成本销售
|
||||||
|
3. ❌ 不设置长期负毛利套餐
|
||||||
|
4. ❌ 不承诺无法保障的SLA
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. ROI 总结
|
||||||
|
|
||||||
|
### 9.1 客户视角
|
||||||
|
|
||||||
|
- **平均ROI**:200-600%(相比直接购买)
|
||||||
|
- **回本周期**:1-3个月
|
||||||
|
- **核心价值**:成本节省 + 治理能力
|
||||||
|
|
||||||
|
### 9.2 平台视角
|
||||||
|
|
||||||
|
- **首年目标收入**:¥365万
|
||||||
|
- **首年目标毛利**:¥107万(29%)
|
||||||
|
- **盈亏平衡**:Q2末/Q3初(累计毛利于Q3转正,见5.4节)
|
||||||
|
- **3年目标收入**:¥2000万+
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 待决策事项
|
||||||
|
|
||||||
|
| 编号 | 决策项 | 选项 | 建议 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| P1 | Growth Starter定价 | ¥199/¥299/¥399 | ¥299 |
|
||||||
|
| P2 | Enterprise起步价 | ¥5万/¥10万/¥20万 | ¥10万 |
|
||||||
|
| P3 | 年付折扣 | 8折/8.5折/9折 | 8折 |
|
||||||
|
| P4 | 是否有免费版 | 是/否 | 是(Free) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**章节状态**:初稿
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md`
|
||||||
|
- `supply_side_product_design_v1_2026-03-18.md`
|
||||||
472
docs/business_solution_v1_2026-03-18.md
Normal file
472
docs/business_solution_v1_2026-03-18.md
Normal file
@@ -0,0 +1,472 @@
|
|||||||
|
# 业务解决方案(P0问题修复)
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:系统性解决评审发现的业务P0问题
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 资金合规解决方案
|
||||||
|
|
||||||
|
### 1.1 当前问题
|
||||||
|
|
||||||
|
- 资金池可能需要支付牌照
|
||||||
|
- 资金沉淀处理不明确
|
||||||
|
- 税务合规未确认
|
||||||
|
|
||||||
|
### 1.2 解决方案
|
||||||
|
|
||||||
|
#### 1.2.1 资金托管模式
|
||||||
|
|
||||||
|
```
|
||||||
|
传统模式(有问题):
|
||||||
|
用户 ──▶ 平台账户 ──▶ 供应方结算
|
||||||
|
↑
|
||||||
|
资金沉淀
|
||||||
|
|
||||||
|
托管模式(推荐):
|
||||||
|
用户 ──▶ 第三方支付 ──▶ 平台运营账户 ──▶ 供应方结算
|
||||||
|
(Stripe) (无沉淀) (T+N结算)
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
class PaymentService:
|
||||||
|
"""支付服务 - 修复B-D-01:支持多支付渠道"""
|
||||||
|
|
||||||
|
# 支持的支付渠道
|
||||||
|
CHANNELS = {
|
||||||
|
'stripe': {'name': 'Stripe', 'regions': ['US', 'EU', 'APAC']},
|
||||||
|
'alipay': {'name': '支付宝', 'regions': ['CN']},
|
||||||
|
'wechat': {'name': '微信支付', 'regions': ['CN']},
|
||||||
|
'bank': {'name': '银行转账', 'regions': ['CN', 'US']}
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
# 初始化各渠道
|
||||||
|
self.providers = {
|
||||||
|
'stripe': StripePaymentProvider(),
|
||||||
|
'alipay': AlipayProvider(),
|
||||||
|
'wechat': WechatPayProvider(),
|
||||||
|
'bank': BankTransferProvider()
|
||||||
|
}
|
||||||
|
|
||||||
|
async def create_payment(
|
||||||
|
self,
|
||||||
|
user_id: int,
|
||||||
|
amount: float,
|
||||||
|
currency: str,
|
||||||
|
channel: str = None
|
||||||
|
) -> PaymentResult:
|
||||||
|
"""创建支付 - 自动选择渠道"""
|
||||||
|
# 1. 自动选择最优渠道
|
||||||
|
if not channel:
|
||||||
|
channel = self.select_optimal_channel(user_id, currency)
|
||||||
|
|
||||||
|
# 2. 获取渠道提供商
|
||||||
|
provider = self.providers.get(channel)
|
||||||
|
if not provider:
|
||||||
|
raise PaymentChannelError(f"不支持的支付渠道: {channel}")
|
||||||
|
|
||||||
|
# 3. 创建支付
|
||||||
|
payment = await provider.create_checkout(
|
||||||
|
amount=int(amount * 100),
|
||||||
|
currency=currency,
|
||||||
|
metadata={'user_id': user_id, 'type': 'top_up'}
|
||||||
|
)
|
||||||
|
|
||||||
|
return PaymentResult(
|
||||||
|
channel=channel,
|
||||||
|
payment_url=payment.url,
|
||||||
|
payment_id=payment.id,
|
||||||
|
expires_at=payment.expires_at
|
||||||
|
)
|
||||||
|
|
||||||
|
def select_optimal_channel(self, user_id: int, currency: str) -> str:
|
||||||
|
"""自动选择最优渠道"""
|
||||||
|
# 根据用户位置和币种选择
|
||||||
|
user = self.get_user(user_id)
|
||||||
|
region = user.region
|
||||||
|
|
||||||
|
# 优先使用用户地区支持的渠道
|
||||||
|
for channel, config in self.CHANNELS.items():
|
||||||
|
if region in config['regions']:
|
||||||
|
return channel
|
||||||
|
|
||||||
|
# 默认使用Stripe
|
||||||
|
return 'stripe'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 1.2.2 结算T+N模式
|
||||||
|
|
||||||
|
```python
|
||||||
|
class SettlementService:
|
||||||
|
"""结算服务:T+N 结算"""
|
||||||
|
|
||||||
|
# 根据供应方等级确定结算周期
|
||||||
|
SETTLEMENT_CONFIG = {
|
||||||
|
'new': {'days': 30, 'min_amount': 100}, # 新供应方
|
||||||
|
'stable': {'days': 14, 'min_amount': 50}, # 稳定供应方
|
||||||
|
'excellent': {'days': 7, 'min_amount': 0}, # 优质供应方
|
||||||
|
}
|
||||||
|
|
||||||
|
async def process_settlement(self, provider_id, period):
|
||||||
|
"""处理结算"""
|
||||||
|
# 1. 获取结算周期配置
|
||||||
|
provider = await self.get_provider(provider_id)
|
||||||
|
config = self.SETTLEMENT_CONFIG[provider.tier]
|
||||||
|
|
||||||
|
# 2. 计算结算金额
|
||||||
|
pending_amount = await self.get_pending_amount(provider_id, period)
|
||||||
|
|
||||||
|
# 3. 检查最低金额
|
||||||
|
if pending_amount < config['min_amount']:
|
||||||
|
return {'status': 'pending', 'reason': 'Below minimum'}
|
||||||
|
|
||||||
|
# 4. 对账验证
|
||||||
|
if not await self.verify_settlement(provider_id, period):
|
||||||
|
raise SettlementError('Verification failed')
|
||||||
|
|
||||||
|
# 5. 风控检查
|
||||||
|
if await self.is_flagged(provider_id):
|
||||||
|
await self.flag_for_manual_review(provider_id, pending_amount)
|
||||||
|
return {'status': 'pending', 'reason': 'Manual review'}
|
||||||
|
|
||||||
|
# 6. 执行结算
|
||||||
|
settlement = await self.execute_settlement(
|
||||||
|
provider_id=provider_id,
|
||||||
|
amount=pending_amount,
|
||||||
|
settlement_days=config['days']
|
||||||
|
)
|
||||||
|
|
||||||
|
return {'status': 'completed', 'settlement_id': settlement.id}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 1.2.3 税务合规
|
||||||
|
|
||||||
|
```python
|
||||||
|
class TaxService:
|
||||||
|
"""税务服务"""
|
||||||
|
|
||||||
|
# 代扣代缴个人所得税率(示例)
|
||||||
|
PERSONAL_INCOME_TAX_RATES = {
|
||||||
|
0: 0.00, # 0-3000
|
||||||
|
3000: 0.03, # 3001-12000
|
||||||
|
12000: 0.10, # 12001-25000
|
||||||
|
25000: 0.20, # 25001-35000
|
||||||
|
35000: 0.25, # 35001-55000
|
||||||
|
55000: 0.30, # 55001-80000
|
||||||
|
80000: 0.35, # 80001+
|
||||||
|
}
|
||||||
|
|
||||||
|
def calculate_tax(self, income):
|
||||||
|
"""计算个人所得税"""
|
||||||
|
tax = 0
|
||||||
|
remaining = income
|
||||||
|
|
||||||
|
for threshold, rate in self.PERSONAL_INCOME_TAX_RATES.items():
|
||||||
|
if remaining <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
taxable = min(remaining, threshold + 3000 if threshold > 0 else threshold)
|
||||||
|
tax += taxable * rate
|
||||||
|
remaining -= taxable
|
||||||
|
|
||||||
|
return tax
|
||||||
|
|
||||||
|
def process_settlement_with_tax(self, provider_id, gross_amount):
|
||||||
|
"""结算并代扣税"""
|
||||||
|
# 1. 计算税额
|
||||||
|
tax = self.calculate_tax(gross_amount)
|
||||||
|
|
||||||
|
# 2. 净收入
|
||||||
|
net_amount = gross_amount - tax
|
||||||
|
|
||||||
|
# 3. 生成税务凭证
|
||||||
|
tax_record = {
|
||||||
|
'provider_id': provider_id,
|
||||||
|
'gross_amount': gross_amount,
|
||||||
|
'tax': tax,
|
||||||
|
'net_amount': net_amount,
|
||||||
|
'tax_period': self.get_current_period(),
|
||||||
|
'generated_at': datetime.now()
|
||||||
|
}
|
||||||
|
|
||||||
|
# 4. 存储并生成报表
|
||||||
|
await self.save_tax_record(tax_record)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'gross_amount': gross_amount,
|
||||||
|
'tax': tax,
|
||||||
|
'net_amount': net_amount,
|
||||||
|
'tax_certificate_url': f'/api/v1/tax/{tax_record.id}'
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 计费精度解决方案
|
||||||
|
|
||||||
|
### 2.1 当前问题
|
||||||
|
|
||||||
|
- 浮点数精度问题
|
||||||
|
- 并发计费问题
|
||||||
|
- 退款处理问题
|
||||||
|
|
||||||
|
### 2.2 解决方案
|
||||||
|
|
||||||
|
#### 2.2.1 Decimal 精确计算
|
||||||
|
|
||||||
|
```python
|
||||||
|
from decimal import Decimal, ROUND_HALF_UP
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Money:
|
||||||
|
amount: Decimal
|
||||||
|
currency: str = 'USD'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_float(cls, amount: float, currency: str = 'USD') -> 'Money':
|
||||||
|
# 从浮点数创建,避免精度问题
|
||||||
|
return cls(amount=Decimal(str(amount)), currency=currency)
|
||||||
|
|
||||||
|
def __add__(self, other: 'Money') -> 'Money':
|
||||||
|
if self.currency != other.currency:
|
||||||
|
raise CurrencyMismatchError()
|
||||||
|
return Money(amount=self.amount + other.amount, currency=self.currency)
|
||||||
|
|
||||||
|
def __mul__(self, multiplier: Decimal) -> 'Money':
|
||||||
|
return Money(amount=self.amount * multiplier, currency=self.currency)
|
||||||
|
|
||||||
|
def round(self, places: int = 2) -> 'Money':
|
||||||
|
quantizer = Decimal(10) ** -places
|
||||||
|
return Money(
|
||||||
|
amount=self.amount.quantize(quantizer, rounding=ROUND_HALF_UP),
|
||||||
|
currency=self.currency
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.2 高精度计费引擎
|
||||||
|
|
||||||
|
```python
|
||||||
|
class PrecisionBillingEngine:
    """High-precision billing engine.

    All monetary math runs in Decimal so token-level pricing never
    accumulates binary floating-point error, and balance mutations are
    serialized per user behind a distributed lock.
    """

    def calculate_cost(self, model: str, usage: Usage, price: Price) -> Money:
        """Price a single request from its token usage.

        Per-1k rates are converted through str() so their quoted decimal
        digits survive exactly; the combined cost is rounded to cents
        (half-up) and wrapped in a Money value in the price's currency.
        """
        # Cost of the prompt and completion sides, each at its own rate.
        prompt_cost = Decimal(str(usage.prompt_tokens)) * Decimal(str(price.input_per_1k)) / 1000
        completion_cost = Decimal(str(usage.completion_tokens)) * Decimal(str(price.output_per_1k)) / 1000

        # Round the combined total to cents, half-up.
        rounded_total = (prompt_cost + completion_cost).quantize(
            Decimal('0.01'),
            rounding=ROUND_HALF_UP
        )

        return Money(amount=rounded_total, currency=price.currency)

    def charge(self, user_id: int, cost: Money, transaction_id: str) -> BillingResult:
        """Debit *cost* from a user's balance under a per-user lock.

        Raises InsufficientBalanceError when the balance cannot cover the
        charge; otherwise persists the new balance, appends a ledger entry,
        and returns the post-charge result.
        """
        # Serialize concurrent charges against the same user.
        with self.distributed_lock(f'billing:{user_id}', timeout=5):
            current = self.get_balance(user_id)

            # Reject before mutating anything if funds are short.
            if current < cost.amount:
                raise InsufficientBalanceError()

            remaining = current - cost.amount
            self.set_balance(user_id, remaining)

            # Record the debit in the transaction ledger for auditability.
            self.record_transaction(
                user_id=user_id,
                amount=-cost.amount,
                transaction_id=transaction_id,
                balance_after=remaining
            )

            return BillingResult(
                success=True,
                transaction_id=transaction_id,
                amount=cost.amount,
                balance_after=remaining
            )
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 结算风控解决方案
|
||||||
|
|
||||||
|
### 3.1 当前问题
|
||||||
|
|
||||||
|
- 虚假挂载风险
|
||||||
|
- 额度作弊风险
|
||||||
|
- 恶意退款风险
|
||||||
|
|
||||||
|
### 3.2 解决方案
|
||||||
|
|
||||||
|
#### 3.2.1 多维度风控
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ProviderSettlementRiskController:
    """Supply-side settlement risk controls.

    Scores a provider against a weighted set of risk indicators, then maps
    the aggregate score to a decision: block (>= 70), manual review
    (>= 40), or approve (otherwise).
    """

    # Weighted risk indicators. Each 'check' entry names an instance method
    # (resolved with getattr at evaluation time) that is expected to return
    # a (is_risky, detail) pair. Method names are used instead of lambdas
    # because a lambda defined in the class body has no access to `self`
    # (class bodies are not enclosing scopes for it), so the original
    # `lambda p: self.check_...(p)` raised NameError when invoked.
    RISK_INDICATORS = {
        'abnormal_usage_pattern': {
            'weight': 30,
            'check': 'check_usage_pattern'
        },
        'low_verification_rate': {
            'weight': 20,
            'check': 'check_verification_rate'
        },
        'high_refund_rate': {
            'weight': 25,
            'check': 'check_refund_rate'
        },
        'new_account': {
            'weight': 15,
            'check': 'check_account_age'
        },
        'inconsistent_income': {
            'weight': 10,
            'check': 'check_income_consistency'
        }
    }

    async def evaluate_settlement_risk(self, provider_id, amount) -> RiskResult:
        """Evaluate the settlement risk for one provider/amount pair."""
        provider = await self.get_provider(provider_id)

        # 1. Accumulate the weighted risk score across all indicators.
        risk_score = 0
        risk_details = []

        for indicator_name, config in self.RISK_INDICATORS.items():
            # Resolve the named check method on this instance and run it.
            is_risky, detail = getattr(self, config['check'])(provider)
            if is_risky:
                risk_score += config['weight']
                risk_details.append({
                    'indicator': indicator_name,
                    'detail': detail
                })

        # 2. Map the score onto a tiered decision.
        if risk_score >= 70:
            # High risk: refuse settlement outright.
            return RiskResult(
                level='HIGH',
                action='BLOCK',
                score=risk_score,
                details=risk_details,
                message='Settlement blocked due to high risk'
            )
        elif risk_score >= 40:
            # Medium risk: hold for manual review.
            await self.queue_for_review(provider_id, amount)
            return RiskResult(
                level='MEDIUM',
                action='REVIEW',
                score=risk_score,
                details=risk_details,
                message='Settlement queued for manual review'
            )
        else:
            # Low risk: settle normally.
            return RiskResult(
                level='LOW',
                action='APPROVE',
                score=risk_score,
                details=risk_details,
                message='Settlement approved'
            )
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.2 阶梯结算策略
|
||||||
|
|
||||||
|
```python
|
||||||
|
class TieredSettlement:
    """Tiered settlement policy.

    Providers fall into one of three tiers ('new' / 'stable' /
    'excellent'), each carrying its own settlement delay, daily cap,
    verification requirement, and deposit ('保证金') amount.
    """

    # Per-tier settlement parameters. A max_daily_settlement of None means
    # the tier carries no fixed daily cap.
    TIERS = {
        'new': {
            'settlement_days': 30,
            'max_daily_settlement': 1000,
            'require_verification': True,
            '保证金': 500
        },
        'stable': {
            'settlement_days': 14,
            'max_daily_settlement': 5000,
            'require_verification': False,
            '保证金': 0
        },
        'excellent': {
            'settlement_days': 7,
            'max_daily_settlement': None,  # uncapped
            'require_verification': False,
            '保证金': 0
        }
    }

    def calculate_settlement_limit(self, provider) -> dict:
        """Compute the effective settlement limits for *provider*.

        The tier cap is additionally clamped to twice the provider's
        30-day average daily settlement so a sudden large payout cannot
        slip through on tier limits alone.
        """
        tier = self.get_provider_tier(provider)
        tier_config = self.TIERS[tier]

        # Dynamic cap derived from the last 30 days of settlement history.
        avg_daily = self.get_avg_daily_settlement(provider.id, 30)
        tier_cap = tier_config['max_daily_settlement']

        # Allow at most 2x the historical average; also respect the tier
        # cap when one exists (None/0 means no fixed cap applies).
        dynamic_cap = avg_daily * 2
        effective_limit = min(tier_cap, dynamic_cap) if tier_cap else dynamic_cap

        return {
            'tier': tier,
            'settlement_days': tier_config['settlement_days'],
            'daily_limit': effective_limit,
            'require_verification': tier_config['require_verification']
        }
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 实施计划
|
||||||
|
|
||||||
|
### 4.1 任务分解
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 截止 | 依赖 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| 法务合规确认 | 产品 | 立即 | - |
|
||||||
|
| 支付SDK集成 | 后端 | S0-M2 | 法务确认 |
|
||||||
|
| 税务计算模块 | 后端 | S0-M2 | - |
|
||||||
|
| Decimal计费引擎 | 后端 | S1前 | - |
|
||||||
|
| 结算风控模块 | 风控 | S0-M1 | - |
|
||||||
|
| 阶梯结算策略 | 后端 | S0-M1 | - |
|
||||||
|
|
||||||
|
### 4.2 验证标准
|
||||||
|
|
||||||
|
- 资金由第三方托管
|
||||||
|
- 计费精度 100%
|
||||||
|
- 结算风控拦截率 >95%
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:业务解决方案
|
||||||
|
**关联文档**:
|
||||||
|
- `business_model_profitability_design_v1_2026-03-18.md`
|
||||||
|
- `supply_side_product_design_v1_2026-03-18.md`
|
||||||
420
docs/llm_gateway_competitor_function_business_2026-03-16.md
Normal file
420
docs/llm_gateway_competitor_function_business_2026-03-16.md
Normal file
@@ -0,0 +1,420 @@
|
|||||||
|
# 商用 LLM 通用转发网关竞品全景调研(功能清单 + 商业模型 + 技术优劣)
|
||||||
|
|
||||||
|
- 版本:v1.0(可评审版)
|
||||||
|
- 日期:2026-03-16
|
||||||
|
- 阶段定位:产品与市场阶段(非技术实现设计)
|
||||||
|
- 适用对象:产品负责人、商业负责人、技术管理层、投融资材料准备
|
||||||
|
|
||||||
|
## 0. 阶段声明
|
||||||
|
|
||||||
|
本文档用于回答“是否值得做、怎么切入、先做什么”三类问题。
|
||||||
|
|
||||||
|
本阶段**不展开**数据库、服务拆分、接口实现细节;仅输出:
|
||||||
|
|
||||||
|
1. 竞品功能清单与覆盖差异
|
||||||
|
2. 竞品商业模型与定价逻辑
|
||||||
|
3. 技术能力优劣(平台成熟度层面)
|
||||||
|
4. 我方可执行的产品切入与机会空白
|
||||||
|
|
||||||
|
## 1. 研究边界与方法
|
||||||
|
|
||||||
|
## 1.1 目标产品定义
|
||||||
|
|
||||||
|
目标产品是“商用 LLM 通用转发网关”,核心能力是:
|
||||||
|
|
||||||
|
1. 多模型统一接入(OpenAI 兼容/原生协议)
|
||||||
|
2. 路由与回退(成本、延迟、可用性、多策略)
|
||||||
|
3. 治理与风控(预算、配额、权限、审计)
|
||||||
|
4. 可观测与经营(成本看板、告警、账单、归因)
|
||||||
|
|
||||||
|
## 1.2 样本分层
|
||||||
|
|
||||||
|
本次样本分三层:
|
||||||
|
|
||||||
|
1. 商业化 AI 网关(直接竞争):OpenRouter、Portkey、Cloudflare AI Gateway、Helicone、Kong AI Gateway
|
||||||
|
2. 开源网关(价格天花板竞争):LiteLLM OSS、One-API、New-API、Sub2API
|
||||||
|
3. 云厂商平台(替代竞争):AWS Bedrock、Google Vertex AI、Azure AI
|
||||||
|
|
||||||
|
## 1.3 信息来源原则
|
||||||
|
|
||||||
|
1. 优先官方文档、官方定价页、官方仓库/官方 API
|
||||||
|
2. 对无法完全核验的字段,标注“公开信息未完全披露”
|
||||||
|
3. 开源热度采用 GitHub 官方 API(2026-03-16 当天采集)
|
||||||
|
|
||||||
|
## 2. 竞品分层与定位地图
|
||||||
|
|
||||||
|
| 层级 | 代表产品 | 主要购买方 | 主要卖点 | 主要短板/风险 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| 商业 AI 网关 | OpenRouter / Portkey / Cloudflare / Helicone / Kong | AI 团队、平台团队、企业采购 | 快速上线、平台化治理、SLA/支持 | 价格与锁定、功能边界受平台约束 |
|
||||||
|
| 开源网关 | LiteLLM OSS / One-API / New-API / Sub2API | 成本敏感团队、DevOps 能力团队 | 零 license、可控可改、上线快 | 运维/合规/SLA 自担 |
|
||||||
|
| 云厂商替代 | Bedrock / Vertex / Azure AI | 已重度上云企业 | 合规与采购便利、生态整合 | 跨云治理弱、迁移成本高 |
|
||||||
|
|
||||||
|
## 3. 核心竞品标准化卡片(商业 + 开源)
|
||||||
|
|
||||||
|
## 3.1 OpenRouter
|
||||||
|
|
||||||
|
- 定位:多模型聚合与交易型路由平台,强调“统一 API + 提升可用性”。
|
||||||
|
- 目标客户:独立开发者、中小团队、需要快速接入多模型的产品团队。
|
||||||
|
- 关键能力:
|
||||||
|
1. 提供 provider routing(排序、fallback、参数兼容约束)
|
||||||
|
2. 支持按价格/延迟/吞吐路由偏好
|
||||||
|
3. 支持 BYOK 与数据策略控制字段
|
||||||
|
- 商业模型:
|
||||||
|
1. 推理价格按上游透传
|
||||||
|
2. 充值时收取平台费(FAQ 公布 5.5%,最低 0.8 美元)
|
||||||
|
3. BYOK 超阈值后按比例收取服务费
|
||||||
|
- 优势:模型覆盖广、接入速度快、路由能力产品化强。
|
||||||
|
- 短板:企业私有化与深度合规能力相对有限,采购链路偏“开发者自助”。
|
||||||
|
|
||||||
|
## 3.2 Portkey
|
||||||
|
|
||||||
|
- 定位:AI Gateway + Guardrails + Observability 一体化生产平台。
|
||||||
|
- 目标客户:中大型 AI 团队、强调治理与生产稳定性的组织。
|
||||||
|
- 关键能力:
|
||||||
|
1. 配置化路由(负载、回退、可组合策略)
|
||||||
|
2. 观测、告警、策略控制、治理能力并列提供
|
||||||
|
3. 强调组织级运营能力(跨项目管理)
|
||||||
|
- 商业模型:
|
||||||
|
1. 公开套餐化(官网展示免费/成长/企业路径)
|
||||||
|
2. 企业合同与支持服务并行
|
||||||
|
3. 常见模式为平台订阅 + 用量增长
|
||||||
|
- 优势:功能面完整,偏“生产控制台”而非单纯转发器。
|
||||||
|
- 短板:对小团队可能偏重,学习和治理配置复杂度高于轻量网关。
|
||||||
|
|
||||||
|
## 3.3 Cloudflare AI Gateway
|
||||||
|
|
||||||
|
- 定位:依托 Cloudflare 网络边缘能力的 AI 网关与观测层。
|
||||||
|
- 目标客户:已使用 Cloudflare 的开发团队与企业平台团队。
|
||||||
|
- 关键能力:
|
||||||
|
1. OpenAI 兼容统一端点
|
||||||
|
2. 日志、缓存、限流、重试与回退
|
||||||
|
3. 动态路由(条件、比例、预算限制、版本管理)
|
||||||
|
- 商业模型:
|
||||||
|
1. 核心功能免费(文档明确)
|
||||||
|
2. 持久日志与 Logpush 基于配额/计划计费
|
||||||
|
3. 企业版价格走客户经理/合同
|
||||||
|
- 优势:边缘网络与全球可用性、与现有 Cloudflare 体系集成紧密。
|
||||||
|
- 短板:跨云中立能力虽有,但企业常会被其生态绑定。
|
||||||
|
|
||||||
|
## 3.4 Helicone
|
||||||
|
|
||||||
|
- 定位:LLM 观测与成本优化平台,内含网关能力。
|
||||||
|
- 目标客户:先监控再治理的成长型 AI 团队。
|
||||||
|
- 关键能力:
|
||||||
|
1. 请求级追踪、分析、评估
|
||||||
|
2. 网关能力(缓存、限流、fallback 等)
|
||||||
|
3. 注重成本可视化与运营闭环
|
||||||
|
- 商业模型:
|
||||||
|
1. 公开分层:Hobby 免费、Pro/Team/Enterprise
|
||||||
|
2. 座席 + 用量计费并行
|
||||||
|
3. 企业版提供 SSO、私有化、合同能力
|
||||||
|
- 优势:观测产品成熟、上手快、性价比高。
|
||||||
|
- 短板:在“企业级治理深度”上通常弱于重平台化网关。
|
||||||
|
|
||||||
|
## 3.5 Kong AI Gateway
|
||||||
|
|
||||||
|
- 定位:从 API Gateway 领军平台延展至 AI 流量治理与安全。
|
||||||
|
- 目标客户:大型企业、已有 API 平台治理基础的组织。
|
||||||
|
- 关键能力:
|
||||||
|
1. Universal API(多模型统一接入)
|
||||||
|
2. AI 插件体系(安全、治理、路由、观测、RAG 注入)
|
||||||
|
3. 企业级身份、审计、控制面整合
|
||||||
|
- 商业模型:
|
||||||
|
1. 平台订阅(Konnect Plus/Enterprise)
|
||||||
|
2. AI 能力作为 add-on 或模型代理配额收费
|
||||||
|
3. 企业合同主导
|
||||||
|
- 优势:企业采购成熟、治理与安全体系完整、组织落地能力强。
|
||||||
|
- 短板:成本和引入门槛较高,中小团队早期采用摩擦大。
|
||||||
|
|
||||||
|
## 3.6 LiteLLM OSS / Enterprise
|
||||||
|
|
||||||
|
- 定位:开发者友好的多模型统一 SDK + Proxy(OpenAI 兼容)。
|
||||||
|
- 目标客户:平台工程团队、希望自建网关的组织。
|
||||||
|
- 关键能力:
|
||||||
|
1. 多 provider 统一调用与代理
|
||||||
|
2. 虚拟 Key、预算、团队治理、回退与负载
|
||||||
|
3. 企业版补充 SSO/SCIM/JWT/支持服务
|
||||||
|
- 商业模型:
|
||||||
|
1. OSS 免费(MIT)
|
||||||
|
2. Enterprise 合同收费(SaaS 或自管)
|
||||||
|
- 优势:生态活跃、扩展快、可作为自建底座。
|
||||||
|
- 短板:OSS 到企业级落地仍需工程投入,复杂场景需要二次治理。
|
||||||
|
|
||||||
|
## 3.7 One-API(开源)
|
||||||
|
|
||||||
|
- 定位:经典 API 聚合与二次分发系统。
|
||||||
|
- 目标客户:预算敏感、追求快速上线的团队。
|
||||||
|
- 关键能力:
|
||||||
|
1. 多模型统一入口
|
||||||
|
2. 基础分发与额度管理
|
||||||
|
3. 部署简单、社区基础大
|
||||||
|
- 商业模型:开源为主(MIT)。
|
||||||
|
- 优势:部署快、学习成本低。
|
||||||
|
- 短板:高级调度、企业治理、可观测深度相对有限。
|
||||||
|
|
||||||
|
## 3.8 New-API(开源)
|
||||||
|
|
||||||
|
- 定位:高活跃多协议聚合平台,强调 OpenAI/Claude/Gemini 兼容。
|
||||||
|
- 目标客户:需要多协议快速接入的中小团队。
|
||||||
|
- 关键能力:
|
||||||
|
1. 协议覆盖广
|
||||||
|
2. 预扣/结算计费链路、重试与通道治理
|
||||||
|
3. 高迭代发布节奏
|
||||||
|
- 商业模型:开源为主,AGPLv3。
|
||||||
|
- 优势:功能覆盖广、更新频繁。
|
||||||
|
- 短板:AGPL 合规约束对闭源商用影响显著。
|
||||||
|
|
||||||
|
## 3.9 Sub2API(开源,高增长)
|
||||||
|
|
||||||
|
- 定位:面向订阅配额分发的 AI API Gateway。
|
||||||
|
- 目标客户:对成本非常敏感、希望快速中转的团队。
|
||||||
|
- 关键能力:
|
||||||
|
1. 多账户/多 key/精细计费
|
||||||
|
2. 调度策略较深(会话粘性 + 负载)
|
||||||
|
3. 运营控制面完整度在开源项目中较强
|
||||||
|
- 商业模型:开源(MIT),以社区传播驱动。
|
||||||
|
- 优势:增长快、调度与运营字段较完整。
|
||||||
|
- 短板:企业合规、条款风险、商业支持体系需自担。
|
||||||
|
|
||||||
|
## 4. 竞品功能清单(54 项)
|
||||||
|
|
||||||
|
说明:以下为“商用 LLM 网关”功能全景清单,按产品能力域分组。建议你们把它作为 PRD 功能池与版本裁剪基线。
|
||||||
|
|
||||||
|
## 4.1 接入与协议(A01-A10)
|
||||||
|
|
||||||
|
| ID | 功能项 | 业务价值 |
|
||||||
|
|---|---|---|
|
||||||
|
| A01 | OpenAI 兼容接口 | 降低迁移成本 |
|
||||||
|
| A02 | 原生协议透传(Anthropic/Gemini 等) | 覆盖更多场景 |
|
||||||
|
| A03 | 多模型统一模型目录 | 减少模型选择成本 |
|
||||||
|
| A04 | 多 provider 凭证管理 | 支持多供应商并行 |
|
||||||
|
| A05 | BYOK(自带上游 Key) | 满足企业合规与成本控制 |
|
||||||
|
| A06 | 统一 SDK/客户端 | 降低接入复杂度 |
|
||||||
|
| A07 | 流式输出兼容 | 保障交互体验 |
|
||||||
|
| A08 | 多模态接口支持(图像/音频) | 拓展业务边界 |
|
||||||
|
| A09 | 批处理接口 | 降本增效 |
|
||||||
|
| A10 | 模型能力标签(上下文、工具调用、响应格式) | 提高路由准确率 |
|
||||||
|
|
||||||
|
## 4.2 路由与弹性(R01-R10)
|
||||||
|
|
||||||
|
| ID | 功能项 | 业务价值 |
|
||||||
|
|---|---|---|
|
||||||
|
| R01 | 基础负载均衡 | 提升吞吐稳定性 |
|
||||||
|
| R02 | 主备回退链 | 提升可用性 |
|
||||||
|
| R03 | 重试策略(指数退避) | 降低瞬时失败 |
|
||||||
|
| R04 | 会话粘性路由 | 保障上下文连续性 |
|
||||||
|
| R05 | 按成本排序路由 | 降低单位调用成本 |
|
||||||
|
| R06 | 按延迟排序路由 | 优化用户体验 |
|
||||||
|
| R07 | 按吞吐排序路由 | 优化批量任务效率 |
|
||||||
|
| R08 | 条件路由(用户分层/场景) | 精细化运营 |
|
||||||
|
| R09 | 灰度/比例路由(A/B) | 降低模型切换风险 |
|
||||||
|
| R10 | 熔断与自动恢复 | 避免故障扩散 |
|
||||||
|
|
||||||
|
## 4.3 治理与安全(G01-G10)
|
||||||
|
|
||||||
|
| ID | 功能项 | 业务价值 |
|
||||||
|
|---|---|---|
|
||||||
|
| G01 | 多租户隔离 | 支持企业组织结构 |
|
||||||
|
| G02 | RBAC 权限模型 | 降低误操作风险 |
|
||||||
|
| G03 | API Key 生命周期管理 | 保障密钥安全 |
|
||||||
|
| G04 | SSO/OIDC/SAML | 企业集成必备 |
|
||||||
|
| G05 | 审计日志 | 满足审计与追责 |
|
||||||
|
| G06 | 敏感信息脱敏/PII 处理 | 合规要求 |
|
||||||
|
| G07 | 模型白名单/黑名单 | 规范调用范围 |
|
||||||
|
| G08 | 提示词防护/内容安全 | 降低安全与品牌风险 |
|
||||||
|
| G09 | 区域与数据边界策略 | 满足数据驻留要求 |
|
||||||
|
| G10 | 策略版本与回滚 | 降低配置变更风险 |
|
||||||
|
|
||||||
|
## 4.4 成本与计费(C01-C08)
|
||||||
|
|
||||||
|
| ID | 功能项 | 业务价值 |
|
||||||
|
|---|---|---|
|
||||||
|
| C01 | 请求级 token 成本核算 | 精准经营 |
|
||||||
|
| C02 | 预扣-结算-退款链路 | 账实一致 |
|
||||||
|
| C03 | 预算(租户/团队/key) | 防止失控消费 |
|
||||||
|
| C04 | 配额周期管理(日/周/月) | 可运营化控制 |
|
||||||
|
| C05 | 成本归因(项目/用户/功能) | ROI 分析 |
|
||||||
|
| C06 | 成本告警(阈值/异常) | 及时止损 |
|
||||||
|
| C07 | 成本优化建议(模型替代) | 持续降本 |
|
||||||
|
| C08 | 发票/账单导出 | 财务对账 |
|
||||||
|
|
||||||
|
## 4.5 可观测与运维(O01-O08)
|
||||||
|
|
||||||
|
| ID | 功能项 | 业务价值 |
|
||||||
|
|---|---|---|
|
||||||
|
| O01 | 请求日志检索 | 故障排查 |
|
||||||
|
| O02 | 成功率/延迟监控 | SLA 管理 |
|
||||||
|
| O03 | 模型级性能对比 | 路由优化 |
|
||||||
|
| O04 | Trace 级链路追踪 | 定位瓶颈 |
|
||||||
|
| O05 | 自定义标签/元数据 | 业务分析 |
|
||||||
|
| O06 | 告警中心(邮件/IM/Webhook) | 快速响应 |
|
||||||
|
| O07 | 仪表盘与运营看板 | 管理层可视化 |
|
||||||
|
| O08 | 长期日志归档与检索 | 审计与复盘 |
|
||||||
|
|
||||||
|
## 4.6 生态与交付(E01-E08)
|
||||||
|
|
||||||
|
| ID | 功能项 | 业务价值 |
|
||||||
|
|---|---|---|
|
||||||
|
| E01 | SaaS 部署 | 快速上线 |
|
||||||
|
| E02 | 私有化部署 | 满足合规 |
|
||||||
|
| E03 | 混合部署 | 平衡灵活与安全 |
|
||||||
|
| E04 | Terraform/声明式配置 | 平台工程标准化 |
|
||||||
|
| E05 | Webhook/事件订阅 | 与业务系统联动 |
|
||||||
|
| E06 | 插件机制 | 可扩展能力 |
|
||||||
|
| E07 | SIEM/Observability 集成 | 融合企业工具链 |
|
||||||
|
| E08 | 商业支持与 SLA | 降低生产风险 |
|
||||||
|
|
||||||
|
## 5. 关键能力覆盖矩阵(P0 级 20 项)
|
||||||
|
|
||||||
|
说明:`✅`=公开明确支持,`◐`=部分/需组合实现,`-`=未见公开明确说明。
|
||||||
|
|
||||||
|
| 功能 | OpenRouter | Portkey | Cloudflare | Helicone | Kong | LiteLLM OSS | One-API | New-API | Sub2API |
|
||||||
|
|---|---|---|---|---|---|---|---|---|---|
|
||||||
|
| OpenAI 兼容接口 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| 多 provider 路由 | ✅ | ✅ | ✅ | ◐ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| 自动 fallback | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| 负载均衡 | ◐ | ✅ | ✅ | ◐ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| BYOK | ✅ | ✅ | ✅ | ◐ | ✅ | ✅ | ◐ | ◐ | ◐ |
|
||||||
|
| 缓存 | ◐ | ✅ | ✅ | ✅ | ✅ | ✅ | ◐ | ◐ | ◐ |
|
||||||
|
| 限流 | ◐ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| 预算管理 | ◐ | ✅ | ✅(动态路由预算节点) | ◐ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| 成本看板 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| 组织/团队管理 | ◐ | ✅ | ◐ | ✅ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| SSO/SAML/OIDC | 企业版 | 企业版 | Cloudflare 体系 | 企业版 | 企业版 | 企业版 | - | - | - |
|
||||||
|
| 审计日志 | ◐ | ✅ | ✅ | ✅ | ✅ | 企业版 | - | ◐ | ◐ |
|
||||||
|
| 可观测追踪 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| 灰度/比例路由 | ◐ | ✅ | ✅ | ◐ | ✅ | ◐ | - | ◐ | ◐ |
|
||||||
|
| 数据策略/隐私控制 | ✅ | ✅ | ✅ | ✅ | ✅ | ◐ | - | ◐ | ◐ |
|
||||||
|
| 策略可组合 | ◐ | ✅ | ✅ | ◐ | ✅ | ◐ | - | ◐ | ◐ |
|
||||||
|
| 私有化部署 | - | 可选 | - | 企业版 | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| 插件/扩展能力 | ◐ | ◐ | ◐ | ◐ | ✅ | ✅ | ◐ | ✅ | ✅ |
|
||||||
|
| 企业 SLA 支持 | 企业版 | ✅ | ✅ | ✅ | ✅ | 企业版 | - | - | - |
|
||||||
|
| 合规能力(SOC2/HIPAA 等) | ◐ | 企业版 | Cloudflare 体系 | Team/Enterprise 提供 | 企业版 | 企业版 | - | - | - |
|
||||||
|
|
||||||
|
## 6. 商业模型矩阵(重点)
|
||||||
|
|
||||||
|
## 6.1 模型类型
|
||||||
|
|
||||||
|
| 商业模型 | 代表玩家 | 收费逻辑 | 对客户吸引点 | 对我方的启示 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| 推理透传 + 平台费 | OpenRouter | 模型价透传,充值/服务费 | 成本透明、上手快 | 可作为“低摩擦入口”模型 |
|
||||||
|
| 订阅 + 用量 | Helicone、Portkey(常见) | 基础套餐 + 请求/存储/事件增量 | 可预测支出 + 可扩展 | 适合 SaaS 中腰部客户 |
|
||||||
|
| 平台订阅 + AI Add-on | Kong | 基础平台 + AI 模块附加 | 企业治理统一采购 | 高客单价但销售周期长 |
|
||||||
|
| 云生态捆绑计费 | Cloudflare | 计划配额 + 增量特性计费 | 与现有云资源整合 | 强生态对抗靠跨云中立 |
|
||||||
|
| 开源免费 + 企业授权 | LiteLLM | OSS 免费,企业版收费 | 先试后买、开发者友好 | 建议采用双轮驱动策略 |
|
||||||
|
| 开源自建 | One-API/New-API/Sub2API | license 免费,运维自担 | 最低软件成本 | 迫使商用版必须提供“非代码价值” |
|
||||||
|
|
||||||
|
## 6.2 定价指标(行业常见)
|
||||||
|
|
||||||
|
1. 按请求量(requests)
|
||||||
|
2. 按日志事件/存储量(logs/events/storage)
|
||||||
|
3. 按模型代理数(model proxy)
|
||||||
|
4. 按组织/座席(org/seats)
|
||||||
|
5. 按账单量或平台分润(billing volume/fee)
|
||||||
|
6. 按企业功能包(SSO/审计/私有化/SLA)
|
||||||
|
|
||||||
|
## 6.3 商业模型优劣
|
||||||
|
|
||||||
|
| 模式 | 优点 | 缺点 | 适配阶段 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| 纯加价分润 | 规则简单、现金流快 | 同质化强、被价格战击穿 | 不建议作为主模式 |
|
||||||
|
| 纯订阅 | 收入稳定、可预测 | 初期成交门槛高 | 中后期可强化 |
|
||||||
|
| 混合(BYOK + 订阅 + 企业年约) | 可兼顾增长与利润 | 产品和销售复杂度更高 | 建议主路径 |
|
||||||
|
|
||||||
|
## 7. 技术优劣(平台成熟度视角)
|
||||||
|
|
||||||
|
评分范围:1(弱)-5(强),用于商业判断,不代表代码质量绝对值。
|
||||||
|
|
||||||
|
| 维度 | OpenRouter | Portkey | Cloudflare | Helicone | Kong | LiteLLM | One-API | New-API | Sub2API |
|
||||||
|
|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|
||||||
|
| 接入广度 | 5 | 5 | 4 | 4 | 5 | 5 | 4 | 5 | 4 |
|
||||||
|
| 路由策略深度 | 5 | 5 | 4 | 3 | 5 | 4 | 3 | 4 | 4 |
|
||||||
|
| 成本治理能力 | 4 | 5 | 4 | 4 | 4 | 4 | 3 | 4 | 4 |
|
||||||
|
| 企业治理能力 | 3 | 5 | 4 | 4 | 5 | 4 | 2 | 3 | 3 |
|
||||||
|
| 可观测成熟度 | 4 | 5 | 4 | 5 | 5 | 4 | 2 | 4 | 4 |
|
||||||
|
| 部署灵活性 | 2 | 4 | 2 | 4 | 5 | 5 | 5 | 5 | 5 |
|
||||||
|
| 合规与采购友好 | 3 | 4 | 5 | 4 | 5 | 4 | 2 | 2 | 2 |
|
||||||
|
| 生态与渠道能力 | 4 | 4 | 5 | 3 | 5 | 4 | 3 | 3 | 3 |
|
||||||
|
|
||||||
|
### 关键观察
|
||||||
|
|
||||||
|
1. 商业化赛道中,Portkey/Kong 更偏“企业治理平台”;OpenRouter 更偏“交易聚合路由”。
|
||||||
|
2. Cloudflare 的边缘与平台集成强,但跨云中立价值是其薄弱环节。
|
||||||
|
3. Helicone 在观测与运营视角强,适合作为“先观测后治理”的切入。
|
||||||
|
4. 开源赛道在“基础能力”上已足够成熟,商业产品必须强化治理、合规、SLA、组织协同价值。
|
||||||
|
|
||||||
|
## 8. 开源项目运营信号(GitHub 官方 API)
|
||||||
|
|
||||||
|
采样时间:2026-03-16。
|
||||||
|
|
||||||
|
| 项目 | Star | Fork | License | 最近推送 | 备注 |
|
||||||
|
|---|---:|---:|---|---|---|
|
||||||
|
| LiteLLM | 39,228 | 6,434 | MIT(仓库 API 显示 NOASSERTION,仓库内 LICENSE 为 MIT) | 2026-03-16 | 社区与生态最强 |
|
||||||
|
| One-API | 30,599 | 5,838 | MIT | 2026-01-09 | 中文社区影响力大 |
|
||||||
|
| New-API | 21,019 | 4,067 | AGPLv3 | 2026-03-16 | 高活跃,高合规约束 |
|
||||||
|
| Sub2API | 5,915 | 894 | MIT | 2026-03-16 | 增长快,运营导向明显 |
|
||||||
|
|
||||||
|
## 9. 机会空白(可用于你们产品定位)
|
||||||
|
|
||||||
|
## 9.1 市场尚未被充分满足的需求
|
||||||
|
|
||||||
|
1. 多租户 FinOps 自动化:不仅展示成本,还要自动给出路由降本建议并可回放验证。
|
||||||
|
2. 业务语义路由:按“场景目标(准确率/速度/预算)”驱动路由,而非仅按模型名。
|
||||||
|
3. 财务对账闭环:预算、账单、团队归因、异常追责一体化。
|
||||||
|
4. 合规策略模板:按行业(金融/医疗/出海)提供可落地策略包。
|
||||||
|
5. “开源友好 + 企业可升级”:让技术团队先低成本接入,再平滑升级到企业治理版。
|
||||||
|
|
||||||
|
## 9.2 建议定位语(可选)
|
||||||
|
|
||||||
|
“不是另一个 API 转发器,而是企业 AI 成本与治理控制面。”
|
||||||
|
|
||||||
|
## 10. 对你项目的可执行结论
|
||||||
|
|
||||||
|
1. 基础接入和转发能力不是壁垒,必须用“治理 + FinOps + 合规”打差异化。
|
||||||
|
2. 商业模型建议从第一天就采用混合:
|
||||||
|
- 自助版(BYOK + 免费额度)
|
||||||
|
- 增长版(订阅 + 用量)
|
||||||
|
- 企业版(年约 + 私有化 + SLA)
|
||||||
|
3. 首发版本不要追“模型最多”,而要追“企业采购可解释价值”:
|
||||||
|
- 成本可控
|
||||||
|
- 风险可控
|
||||||
|
- 责任可追溯
|
||||||
|
|
||||||
|
## 11. 下一轮补充调研(建议 2 周内)
|
||||||
|
|
||||||
|
1. 客户访谈:
|
||||||
|
- 10 家中型 AI 团队(技术负责人)
|
||||||
|
- 5 家企业安全/合规负责人
|
||||||
|
2. 商务访谈:
|
||||||
|
- 5 位有采购权的 IT/平台负责人
|
||||||
|
3. 定价调研:
|
||||||
|
- 做一轮 Van Westendorp(100+ 样本)
|
||||||
|
- 验证“按组织 + 按请求 + 企业功能包”接受度
|
||||||
|
4. 竞品实测:
|
||||||
|
- 选 3 家商业网关做 2 周试运行
|
||||||
|
- 对比接入时长、治理能力、成本可解释性
|
||||||
|
|
||||||
|
## 12. 参考来源(官方优先)
|
||||||
|
|
||||||
|
- OpenRouter Provider Routing: https://openrouter.ai/docs/features/provider-routing
|
||||||
|
- OpenRouter FAQ(费用与 BYOK): https://openrouter.ai/docs/faq
|
||||||
|
- Portkey 官网: https://portkey.ai/
|
||||||
|
- Portkey Fallbacks: https://docs.portkey.ai/docs/product/ai-gateway/fallbacks
|
||||||
|
- Portkey Load Balancing: https://docs.portkey.ai/docs/product/ai-gateway/load-balancing
|
||||||
|
- Cloudflare AI Gateway 概览: https://developers.cloudflare.com/ai-gateway/
|
||||||
|
- Cloudflare Unified API: https://developers.cloudflare.com/ai-gateway/usage/chat-completion/
|
||||||
|
- Cloudflare Dynamic Routing: https://developers.cloudflare.com/ai-gateway/features/dynamic-routing/
|
||||||
|
- Cloudflare Pricing: https://developers.cloudflare.com/ai-gateway/reference/pricing/
|
||||||
|
- Helicone Pricing: https://www.helicone.ai/pricing
|
||||||
|
- Kong AI Gateway 产品页: https://konghq.com/products/kong-ai-gateway
|
||||||
|
- Kong AI Gateway 文档: https://developer.konghq.com/ai-gateway/
|
||||||
|
- Kong AI Proxy 插件: https://docs.konghq.com/hub/kong-inc/ai-proxy/
|
||||||
|
- Kong Pricing: https://konghq.com/pricing
|
||||||
|
- LiteLLM 官网: https://www.litellm.ai/
|
||||||
|
- LiteLLM Enterprise: https://www.litellm.ai/enterprise
|
||||||
|
- LiteLLM Virtual Keys: https://docs.litellm.ai/docs/proxy/virtual_keys
|
||||||
|
- GitHub API - one-api: https://api.github.com/repos/songquanpeng/one-api
|
||||||
|
- GitHub API - new-api: https://api.github.com/repos/QuantumNous/new-api
|
||||||
|
- GitHub API - litellm: https://api.github.com/repos/BerriAI/litellm
|
||||||
|
- GitHub API - sub2api: https://api.github.com/repos/Wei-Shaw/sub2api
|
||||||
|
|
||||||
636
docs/llm_gateway_market_competition_report_2026-03-14.md
Normal file
636
docs/llm_gateway_market_competition_report_2026-03-14.md
Normal file
@@ -0,0 +1,636 @@
|
|||||||
|
# 商用 LLM 通用转发网关市场竞争调研报告(详版)
|
||||||
|
|
||||||
|
- 版本:v1.1
|
||||||
|
- 日期:2026-03-16
|
||||||
|
- 适用对象:创业团队、产品负责人、投融资材料准备、企业内部立项评审
|
||||||
|
- 研究对象:面向企业/开发者的“多模型统一接入 + 路由 + 治理 + 计费”的商用网关平台
|
||||||
|
|
||||||
|
## 1. 执行摘要
|
||||||
|
|
||||||
|
本报告结论:
|
||||||
|
|
||||||
|
1. “统一 API 转发”本身已经商品化,单点价值不足,难形成长期壁垒。
|
||||||
|
2. 市场仍有空间,但价值中心已从“接模型”转向“成本治理 + 质量路由 + 企业合规”。
|
||||||
|
3. 可行切入点是“企业级多租户网关 + FinOps(成本优化)+ Governance(策略治理)”。
|
||||||
|
4. 竞争环境可分三层:
|
||||||
|
- 第一层:AI 原生网关平台(OpenRouter、Portkey、LiteLLM、Cloudflare AI Gateway、Kong AI Gateway、Helicone)。
|
||||||
|
- 第二层:云厂商模型平台(AWS Bedrock、Google Vertex AI、Azure OpenAI/AI Foundry、阿里云百炼、百度千帆、火山方舟、腾讯混元)。
|
||||||
|
- 第三层:开源自建与代理方案(LiteLLM OSS、One API/New API、Sub2api 等)。
|
||||||
|
5. 商业化上,推荐采用“BYOK 订阅 + 代付加价 + 企业版年约”的混合模型,而不是纯 token 差价。
|
||||||
|
|
||||||
|
[推断] 未来 12-24 个月,赛道核心竞争将集中在:
|
||||||
|
- 路由质量评估能力(不是规则配置数量)
|
||||||
|
- 跨供应商成本优化与预算闭环
|
||||||
|
- 企业合规(数据边界、审计、权限)
|
||||||
|
- 生态集成(CI/CD、API 网关、可观测、计费系统)
|
||||||
|
|
||||||
|
## 2. 研究方法与边界
|
||||||
|
|
||||||
|
## 2.1 方法
|
||||||
|
|
||||||
|
1. 以官方文档、官方定价页、官方产品页为主(优先级最高)。
|
||||||
|
2. 对同一能力尽量使用“功能-定价-政策”三证合一验证。
|
||||||
|
3. 对缺失字段明确标注“公开信息未披露”,不做强行推断。
|
||||||
|
|
||||||
|
## 2.2 时间边界
|
||||||
|
|
||||||
|
- 数据核验时间:2026-03-16。
|
||||||
|
- 由于 LLM 价格、模型列表、预览版策略变化频繁,建议每月滚动更新一次关键字段。
|
||||||
|
|
||||||
|
## 2.3 范围边界
|
||||||
|
|
||||||
|
- 本报告聚焦“网关与路由平台竞争”,不展开基础模型能力评测(例如推理准确率 benchmark)。
|
||||||
|
- 不将纯应用层 AI 产品(聊天助手、Agent 应用)视作直接竞争者。
|
||||||
|
|
||||||
|
## 3. 赛道定义与价值链
|
||||||
|
|
||||||
|
目标产品(你要做的)在价值链中的位置:
|
||||||
|
|
||||||
|
1. 上游:模型与推理供应商(OpenAI、Anthropic、Google、DeepSeek、xAI、云厂商托管模型)。
|
||||||
|
2. 中游:网关与治理层(统一 API、路由、缓存、回退、策略、计费、审计)。
|
||||||
|
3. 下游:企业应用与开发团队(客服、销售、研发助手、内容生产、行业 Copilot)。
|
||||||
|
|
||||||
|
网关层必须解决的核心业务问题:
|
||||||
|
|
||||||
|
1. 成本:在质量约束下持续降本。
|
||||||
|
2. 稳定性:供应商故障/限流时的弹性切换。
|
||||||
|
3. 可控性:多租户预算、权限、审计、数据策略。
|
||||||
|
4. 运维效率:统一日志、可观测、告警、SLA 追踪。
|
||||||
|
|
||||||
|
## 4. 竞争格局总览(分层)
|
||||||
|
|
||||||
|
## 4.1 第一层:AI 原生网关(直接竞争)
|
||||||
|
|
||||||
|
### 4.1.1 OpenRouter
|
||||||
|
|
||||||
|
- 定位:聚合多模型、多提供商路由与支付结算。
|
||||||
|
- 关键能力(官方):
|
||||||
|
- Provider 路由排序(价格、上下文长度、吞吐、延迟等维度)。
|
||||||
|
- 自动回退(allow_fallbacks)。
|
||||||
|
- 支持提示词缓存控制与缓存命中观测字段。
|
||||||
|
- 隐私策略默认日志保留期与零日志能力选项。
|
||||||
|
- 风险:
|
||||||
|
- 在“聚合器 + 交易撮合”模式下,企业客户可能关注合规与专属部署能力。
|
||||||
|
|
||||||
|
### 4.1.2 Portkey
|
||||||
|
|
||||||
|
- 定位:AI Gateway + Guardrails + Observability 的一体平台。
|
||||||
|
- 关键能力(官方):
|
||||||
|
- 统一网关、流量管控、策略与插件式增强。
|
||||||
|
- 明确的套餐分层(含免费、增长、专业与企业)。
|
||||||
|
- 风险:
|
||||||
|
- 面向中大型团队时,功能覆盖较完整,你的差异化需要集中在更强的成本优化算法和行业模板。
|
||||||
|
|
||||||
|
### 4.1.3 LiteLLM(OSS + 企业版生态)
|
||||||
|
|
||||||
|
- 定位:OpenAI 兼容的多供应商接入层,偏开发者与平台工程团队。
|
||||||
|
- 关键能力(官方):
|
||||||
|
- 广泛 provider 兼容(文档列出大量模型来源)。
|
||||||
|
- 支持网关代理方式,易作为企业自建基础层。
|
||||||
|
- 风险:
|
||||||
|
- 作为开源底座,压缩了“基础转发”的商业溢价空间。
|
||||||
|
|
||||||
|
### 4.1.4 Cloudflare AI Gateway
|
||||||
|
|
||||||
|
- 定位:依托 Cloudflare 边缘网络的 AI 请求网关与日志治理。
|
||||||
|
- 关键能力(官方):
|
||||||
|
- 统一代理接入、日志与监控。
|
||||||
|
- 计费以日志量为核心(公开有免费档与按量单价)。
|
||||||
|
- 风险:
|
||||||
|
- 基础网关能力强,若客户已在 Cloudflare 体系内,你的切入点需强调“跨云/跨地域统一治理 + 成本优化深度”。
|
||||||
|
|
||||||
|
### 4.1.5 Kong AI Gateway
|
||||||
|
|
||||||
|
- 定位:企业 API 管理厂商向 AI 治理延展。
|
||||||
|
- 关键能力(官方):
|
||||||
|
- 基于成熟 API Gateway 体系,提供 AI 流量治理、令牌使用控制、敏感信息防护等。
|
||||||
|
- 风险:
|
||||||
|
- 在大型企业采购链路中,Kong 具有既有渠道优势(API 平台团队熟悉)。
|
||||||
|
|
||||||
|
### 4.1.6 Helicone(网关 + 可观测方向)
|
||||||
|
|
||||||
|
- 定位:LLM 可观测与优化平台,提供网关能力。
|
||||||
|
- 关键能力(公开页面):
|
||||||
|
- 强调日志、追踪、成本可视化,网关能力迭代中。
|
||||||
|
- 风险:
|
||||||
|
- 若其网关成熟度继续提升,会与中小团队“先监控后治理”的需求强耦合。
|
||||||
|
|
||||||
|
## 4.2 第二层:云厂商模型平台(强替代竞争)
|
||||||
|
|
||||||
|
这类平台不总以“独立网关”售卖,但通过“官方托管模型 + 统一调用 + 企业治理”形成强替代。
|
||||||
|
|
||||||
|
### 4.2.1 AWS Bedrock
|
||||||
|
|
||||||
|
- 强项:
|
||||||
|
- 多模型家族接入、企业合规、与 AWS 生态深度融合。
|
||||||
|
- 官方明确不同模型/模式计费(包括按需、批处理等差异)。
|
||||||
|
- 对你威胁:
|
||||||
|
- 对 AWS 深度用户,Bedrock 可直接替代第三方网关。
|
||||||
|
|
||||||
|
### 4.2.2 Google Vertex AI + Gemini
|
||||||
|
|
||||||
|
- 强项:
|
||||||
|
- 平台侧治理、企业安全、与 GCP 数据平台联动。
|
||||||
|
- Gemini API 与 Vertex 定价路径明确,便于采购。
|
||||||
|
- 对你威胁:
|
||||||
|
- 若客户主要在 GCP,上手成本低,网关外采动力下降。
|
||||||
|
|
||||||
|
### 4.2.3 Azure OpenAI / Azure AI Foundry
|
||||||
|
|
||||||
|
- 强项:
|
||||||
|
- 企业采购与合规优势、区域与数据边界方案、与微软体系整合。
|
||||||
|
- 对你威胁:
|
||||||
|
- 大企业已存在 Azure 合同与安全基线,迁移成本高。
|
||||||
|
|
||||||
|
## 4.3 第三层:中国区平台(本地化替代与区域强竞争)
|
||||||
|
|
||||||
|
### 4.3.1 阿里云百炼(Model Studio)
|
||||||
|
|
||||||
|
- 官方信息显示:
|
||||||
|
- 提供模型 API 平台与 OpenAI SDK 兼容调用方式。
|
||||||
|
- 支持在单平台接入多模型。
|
||||||
|
- 竞争意义:
|
||||||
|
- 对中国区客户,合规与采购便利显著,能直接替代海外网关方案。
|
||||||
|
|
||||||
|
### 4.3.2 百度千帆
|
||||||
|
|
||||||
|
- 官方信息显示:
|
||||||
|
- 提供企业级模型开发平台、模型与应用构建能力。
|
||||||
|
- 明确兼容 OpenAI 调用风格的接口说明。
|
||||||
|
- 竞争意义:
|
||||||
|
- 在政企、传统行业数字化项目中具备渠道与生态优势。
|
||||||
|
|
||||||
|
### 4.3.3 火山方舟(Volcengine Ark)
|
||||||
|
|
||||||
|
- 官方信息显示:
|
||||||
|
- 提供模型服务价格说明与 OpenAI 兼容调用说明。
|
||||||
|
- 竞争意义:
|
||||||
|
- 以“平台 + 模型 + 基础设施”一体化竞争,对中大型客户吸引力高。
|
||||||
|
|
||||||
|
### 4.3.4 腾讯混元(Tencent Hunyuan)
|
||||||
|
|
||||||
|
- 官方信息显示:
|
||||||
|
- 提供 OpenAI API 兼容接入说明与模型计费说明。
|
||||||
|
- 竞争意义:
|
||||||
|
- 与腾讯云生态绑定明显,利于已有云客户快速落地。
|
||||||
|
|
||||||
|
## 4.4 第四层:开源代理与自建方案(价格天花板压制)
|
||||||
|
|
||||||
|
- 代表:LiteLLM OSS、One API/New API 类项目、Sub2api、自建 API Gateway + 策略插件。
|
||||||
|
- 竞争特点:
|
||||||
|
- 几乎零 license 成本,但需要平台工程与运维能力。
|
||||||
|
- 对你影响:
|
||||||
|
- 你的商业版需要把“总拥有成本(TCO)”讲清楚,证明买你比自建更省钱、更稳、更快。
|
||||||
|
|
||||||
|
### 4.4.1 Sub2api(新增:高增长开源中转项目)
|
||||||
|
|
||||||
|
- 项目定位(官方仓库描述):
|
||||||
|
- 面向多类上游渠道的 API 转发与配额管理,强调“一个 API Key”整合调用体验。
|
||||||
|
- 关键信号(截至 2026-03-16):
|
||||||
|
- GitHub Star 约 5.8k,Fork 约 887,公开提交 2,239 次,显示较高社区活跃度。
|
||||||
|
- 发布节奏活跃:公开 Releases 89,最新版本发布时间为 2026-03-16。
|
||||||
|
- 上游关联项目 `claude-relay-service` 已在仓库说明中提示迁移至 `sub2api`,进一步强化流量聚合效应。
|
||||||
|
- 竞争意义:
|
||||||
|
- 对价格敏感、工程人力有限的团队,Sub2api 形成“可快速上线”的低成本替代。
|
||||||
|
- 在“统一转发”需求上,会持续压缩商业网关基础功能的定价空间。
|
||||||
|
- 风险提示:
|
||||||
|
- 项目 README 明确提示“Terms of Service Risk / 仅供学习研究”类免责声明。企业落地时需重点审查上游服务条款、合规边界与账号治理策略。
|
||||||
|
|
||||||
|
### 4.4.2 One API(成熟开源分发系统)
|
||||||
|
|
||||||
|
- 项目定位(官方仓库描述):
|
||||||
|
- LLM API 管理与二次分发系统,覆盖多家模型供应商并提供统一 API 适配。
|
||||||
|
- 关键信号(截至 2026-03-16):
|
||||||
|
- GitHub Star 约 30.6k,Fork 约 5.8k,公开提交 1,210 次,长期社区影响力强。
|
||||||
|
- 公开 Releases 55,最近发布显示为 2025-02-02。
|
||||||
|
- 竞争意义:
|
||||||
|
- 在中文开发者社区渗透深,作为“低成本中转”默认选项之一,会持续压缩商用网关基础能力的付费空间。
|
||||||
|
- 风险提示:
|
||||||
|
- 仓库“注意”章节强调 MIT 协议下的署名要求与风险自担;企业二开或商用需做 license 合规核查。
|
||||||
|
|
||||||
|
### 4.4.3 New API(高活跃一体化网关项目)
|
||||||
|
|
||||||
|
- 项目定位(官方仓库描述):
|
||||||
|
- 统一 AI 模型聚合与分发平台,支持 OpenAI-compatible、Claude-compatible、Gemini-compatible 协议转换。
|
||||||
|
- 关键信号(截至 2026-03-16):
|
||||||
|
- GitHub Star 约 20.9k,Fork 约 4.1k,公开提交 5,403 次,公开 Releases 456(最新 2026-03-14)。
|
||||||
|
- 竞争意义:
|
||||||
|
- 在“统一协议 + 多模型聚合 + 快速部署”上形成明显替代,特别适合预算有限且具备基础运维能力的团队。
|
||||||
|
- 风险提示:
|
||||||
|
- README 明确“for personal learning purposes only”,并提示需遵守 OpenAI ToU 与中国《生成式人工智能服务管理暂行办法》。
|
||||||
|
- 采用 AGPLv3,企业商用与二次分发需重点评估开源义务边界。
|
||||||
|
|
||||||
|
## 5. 竞品能力矩阵(精简版)
|
||||||
|
|
||||||
|
说明:以下“有/部分/未明确”基于公开信息,企业私有能力可能高于公开文档。
|
||||||
|
|
||||||
|
| 竞品 | 统一 API | 多模型路由 | 自动回退 | 成本看板/预算 | 合规/隐私控制 | 部署形态 | 主要商业模式 |
|
||||||
|
|---|---|---|---|---|---|---|---|
|
||||||
|
| OpenRouter | 有 | 强 | 有 | 部分 | 有(含零日志选项) | 公有平台 | 按量 + 平台服务 |
|
||||||
|
| Portkey | 有 | 强 | 有 | 强 | 强 | SaaS + 企业 | 订阅 + 企业合同 |
|
||||||
|
| LiteLLM OSS | 有 | 中 | 部分 | 部分 | 依赖自建策略 | 自建为主 | 开源 + 企业扩展 |
|
||||||
|
| Cloudflare AI Gateway | 有 | 中 | 部分 | 中(日志计费) | 强(云边缘体系) | SaaS | 按量 |
|
||||||
|
| Kong AI Gateway | 有 | 中 | 中 | 中 | 强 | 企业部署/SaaS | 企业授权 |
|
||||||
|
| Helicone | 有 | 部分 | 部分 | 强(可观测) | 部分 | SaaS | 订阅/按量 |
|
||||||
|
| Sub2api(开源) | 有 | 中 | 部分 | 部分 | 依赖自建策略 | 自建为主 | 开源 |
|
||||||
|
| One API(开源) | 有 | 中 | 部分 | 部分 | 依赖自建策略 | 自建为主 | 开源 |
|
||||||
|
| New API(开源) | 有 | 中 | 部分 | 部分 | 依赖自建策略 | 自建为主 | 开源 |
|
||||||
|
| AWS Bedrock | 有(平台统一) | 中 | 部分 | 中 | 强 | 公有云 | 按量 |
|
||||||
|
| Vertex AI | 有(平台统一) | 中 | 部分 | 中 | 强 | 公有云 | 按量 |
|
||||||
|
| Azure OpenAI | 有(平台统一) | 中 | 部分 | 中 | 强 | 公有云/区域 | 按量 |
|
||||||
|
| 阿里云百炼 | 有 | 中 | 部分 | 中 | 强 | 公有云 | 按量 |
|
||||||
|
| 百度千帆 | 有 | 中 | 部分 | 中 | 强 | 公有云 | 按量 |
|
||||||
|
| 火山方舟 | 有 | 中 | 部分 | 中 | 强 | 公有云 | 按量 |
|
||||||
|
| 腾讯混元 | 有 | 中 | 部分 | 中 | 强 | 公有云 | 按量 |
|
||||||
|
|
||||||
|
## 6. 定价竞争观察(关键)
|
||||||
|
|
||||||
|
## 6.1 上游模型价格差异巨大
|
||||||
|
|
||||||
|
公开定价显示,不同供应商在输入/输出 token 维度价差明显,且同一供应商在标准版、轻量版、预览版之间差异较大。
|
||||||
|
|
||||||
|
对网关业务的含义:
|
||||||
|
|
||||||
|
1. 如果你的路由策略只做“可用性”,价值有限。
|
||||||
|
2. 如果能在质量阈值内稳定完成“成本最优路由”,就有稳定货币化空间。
|
||||||
|
3. 需要建立“按任务类型自动路由”的策略库,而非单一模型默认值。
|
||||||
|
|
||||||
|
## 6.2 网关层定价常见三类
|
||||||
|
|
||||||
|
1. 按请求/日志/事件计费(如日志量、请求量)。
|
||||||
|
2. 平台订阅计费(按团队/API key/项目数/功能包)。
|
||||||
|
3. 代付加价(在模型成本上附加服务费)。
|
||||||
|
|
||||||
|
[推断] 最稳健的商业模型是“订阅 + 代付”,因为:
|
||||||
|
- 仅按量容易被大客户压价。
|
||||||
|
- 仅订阅在早期难覆盖平台算力与支持成本。
|
||||||
|
|
||||||
|
## 7. 你的可行差异化战略(建议)
|
||||||
|
|
||||||
|
## 7.1 不建议的定位
|
||||||
|
|
||||||
|
1. “我们接入更多模型”。
|
||||||
|
2. “我们更便宜的 API 转发”。
|
||||||
|
3. “OpenAI 兼容就够了”。
|
||||||
|
|
||||||
|
这些点都容易被开源或云厂商快速覆盖。
|
||||||
|
|
||||||
|
## 7.2 建议的定位
|
||||||
|
|
||||||
|
“企业级 LLM FinOps + Governance Gateway”
|
||||||
|
|
||||||
|
优先做深四个能力:
|
||||||
|
|
||||||
|
1. 质量约束下的成本最优路由
|
||||||
|
- 按场景模板(RAG、代码生成、客服问答、摘要、翻译)自动选型。
|
||||||
|
2. 多租户预算与分账闭环
|
||||||
|
- 组织/项目/用户级预算、阈值告警、超额策略。
|
||||||
|
3. 合规策略中台
|
||||||
|
- 数据留存策略、敏感信息检测、可审计访问链路。
|
||||||
|
4. 可证明的降本结果
|
||||||
|
- 每周自动输出“节省报告”(对比基线模型)。
|
||||||
|
|
||||||
|
| ### 7.3 12 个月最小可行商业路径(MVP 到 PMF)
|
||||||
|
|
||||||
|
1. 0-3 个月:
|
||||||
|
- 聚焦一个 ICP:月模型成本 > $10k 的 AI SaaS 团队。
|
||||||
|
- 交付能力:统一 API、基本路由、回退、成本看板。
|
||||||
|
2. 4-6 个月:
|
||||||
|
- 引入策略模板与自动化成本优化。
|
||||||
|
- 发布可审计账单与预算控制。
|
||||||
|
3. 7-12 个月:
|
||||||
|
- 推出企业版(SAML/SCIM、审计导出、专属 SLA、私有部署选项)。
|
||||||
|
- 建立渠道合作(SI、云咨询、出海服务商)。
|
||||||
|
|
||||||
|
## 8. 风险清单与应对
|
||||||
|
|
||||||
|
1. 价格透明导致毛利压缩。
|
||||||
|
- 应对:从“代采差价”转向“平台价值收费”。
|
||||||
|
2. 开源替代加速。
|
||||||
|
- 应对:开放核心兼容层,商业化聚焦治理与运营闭环。
|
||||||
|
3. 合规责任上升。
|
||||||
|
- 应对:从 Day 1 引入审计日志、密钥托管、数据策略配置。
|
||||||
|
4. 供应商策略变化。
|
||||||
|
- 应对:连接器抽象层、可热切换 provider、模型策略版本化。
|
||||||
|
|
||||||
|
## 9. 仍需补充的深入调研(下一轮建议)
|
||||||
|
|
||||||
|
为了让市场分析达到“投委会可审阅”级别,建议补三类数据:
|
||||||
|
|
||||||
|
1. 客户访谈证据(15-30 家)
|
||||||
|
- 当前月度模型成本、采购路径、替代方案、合规红线。
|
||||||
|
2. 真实对照实验
|
||||||
|
- 同任务流量下,单模型 vs 策略路由的成本/延迟/质量对照。
|
||||||
|
3. 竞争情报跟踪
|
||||||
|
- 每月更新竞品功能变动、价格变动、渠道动作、融资进展。
|
||||||
|
|
||||||
|
## 10. 可直接用于融资/立项的结论模板
|
||||||
|
|
||||||
|
可复用表述:
|
||||||
|
|
||||||
|
“我们不做模型,我们做企业 AI 的成本与治理操作系统。通过跨供应商智能路由、预算闭环和合规策略,把 AI 从高不确定成本变成可预测、可审计、可规模化的生产能力。”
|
||||||
|
|
||||||
|
## 11. 你 vs 开源替代:销售对比话术(可直接使用)
|
||||||
|
|
||||||
|
本节面向销售与解决方案团队,目标是把“统一转发”从价格战拉回到“业务连续性 + 合规 + 财务可控”。
|
||||||
|
|
||||||
|
### 11.1 客户常见异议与回应模板
|
||||||
|
|
||||||
|
1. 异议:“我们可以直接用 Sub2api/One API/New API,为什么要付费?”
|
||||||
|
回应:
|
||||||
|
“开源方案适合快速试验,但生产环境要承担 24x7 可用性、SLA、审计、权限、账单争议、合规责任。我们卖的不是转发,而是可持续运营能力:稳定性、可审计、可追责和可控成本。”
|
||||||
|
|
||||||
|
2. 异议:“功能看起来差不多,价格却更高。”
|
||||||
|
回应:
|
||||||
|
“如果只看接口转发,确实趋同。差异在于异常恢复、预算闭环、组织级权限、账单归因和合规证据链。我们帮助你降低的是总拥有成本(TCO)和事故成本,不是单次请求成本。”
|
||||||
|
|
||||||
|
3. 异议:“我们有 DevOps,可以自建。”
|
||||||
|
回应:
|
||||||
|
“可以自建,但请同时评估维护成本:版本跟进、故障值班、安全审计、合规整改和跨团队对账。我们建议先做 4 周并行 PoC,对比真实人力投入和事故率,再决定买还是自建。”
|
||||||
|
|
||||||
|
### 11.2 场景化定位话术
|
||||||
|
|
||||||
|
1. 面向 AI SaaS(月模型成本 > $10k)
|
||||||
|
“第一阶段先帮你每月稳定降本,再在同一条链路上补齐预算控制和客户分账,避免规模增长后财务失控。”
|
||||||
|
|
||||||
|
2. 面向中大型企业平台团队
|
||||||
|
“我们不是替代你们的基础设施,而是补齐多模型治理层:统一策略、统一审计、统一计费与统一故障响应。”
|
||||||
|
|
||||||
|
3. 面向 SI/咨询实施商
|
||||||
|
“同一套平台同时服务多个终端客户,天然支持租户隔离与账单归因,显著降低项目交付和后期运维成本。”
|
||||||
|
|
||||||
|
## 12. 开源替代可替代概率评分(决策版)
|
||||||
|
|
||||||
|
说明:本节用于内部优先级决策,不作为对外宣传。
|
||||||
|
[推断] 评分基于公开资料与常见企业采购逻辑,建议每月更新。
|
||||||
|
|
||||||
|
### 12.1 评分模型
|
||||||
|
|
||||||
|
1. 替代压力指数(0-5,越高越危险)
|
||||||
|
`0.30 * 成本优势 + 0.20 * 部署便利 + 0.30 * 功能重叠 + 0.20 * 社区活跃`
|
||||||
|
|
||||||
|
2. 商业防御指数(0-5,越高越安全)
|
||||||
|
`0.40 * 合规能力 + 0.30 * SLA/稳定性 + 0.30 * 治理深度`
|
||||||
|
|
||||||
|
3. 净替代风险(0-5)
|
||||||
|
`替代压力指数 - 商业防御指数 + 2.5`(结果截断到 0-5)
|
||||||
|
|
||||||
|
### 12.2 当前评估(截至 2026-03-16)
|
||||||
|
|
||||||
|
| 竞品 | 替代压力指数 | 商业防御指数(你) | 净替代风险 | 结论 |
|
||||||
|
|---|---:|---:|---:|---|
|
||||||
|
| Sub2api | 4.1 | 3.6 | 3.0 | 中高风险,重点防守中小客户价格带 |
|
||||||
|
| One API | 4.3 | 3.6 | 3.2 | 中高风险,需强化“治理与合规”价值证明 |
|
||||||
|
| New API | 4.4 | 3.6 | 3.3 | 中高风险,需强化“企业可运营性”差异 |
|
||||||
|
|
||||||
|
### 12.3 对应防御动作
|
||||||
|
|
||||||
|
1. 产品侧
|
||||||
|
优先投入“预算控制 + 审计导出 + 异常回放 + 账单归因”,弱化基础转发功能堆叠。
|
||||||
|
|
||||||
|
2. 定价侧
|
||||||
|
采用“基础订阅 + 成本节省分成”混合模式,避免被纯 token 差价锚定。
|
||||||
|
|
||||||
|
3. 销售侧
|
||||||
|
强制要求所有商机提供“自建人力成本估算表”和“事故责任矩阵”,避免只做接口价格比较。
|
||||||
|
|
||||||
|
## 13. 六个月竞争情报监控模板(运营版)
|
||||||
|
|
||||||
|
目标:将竞品调研从“季度汇报”升级为“每月可执行机制”。
|
||||||
|
|
||||||
|
### 13.1 月度监控指标
|
||||||
|
|
||||||
|
| 维度 | 指标 | 采集频率 | 预警阈值 | 触发动作 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| 增长 | Star/Fork 增速 | 每周 | 月增速 > 15% | 评估对应客群流失风险 |
|
||||||
|
| 迭代 | Release 频率与大版本功能 | 每周 | 出现企业级治理特性 | 更新销售话术与路线图 |
|
||||||
|
| 定价 | 套餐与按量单价变化 | 每月 | 价格下调 >= 10% | 启动价格带复盘 |
|
||||||
|
| 合规 | ToS、隐私、license 变更 | 每月 | 关键条款变化 | 法务与产品联合评审 |
|
||||||
|
| 生态 | 新渠道合作与云市场上架 | 每月 | 进入头部云市场 | 启动渠道应对计划 |
|
||||||
|
|
||||||
|
### 13.2 组织分工建议
|
||||||
|
|
||||||
|
1. 产品负责人:维护“竞品功能差距清单”。
|
||||||
|
2. 解决方案负责人:维护“客户异议与赢单/输单理由”。
|
||||||
|
3. 商业分析负责人:维护“价格带与毛利敏感性模型”。
|
||||||
|
4. 法务/安全负责人:维护“license 与合规风险台账”。
|
||||||
|
|
||||||
|
### 13.3 固定节奏
|
||||||
|
|
||||||
|
1. 每周 30 分钟:更新监控数据与异常信号。
|
||||||
|
2. 每月 90 分钟:完成竞品风险复盘与优先级重排。
|
||||||
|
3. 每季度:更新一次“你 vs 开源替代”总策略,校正产品与定价路线。
|
||||||
|
|
||||||
|
## 14. 销售 Battlecard(1 页可复制版)
|
||||||
|
|
||||||
|
用途:销售、售前、创始人访谈客户时快速统一口径。
|
||||||
|
使用方式:根据客户类型选择 30 秒/2 分钟/10 分钟版本,不建议跨版本混用。
|
||||||
|
|
||||||
|
### 14.1 30 秒版(电梯话术)
|
||||||
|
|
||||||
|
“开源网关适合快速试验,但企业真正买单的是可运营性。我们提供跨模型的稳定路由、预算与分账闭环、审计和合规控制,让 AI 成本可预测、责任可追溯、故障可恢复。你买的不是转发接口,而是生产级运营能力。”
|
||||||
|
|
||||||
|
### 14.2 2 分钟版(首次会议)
|
||||||
|
|
||||||
|
1. 现状确认
|
||||||
|
“你们现在的问题通常不在‘接不到模型’,而在三件事:成本失控、故障难排、审计和权限不清。”
|
||||||
|
|
||||||
|
2. 方案定位
|
||||||
|
“我们在现有云和模型之上,增加一层企业级治理:智能路由降本、SLA 级回退、组织级预算与账单归因、合规审计导出。”
|
||||||
|
|
||||||
|
3. 价值承诺
|
||||||
|
“4 周并行 PoC,不改你们业务接口。我们一起用真实流量比三件事:单位成本、可用性、运维投入。结果好再扩大。”
|
||||||
|
|
||||||
|
### 14.3 10 分钟版(深度评估)
|
||||||
|
|
||||||
|
1. 为什么不用纯开源
|
||||||
|
- 开源适合起步,但生产环境会出现隐性成本:值班、升级、兼容、审计、账单争议处理。
|
||||||
|
- 采购决策最终看 TCO,不是单请求价格。
|
||||||
|
|
||||||
|
2. 我们的核心差异
|
||||||
|
- 成本层:按任务类型自动路由与回退,降低单位输出成本。
|
||||||
|
- 治理层:组织/项目/用户三级预算与限额策略。
|
||||||
|
- 合规层:密钥托管、审计链路、策略留痕、责任边界清晰。
|
||||||
|
- 运营层:统一观测、异常回放、跨供应商故障演练。
|
||||||
|
|
||||||
|
3. 如何验证
|
||||||
|
- 第 1 周:接入与基线测量(单模型基线)。
|
||||||
|
- 第 2-3 周:策略路由 AB 对照。
|
||||||
|
- 第 4 周:产出成本、可用性、人力投入三项报告。
|
||||||
|
|
||||||
|
### 14.4 异议速查(可直接抄用)
|
||||||
|
|
||||||
|
| 客户异议 | 推荐回答(简版) | 禁忌回答 |
|
||||||
|
|---|---|---|
|
||||||
|
| “开源免费,你们太贵。” | “我们对比的是总成本,不是单价。生产运维、审计与事故成本通常远高于接口差价。” | “我们功能更多所以更贵。” |
|
||||||
|
| “我们团队能自建。” | “可以,我们建议 4 周并行 PoC,用真实人力和事故数据决定买或自建。” | “自建做不出来。” |
|
||||||
|
| “我们只关心价格。” | “可以先按降本目标签约,用节省结果来定扩容。” | “低价我们也能给。” |
|
||||||
|
| “合规暂时不急。” | “合规通常在上量后才爆发,提前布好审计和权限成本最低。” | “不合规会被罚。” |
|
||||||
|
|
||||||
|
### 14.5 关键资格判断(BANT 简化)
|
||||||
|
|
||||||
|
1. Budget:是否有明确 AI 成本预算与负责人。
|
||||||
|
2. Authority:是否能拉到平台、财务、安全三方共同评估。
|
||||||
|
3. Need:是否已出现成本超支、故障频发、账单归因不清。
|
||||||
|
4. Timeline:是否能在 4-8 周内启动 PoC。
|
||||||
|
|
||||||
|
判定规则(建议):
|
||||||
|
- 4 项满足 3 项以上,进入 PoC。
|
||||||
|
- 低于 3 项,转培育线索,不投入深度售前。
|
||||||
|
|
||||||
|
## 15. 月度量化评分模板(可直接填数)
|
||||||
|
|
||||||
|
用途:把“感觉竞争激烈”变成可追踪指标。
|
||||||
|
频率:每月一次,固定在月末更新。
|
||||||
|
|
||||||
|
### 15.1 输入字段(每家竞品 10 个数)
|
||||||
|
|
||||||
|
| 编号 | 字段 | 说明 | 取值建议 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| X1 | 月 Star 增速 | 当月新增 Star / 上月 Star | 0-1 |
|
||||||
|
| X2 | 月 Fork 增速 | 当月新增 Fork / 上月 Fork | 0-1 |
|
||||||
|
| X3 | 月 Release 数 | 当月发布版本数量 | 0-20 |
|
||||||
|
| X4 | 关键功能新增数 | 与你直接冲突功能新增项 | 0-10 |
|
||||||
|
| X5 | 价格变化率 | 负值=降价,正值=涨价 | -1 到 1 |
|
||||||
|
| X6 | 大客户案例数 | 公开新增企业案例 | 0-20 |
|
||||||
|
| X7 | 生态扩展数 | 新云市场/渠道/集成数量 | 0-20 |
|
||||||
|
| X8 | 合规声明强化数 | 新增合规文档或认证动作 | 0-10 |
|
||||||
|
| X9 | 社区活跃度 | 关键 issue/PR 活跃度评分 | 0-5 |
|
||||||
|
| X10 | 你方输单次数 | 当月因该竞品输单数 | 0-20 |
|
||||||
|
|
||||||
|
### 15.2 标准化与得分公式
|
||||||
|
|
||||||
|
1. 标准化(0-5)
|
||||||
|
- 增长类:`score = min(5, 原值 / 阈值 * 5)`
|
||||||
|
- 风险逆向类(例如涨价):按业务设定正负方向后统一映射。
|
||||||
|
|
||||||
|
2. 替代压力指数(0-5)
|
||||||
|
`P = 0.15*S(X1)+0.10*S(X2)+0.10*S(X3)+0.20*S(X4)+0.10*S(X5)+0.10*S(X6)+0.10*S(X7)+0.05*S(X8)+0.05*S(X9)+0.05*S(X10)`
|
||||||
|
|
||||||
|
3. 商业防御指数(0-5,内部填写)
|
||||||
|
| `D = 0.35*合规得分 + 0.35*SLA得分 + 0.30*治理深度得分`(注:权重与 12.1 中的 0.40/0.30/0.30 不一致;月度量化评分以本节权重为准,若需统一口径请在下次权重冻结期前对齐)
|
||||||
|
|
||||||
|
4. 净替代风险(0-5)
|
||||||
|
`R = clamp(P - D + 2.5, 0, 5)`
|
||||||
|
|
||||||
|
风险分级建议:
|
||||||
|
- `0.0-1.9`:低风险(监控即可)
|
||||||
|
- `2.0-2.9`:中风险(准备话术和功能补丁)
|
||||||
|
- `3.0-3.9`:中高风险(进入路线图和销售联动)
|
||||||
|
- `4.0-5.0`:高风险(启动专项战役)
|
||||||
|
|
||||||
|
### 15.3 月报模板(复制即用)
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# 竞品风险月报 - YYYY-MM
|
||||||
|
|
||||||
|
## 1) 本月结论
|
||||||
|
- 最高风险竞品:
|
||||||
|
- 风险等级变化:
|
||||||
|
- 主要原因:
|
||||||
|
|
||||||
|
## 2) 评分总表
|
||||||
|
| 竞品 | P替代压力 | D商业防御 | R净替代风险 | 环比 | 处理动作 |
|
||||||
|
|---|---:|---:|---:|---:|---|
|
||||||
|
| Sub2api | | | | | |
|
||||||
|
| One API | | | | | |
|
||||||
|
| New API | | | | | |
|
||||||
|
|
||||||
|
## 3) 关键变化记录
|
||||||
|
- 定价变化:
|
||||||
|
- 功能变化:
|
||||||
|
- 合规/License变化:
|
||||||
|
- 销售输单/赢单变化:
|
||||||
|
|
||||||
|
## 4) 下月动作
|
||||||
|
1.
|
||||||
|
2.
|
||||||
|
3.
|
||||||
|
```
|
||||||
|
|
||||||
|
### 15.4 执行注意事项
|
||||||
|
|
||||||
|
1. 评分模型必须保持 3 个月不改权重,避免“追着结果改模型”。
|
||||||
|
2. 同一个指标必须由同一个角色维护,保证口径一致。
|
||||||
|
3. 每月只输出 3 条动作,超过 3 条会削弱执行力。
|
||||||
|
|
||||||
|
## 附录 A:关键证据源(官方链接)
|
||||||
|
|
||||||
|
### A.1 网关与直接竞品
|
||||||
|
|
||||||
|
1. OpenRouter 定价页:https://openrouter.ai/pricing
|
||||||
|
2. OpenRouter Provider Routing:https://openrouter.ai/docs/features/provider-routing
|
||||||
|
3. OpenRouter 隐私与日志:https://openrouter.ai/docs/features/privacy-and-logging
|
||||||
|
4. Portkey 定价:https://portkey.ai/pricing
|
||||||
|
5. Portkey AI Gateway 文档:https://portkey.ai/docs/product/ai-gateway
|
||||||
|
6. LiteLLM 文档(provider 与网关能力):https://docs.litellm.ai/docs/proxy/providers
|
||||||
|
7. Cloudflare AI Gateway:https://developers.cloudflare.com/ai-gateway/
|
||||||
|
8. Cloudflare AI Gateway 定价:https://developers.cloudflare.com/ai-gateway/reference/pricing/
|
||||||
|
9. Kong AI Gateway:https://konghq.com/products/kong-ai-gateway
|
||||||
|
10. Helicone 网关文档:https://docs.helicone.ai/helicone-headers/gateway
|
||||||
|
11. Sub2api GitHub 仓库:https://github.com/Wei-Shaw/sub2api
|
||||||
|
12. claude-relay-service(迁移说明):https://github.com/Wei-Shaw/claude-relay-service
|
||||||
|
13. One API GitHub 仓库:https://github.com/songquanpeng/one-api
|
||||||
|
14. New API GitHub 仓库:https://github.com/QuantumNous/new-api
|
||||||
|
15. Sub2api Releases:https://github.com/Wei-Shaw/sub2api/releases
|
||||||
|
16. One API Releases:https://github.com/songquanpeng/one-api/releases
|
||||||
|
17. New API Releases:https://github.com/QuantumNous/new-api/releases
|
||||||
|
|
||||||
|
### A.2 云厂商与模型平台(替代竞争)
|
||||||
|
|
||||||
|
1. AWS Bedrock 定价:https://aws.amazon.com/bedrock/pricing/
|
||||||
|
2. Google Gemini API 定价:https://ai.google.dev/gemini-api/docs/pricing
|
||||||
|
3. Google Vertex AI 定价:https://cloud.google.com/vertex-ai/pricing
|
||||||
|
4. Azure OpenAI 定价:https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
|
||||||
|
5. Azure AI Foundry 定价:https://azure.microsoft.com/en-us/pricing/details/ai-foundry/
|
||||||
|
|
||||||
|
### A.3 模型供应商定价与政策(上游成本约束)
|
||||||
|
|
||||||
|
1. OpenAI API 定价:https://openai.com/api/pricing/
|
||||||
|
2. OpenAI Enterprise Privacy:https://openai.com/enterprise-privacy/
|
||||||
|
3. OpenAI API 数据策略:https://platform.openai.com/docs/guides/your-data
|
||||||
|
4. Anthropic 定价:https://docs.anthropic.com/en/docs/about-claude/pricing
|
||||||
|
5. xAI 模型与定价:https://docs.x.ai/docs/models
|
||||||
|
6. DeepSeek 定价:https://api-docs.deepseek.com/quick_start/pricing
|
||||||
|
|
||||||
|
### A.4 中国区平台
|
||||||
|
|
||||||
|
1. 阿里云百炼产品页:https://www.alibabacloud.com/en/product/model-studio
|
||||||
|
2. 阿里云百炼 OpenAI 兼容说明:https://help.aliyun.com/zh/model-studio/compatibility-of-openai-with-dashscope
|
||||||
|
3. 百度千帆产品页:https://cloud.baidu.com/product/wenxinworkshop
|
||||||
|
4. 百度千帆 OpenAI 兼容说明:https://cloud.baidu.com/doc/qianfan-api/s/9m7p0h2q4
|
||||||
|
5. 火山方舟模型服务价格说明:https://www.volcengine.com/docs/82379/1542535
|
||||||
|
6. 火山方舟 OpenAI 兼容说明:https://www.volcengine.com/docs/82379/1464656
|
||||||
|
7. 腾讯混元 OpenAI 兼容说明:https://cloud.tencent.com/document/product/1729/111008
|
||||||
|
8. 腾讯混元计费说明:https://cloud.tencent.com/document/product/1729/97731
|
||||||
|
|
||||||
|
## 附录 B-技术补充:开源技术竞品深度分析(代码级)
|
||||||
|
|
||||||
|
- 文档:`docs/llm_gateway_open_source_competitors_technical_analysis_2026-03-16.md`
|
||||||
|
- 覆盖项目:`sub2api`、`one-api`、`new-api`、`litellm`
|
||||||
|
- 内容范围:路由架构、调度算法、计费一致性、限流安全、运维能力、License 商用风险、90 天技术路线建议
|
||||||
|
|
||||||
|
## 附录 C-技术蓝图:商用网关 90 天落地方案
|
||||||
|
|
||||||
|
- 文档:`docs/llm_gateway_product_technical_blueprint_v1_2026-03-16.md`
|
||||||
|
- 内容范围:目标边界、系统架构、模块设计、数据模型、API 契约、SLA 指标、周级实施计划
|
||||||
|
|
||||||
|
## 附录 D:声明
|
||||||
|
|
||||||
|
- 本报告采用公开资料分析,不构成法律、财务或投资建议。
|
||||||
|
- 若用于正式融资材料,建议追加第三方法务与合规审阅。
|
||||||
|
|
||||||
|
## 附录 E-竞品补充:功能清单 + 商业模型 + 技术优劣
|
||||||
|
|
||||||
|
- 文档:`docs/llm_gateway_competitor_function_business_2026-03-16.md`
|
||||||
|
- 内容范围:54 项功能清单、P0 功能覆盖矩阵、商业模型矩阵、技术优劣评分、机会空白
|
||||||
|
|
||||||
|
## 附录 F-产品规划路线图(非技术)
|
||||||
|
|
||||||
|
- 文档:`docs/llm_gateway_product_strategy_roadmap_2026-03-16.md`
|
||||||
|
- 内容范围:ICP 分层、定位与价值主张、0-12 月路线图、商业化与 GTM、业务里程碑
|
||||||
|
|
||||||
|
## 附录 G-PRD v0(产品版)
|
||||||
|
|
||||||
|
- 文档:`docs/llm_gateway_prd_v0_2026-03-16.md`
|
||||||
|
- 内容范围:背景与目标、用户画像、P0/P1/P2 需求、核心流程、验收标准、发布计划
|
||||||
@@ -0,0 +1,382 @@
|
|||||||
|
# 商用 LLM 网关开源竞品技术分析(代码级证据版)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-16
|
||||||
|
- 分析目标:为“商用通用 LLM 转发网关”提供可落地的技术选型与竞争策略依据
|
||||||
|
- 分析方法:以本地源码与文档为主,逐条代码证据比对
|
||||||
|
|
||||||
|
## 1. 样本与本地落地情况
|
||||||
|
|
||||||
|
本次纳入 4 个核心开源样本(与你产品定位最接近的“多模型网关/代理”):
|
||||||
|
|
||||||
|
1. `one-api`
|
||||||
|
2. `new-api`
|
||||||
|
3. `sub2api`(你提到增长很快的项目)
|
||||||
|
4. `litellm`
|
||||||
|
|
||||||
|
### 1.1 本地路径
|
||||||
|
|
||||||
|
- one-api:`/home/long/project/立交桥/llm-gateway-competitors/one-api`
|
||||||
|
- new-api:`/home/long/project/立交桥/llm-gateway-competitors/new-api`
|
||||||
|
- sub2api:`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar`
|
||||||
|
- litellm:`/home/long/project/立交桥/llm-gateway-competitors/litellm`
|
||||||
|
|
||||||
|
### 1.2 代码规模快照(本地统计)
|
||||||
|
|
||||||
|
| 项目 | 主要语言文件量 | 测试文件量(粗略) | 文档文件量 |
|
||||||
|
|---|---:|---:|---:|
|
||||||
|
| one-api | Go 235 | 5 | 11 |
|
||||||
|
| new-api | Go 511 | 19 | 22 |
|
||||||
|
| sub2api | Go 1004, Vue 165, TS 139 | 417 | 18 |
|
||||||
|
| litellm | Python 3500 | 1846(测试目录与测试文件) | 869 |
|
||||||
|
|
||||||
|
注:对“测试覆盖率”只做“可见测试资产规模”判断,不等同于真实覆盖率。
|
||||||
|
|
||||||
|
## 2. 对比维度(商用网关视角)
|
||||||
|
|
||||||
|
本报告统一按 10 个维度做横向对比:
|
||||||
|
|
||||||
|
1. 协议与接口覆盖(OpenAI/Claude/Gemini/Realtime/Responses 等)
|
||||||
|
2. 调度与路由能力(负载、亲和、回退、重试、熔断)
|
||||||
|
3. 计费与额度一致性(预扣/后扣/退款、幂等)
|
||||||
|
4. 多租户与权限模型(用户、团队、Key、策略)
|
||||||
|
5. 限流与抗滥用(RPM/TPM、入口保护、故障策略)
|
||||||
|
6. 可观测与运营(日志、指标、后台运营)
|
||||||
|
7. 数据模型与可扩展性(Schema、状态字段、索引)
|
||||||
|
8. 工程成熟度(代码组织、测试资产、配置治理)
|
||||||
|
9. License 与商用合规风险
|
||||||
|
10. 二次开发成本与上线复杂度
|
||||||
|
|
||||||
|
## 3. 分项目技术剖析(含代码证据)
|
||||||
|
|
||||||
|
## 3.1 sub2api(生产中台倾向最强)
|
||||||
|
|
||||||
|
### 3.1.1 协议覆盖与网关组织
|
||||||
|
|
||||||
|
`sub2api` 的路由组织不是单一 OpenAI 兼容层,而是“多协议并列 + 专用平台路由”:
|
||||||
|
|
||||||
|
- `/v1` 兼容 Claude/OpenAI 风格,并提供 `messages`、`responses`、`chat/completions` 等能力
|
||||||
|
证据:`backend/internal/server/routes/gateway.go:40-78`
|
||||||
|
- `/v1beta` 兼容 Gemini 原生路径格式
|
||||||
|
证据:`backend/internal/server/routes/gateway.go:80-93`
|
||||||
|
- 提供不带 `/v1` 前缀的别名路由 `/responses`、`/chat/completions`,降低客户端迁移摩擦
|
||||||
|
证据:`backend/internal/server/routes/gateway.go:95-101`
|
||||||
|
- 提供 Antigravity / Sora 专用路由组(强制平台)
|
||||||
|
证据:`backend/internal/server/routes/gateway.go:105-157`
|
||||||
|
|
||||||
|
结论:接口层设计明显面向“多来源客户端兼容 + 业务隔离路由”。
|
||||||
|
|
||||||
|
### 3.1.2 调度能力(核心竞争点)
|
||||||
|
|
||||||
|
`OpenAIAccountScheduler` 明确实现三层选择:
|
||||||
|
|
||||||
|
- `previous_response_id` 粘性层
|
||||||
|
- `session_hash` 会话粘性层
|
||||||
|
- `load_balance` 负载层
|
||||||
|
|
||||||
|
证据:`backend/internal/service/openai_account_scheduler.go:17-21`, `224-289`
|
||||||
|
|
||||||
|
负载层不是随机,而是“多指标打分 + TopK + 加权随机顺序”:
|
||||||
|
|
||||||
|
- 指标包含优先级、负载、队列、错误率 EWMA、TTFT EWMA
|
||||||
|
证据:`.../openai_account_scheduler.go:656-676`
|
||||||
|
- 先选 TopK 再加权顺序,减少单账号长期垄断
|
||||||
|
证据:`.../openai_account_scheduler.go:416-449`, `506-559`, `678-687`
|
||||||
|
|
||||||
|
运行时反馈机制:
|
||||||
|
|
||||||
|
- 实时维护 `errorRateEWMABits`、`ttftEWMABits`
|
||||||
|
证据:`.../openai_account_scheduler.go:107-109`, `148-179`
|
||||||
|
|
||||||
|
结论:这是“调度系统”而非“随机通道器”。
|
||||||
|
|
||||||
|
### 3.1.3 限流与安全策略
|
||||||
|
|
||||||
|
认证高风险入口(注册/登录/2FA/验证码等)采用 Redis 限流且明确 `fail-close`:
|
||||||
|
|
||||||
|
- `auth-register`/`auth-login`/... 均配置 `FailureMode: RateLimitFailClose`
|
||||||
|
证据:`backend/internal/server/routes/auth.go:30-64`
|
||||||
|
- 限流器 Lua 原子计数 + TTL 修复,避免并发下 TTL 丢失
|
||||||
|
证据:`backend/internal/middleware/rate_limiter.go:28-39`, `98-121`
|
||||||
|
|
||||||
|
结论:安全设计偏保守,适合企业场景。
|
||||||
|
|
||||||
|
### 3.1.4 数据模型与运营可控性
|
||||||
|
|
||||||
|
`Account` schema 字段覆盖“可调度状态 + 限流恢复 + 过载恢复 + 会话窗口”:
|
||||||
|
|
||||||
|
- `schedulable`, `rate_limit_reset_at`, `overload_until`, `temp_unschedulable_until`
|
||||||
|
证据:`backend/ent/schema/account.go:143-180`
|
||||||
|
|
||||||
|
`APIKey` schema 含多窗口额度限制(5h/1d/7d)与窗口起点:
|
||||||
|
|
||||||
|
- `rate_limit_5h/1d/7d`, `usage_5h/1d/7d`, `window_*_start`
|
||||||
|
证据:`backend/ent/schema/api_key.go:78-117`
|
||||||
|
|
||||||
|
配置层默认值非常细,包含:
|
||||||
|
|
||||||
|
- billing circuit breaker
|
||||||
|
证据:`backend/internal/config/config.go:1180-1184`
|
||||||
|
- Redis 连接池与 DB 连接池高并发参数
|
||||||
|
证据:`.../config.go:1212-1227`
|
||||||
|
- OpenAI WS sticky TTL、TopK、scheduler 权重
|
||||||
|
证据:`.../config.go:1371-1383`
|
||||||
|
- 对 scheduler 权重合法性做校验(非负且总和>0)
|
||||||
|
证据:`.../config.go:2091-2105`
|
||||||
|
|
||||||
|
结论:`sub2api` 在“运营可控 + 调度深度 + 安全策略”上最接近企业中台。
|
||||||
|
|
||||||
|
## 3.2 new-api(协议覆盖强、功能扩展快)
|
||||||
|
|
||||||
|
### 3.2.1 协议覆盖面广
|
||||||
|
|
||||||
|
`new-api` 路由覆盖非常广,包含:
|
||||||
|
|
||||||
|
- OpenAI Chat/Completions/Embeddings/Audio/Responses/Realtime
|
||||||
|
- Claude messages
|
||||||
|
- Gemini `/v1beta/models/*`
|
||||||
|
- Midjourney、Suno、Video 等任务型接口
|
||||||
|
|
||||||
|
证据:`router/relay-router.go:69-201`, `168-223`
|
||||||
|
|
||||||
|
结论:协议与场景覆盖是其主要优势。
|
||||||
|
|
||||||
|
### 3.2.2 通道选择与重试策略
|
||||||
|
|
||||||
|
`Distribute` 层能力包含:
|
||||||
|
|
||||||
|
- token 级模型白名单限制
|
||||||
|
- auto 分组选择
|
||||||
|
- 通道亲和策略
|
||||||
|
- 重试通道重新选择
|
||||||
|
|
||||||
|
证据:`middleware/distributor.go:55-151`
|
||||||
|
|
||||||
|
`Relay` 主流程包含:
|
||||||
|
|
||||||
|
- 预扣费
|
||||||
|
- 请求重试循环(记录 used channel)
|
||||||
|
- 错误归一化
|
||||||
|
- 自动封禁异常通道(auto ban)
|
||||||
|
|
||||||
|
证据:`controller/relay.go:160-177`, `180-241`, `350-357`
|
||||||
|
|
||||||
|
### 3.2.3 计费一致性设计
|
||||||
|
|
||||||
|
`PreConsumeBilling` + `SettleBilling` 实现预扣与结算分离,支持 delta(补扣/返还):
|
||||||
|
|
||||||
|
- `delta = actualQuota - preConsumedQuota`
|
||||||
|
- 统一会话对象执行 `Settle` / `Refund`
|
||||||
|
|
||||||
|
证据:`service/billing.go:17-26`, `32-78`
|
||||||
|
|
||||||
|
### 3.2.4 通道模型复杂度
|
||||||
|
|
||||||
|
`ChannelInfo` 支持 multi-key、轮询/随机、多 key 状态与禁用原因追踪:
|
||||||
|
|
||||||
|
证据:`model/channel.go:60-68`, `105-190`
|
||||||
|
|
||||||
|
结论:`new-api` 适合“快速接入多协议 + 高迭代需求”,但需要更强工程治理。
|
||||||
|
|
||||||
|
### 3.2.5 商用合规风险
|
||||||
|
|
||||||
|
License 明确 AGPLv3,并提示若组织不能接受需联系商业授权:
|
||||||
|
|
||||||
|
证据:`README.md:446-452`, `README.zh_CN.md:446-452`
|
||||||
|
|
||||||
|
结论:对闭源商用产品是显著法律约束点。
|
||||||
|
|
||||||
|
## 3.3 one-api(经典、简洁、低门槛)
|
||||||
|
|
||||||
|
### 3.3.1 网关结构与能力边界
|
||||||
|
|
||||||
|
`/v1` 路由具备 OpenAI 基础接口,但大量高级接口为 `RelayNotImplemented`:
|
||||||
|
|
||||||
|
证据:`router/relay.go:20-73`
|
||||||
|
|
||||||
|
### 3.3.2 分发策略
|
||||||
|
|
||||||
|
核心选择逻辑是“按 group+model 找可用通道 + 同优先级随机”:
|
||||||
|
|
||||||
|
- `CacheGetRandomSatisfiedChannel(...)`
|
||||||
|
- 同一优先级集合中 `rand.Intn`
|
||||||
|
|
||||||
|
证据:`middleware/distributor.go:45-59`, `model/cache.go:227-255`
|
||||||
|
|
||||||
|
### 3.3.3 计费模型
|
||||||
|
|
||||||
|
存在预扣返还与后扣逻辑,结构直接易懂:
|
||||||
|
|
||||||
|
证据:`relay/billing/billing.go:11-21`, `23-48`
|
||||||
|
|
||||||
|
### 3.3.4 商用注意
|
||||||
|
|
||||||
|
项目 LICENSE 文件是 MIT,但 README 对署名提出额外要求说明:
|
||||||
|
|
||||||
|
证据:`README.md:476-480`, `LICENSE`
|
||||||
|
|
||||||
|
结论:`one-api` 优势在“学习/部署快”,短板在“调度深度与企业治理能力”。
|
||||||
|
|
||||||
|
## 3.4 litellm(生态广、路由策略丰富、平台化强)
|
||||||
|
|
||||||
|
### 3.4.1 样本来源说明
|
||||||
|
|
||||||
|
本地分析样本来自 `litellm` 完整仓库代码(含 `tests/`、`docs/`、`enterprise/`、`proxy`)。
|
||||||
|
|
||||||
|
- 仓库 LICENSE 为 MIT
|
||||||
|
证据:`LICENSE`
|
||||||
|
|
||||||
|
### 3.4.2 Provider 与协议覆盖
|
||||||
|
|
||||||
|
Provider 列表非常长(含 OpenAI、Anthropic、Gemini、Bedrock、Azure、Groq、DeepSeek、OpenRouter 等):
|
||||||
|
|
||||||
|
- `LITELLM_CHAT_PROVIDERS` 列表
|
||||||
|
证据:`litellm/constants.py:479-568`
|
||||||
|
- OpenAI 兼容 provider 列表
|
||||||
|
证据:`litellm/constants.py:728-783`
|
||||||
|
|
||||||
|
Proxy 端点具备:
|
||||||
|
|
||||||
|
- `/v1/models`
|
||||||
|
- `/v1/chat/completions`
|
||||||
|
- `/v1/embeddings`
|
||||||
|
- `/v1/realtime` WebSocket
|
||||||
|
|
||||||
|
证据:
|
||||||
|
- `litellm/proxy/proxy_server.py:6479-6709`
|
||||||
|
- `.../proxy_server.py:7074-7098`
|
||||||
|
- `.../proxy_server.py:7691-7704`
|
||||||
|
|
||||||
|
### 3.4.3 路由与可靠性策略
|
||||||
|
|
||||||
|
Router 构造参数内建:
|
||||||
|
|
||||||
|
- retries / fallbacks / content-policy fallbacks
|
||||||
|
- 多路由策略(least-busy/usage/latency/cost)
|
||||||
|
- provider budget config
|
||||||
|
|
||||||
|
证据:`litellm/router.py:242-299`, `516-539`, `617-642`, `756-833`
|
||||||
|
|
||||||
|
并发与限速控制:
|
||||||
|
|
||||||
|
- 每部署 `max_parallel_requests` semaphore
|
||||||
|
- 预调用检查 RPM/TPM
|
||||||
|
|
||||||
|
证据:`litellm/router.py:2057-2075`
|
||||||
|
|
||||||
|
`usage-based-routing-v2` 的跨实例限速逻辑明确依赖 Redis 计数:
|
||||||
|
|
||||||
|
- 设计注释“Meant to work across instances”
|
||||||
|
- `increment_cache` + TTL
|
||||||
|
|
||||||
|
证据:`litellm/router_strategy/lowest_tpm_rpm_v2.py:33-43`, `115-119`
|
||||||
|
|
||||||
|
### 3.4.4 预算与费用治理
|
||||||
|
|
||||||
|
`RouterBudgetLimiting` 支持按:
|
||||||
|
|
||||||
|
- provider budget
|
||||||
|
- deployment budget
|
||||||
|
- request tag budget
|
||||||
|
|
||||||
|
过滤超预算部署:
|
||||||
|
|
||||||
|
证据:`litellm/router_strategy/budget_limiter.py:116-189`, `191-280`
|
||||||
|
|
||||||
|
Proxy 端有大量 spend/budget 管理组件与后台任务:
|
||||||
|
|
||||||
|
- spend 更新、budget 重置任务、spend log 清理
|
||||||
|
|
||||||
|
证据:`litellm/proxy/proxy_server.py:910-933`, `5758-5781`, `5915-5945`
|
||||||
|
|
||||||
|
### 3.4.5 平台化能力
|
||||||
|
|
||||||
|
`proxy_server.py` 中 `include_router` 规模很大,涵盖 key/team/budget/model/fallback/compliance/analytics/guardrails 等:
|
||||||
|
|
||||||
|
证据:`litellm/proxy/proxy_server.py:13249-13318`
|
||||||
|
|
||||||
|
结论:`litellm` 在“生态接入广度 + 平台化管理能力”上非常强,适合作为“路由内核或能力参考源”。
|
||||||
|
|
||||||
|
## 4. 竞品对比矩阵(技术+商用)
|
||||||
|
|
||||||
|
评分区间:1(弱)~5(强)
|
||||||
|
|
||||||
|
| 维度 | sub2api | new-api | one-api | litellm |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| 协议覆盖 | 4.5 | 5.0 | 3.0 | 5.0 |
|
||||||
|
| 调度与路由深度 | 5.0 | 4.0 | 2.5 | 4.5 |
|
||||||
|
| 计费一致性 | 4.5 | 4.5 | 3.5 | 4.5 |
|
||||||
|
| 多租户与权限治理 | 4.5 | 4.0 | 3.0 | 4.5 |
|
||||||
|
| 限流与抗滥用 | 4.5 | 4.0 | 3.0 | 4.0 |
|
||||||
|
| 可观测与运营能力 | 4.5 | 4.0 | 3.0 | 4.5 |
|
||||||
|
| 工程成熟度(代码/测试/配置) | 4.7 | 4.0 | 3.2 | 4.2 |
|
||||||
|
| License 商用友好度 | 4.5 (MIT) | 2.0 (AGPLv3) | 4.0 (MIT+README附加说明) | 4.5 (MIT) |
|
||||||
|
| 二开难度(低分=容易) | 2.5 | 3.0 | 4.5 | 2.8 |
|
||||||
|
|
||||||
|
解读:
|
||||||
|
|
||||||
|
1. 如果你要做“高可信商用中台”:`sub2api` 的调度与治理思路最值得吸收。
|
||||||
|
2. 如果你要做“快速多协议覆盖”:`new-api` 与 `litellm` 的路由广度更有参考价值。
|
||||||
|
3. 如果你要“极快 MVP”:`one-api` 仍是最低心智负担底座,但后续重构成本高。
|
||||||
|
4. `new-api` 的 AGPLv3 需尽早法务评估,不建议直接深度嵌入闭源核心。
|
||||||
|
|
||||||
|
## 5. 对你产品的可执行技术路线(90 天)
|
||||||
|
|
||||||
|
目标:构建“企业可采购”的通用 LLM 网关,不陷入纯转发同质化。
|
||||||
|
|
||||||
|
| ### 阶段 A(第 1-3 周):稳定底座
|
||||||
|
|
||||||
|
1. 统一北向协议:先收敛到 OpenAI + Anthropic + Gemini 三套核心接口。
|
||||||
|
2. 路由内核:实现 `fallback + retry + health + affinity` 四件套。
|
||||||
|
3. 计费一致性:上线 `pre-consume / settle / refund` 全链路,所有扣费具备 request_id 幂等键。
|
||||||
|
|
||||||
|
验收指标:
|
||||||
|
|
||||||
|
- P95 延迟、错误率、切换成功率可观测
|
||||||
|
- 扣费差错率 < 0.1%
|
||||||
|
|
||||||
|
| ### 阶段 B(第 4-8 周):成本与治理
|
||||||
|
|
||||||
|
1. 按租户/团队/API Key/标签做预算体系。
|
||||||
|
2. 上线“策略层”:模型白名单、限流、敏感词、回退策略模板化。
|
||||||
|
3. 控制面增加运营能力:账号池状态、异常封禁、手动降级开关。
|
||||||
|
|
||||||
|
验收指标:
|
||||||
|
|
||||||
|
- 预算超限拦截准确率 > 99.9%
|
||||||
|
- 故障演练中自动切换恢复时间 < 30s
|
||||||
|
|
||||||
|
| ### 阶段 C(第 9-12 周):企业化与差异化
|
||||||
|
|
||||||
|
1. 审计与合规:全链路审计日志、角色权限、密钥托管策略。
|
||||||
|
2. FinOps 面板:按团队/模型/供应商展示成本归因 + 优化建议。
|
||||||
|
3. 企业集成:Webhook、SIEM、账单导出、告警路由(Slack/飞书/钉钉)。
|
||||||
|
|
||||||
|
验收指标:
|
||||||
|
|
||||||
|
- 支持企业 PoC 的最小安全审计清单
|
||||||
|
- 提供“降本证明报表”(对比基线)
|
||||||
|
|
||||||
|
## 6. 关键风险与规避
|
||||||
|
|
||||||
|
1. **License 风险**:AGPLv3 组件若进入核心链路,闭源商用风险高。
|
||||||
|
规避:核心路径优先 MIT/Apache 组件,自研关键模块。
|
||||||
|
2. **转发同质化**:只做 API 兼容会被价格战挤压。
|
||||||
|
规避:把“成本优化 + 策略治理 + 合规审计”作为主卖点。
|
||||||
|
3. **计费可信度风险**:多模型多重试导致“账实不一致”。
|
||||||
|
规避:所有计费事件幂等化 + 对账作业 + 失败补偿队列。
|
||||||
|
4. **调度复杂度风险**:过早引入复杂路由导致系统不稳。
|
||||||
|
规避:先实现可解释策略,再逐步引入多指标打分。
|
||||||
|
|
||||||
|
## 7. 最终建议(针对你的项目定位)
|
||||||
|
|
||||||
|
结论性建议:
|
||||||
|
|
||||||
|
1. 参考 `sub2api` 的“调度中台化设计”作为核心能力蓝本。
|
||||||
|
2. 参考 `litellm` 的“多策略路由 + provider 生态接入”提升覆盖速度。
|
||||||
|
3. 参考 `new-api` 的协议覆盖清单,但避免 AGPL 风险进入闭源核心。
|
||||||
|
4. 不建议以 `one-api` 作为长期架构终态,可作为过渡 MVP 参考。
|
||||||
|
|
||||||
|
一句话战略:
|
||||||
|
|
||||||
|
**你的商用壁垒不在“接了多少模型”,而在“企业如何可控地、可审计地、持续降本地用模型”。**
|
||||||
217
docs/llm_gateway_prd_v0_2026-03-16.md
Normal file
217
docs/llm_gateway_prd_v0_2026-03-16.md
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
# 商用 LLM 通用转发网关 PRD v0(产品版)
|
||||||
|
|
||||||
|
> 状态说明:本文件为历史评审稿,当前生效版本为 `llm_gateway_prd_v1_2026-03-25.md`。
|
||||||
|
|
||||||
|
- 版本:v0.1(评审稿)
|
||||||
|
- 日期:2026-03-16
|
||||||
|
- 文档类型:产品需求文档(不含实现细节)
|
||||||
|
- 关联文档:
|
||||||
|
- `llm_gateway_competitor_function_business_2026-03-16.md`
|
||||||
|
- `llm_gateway_product_strategy_roadmap_2026-03-16.md`
|
||||||
|
|
||||||
|
## 1. 背景与问题定义
|
||||||
|
|
||||||
|
## 1.1 背景
|
||||||
|
|
||||||
|
企业与团队正在同时使用多个 LLM 供应商,常见问题是:
|
||||||
|
|
||||||
|
1. 调用入口分散,难以统一管理
|
||||||
|
2. 成本快速增长但不可解释
|
||||||
|
3. 供应商波动导致业务不稳定
|
||||||
|
4. 预算、权限、审计缺少统一控制面
|
||||||
|
|
||||||
|
## 1.2 目标问题
|
||||||
|
|
||||||
|
构建一个统一网关产品,帮助客户完成:
|
||||||
|
|
||||||
|
1. 统一接入(降低研发复杂度)
|
||||||
|
2. 统一路由(提升可用性与效率)
|
||||||
|
3. 统一治理(预算、权限、审计)
|
||||||
|
4. 统一经营(成本归因、告警、账单)
|
||||||
|
|
||||||
|
## 1.3 非目标(v0 阶段)
|
||||||
|
|
||||||
|
1. 不做模型训练与微调平台
|
||||||
|
2. 不做复杂 Agent 工作流编排
|
||||||
|
3. 不做行业化应用层模板市场
|
||||||
|
|
||||||
|
## 2. 用户画像与 JTBD
|
||||||
|
|
||||||
|
## 2.1 角色画像
|
||||||
|
|
||||||
|
1. 平台管理员(Admin)
|
||||||
|
- 负责组织级策略、预算、权限
|
||||||
|
2. AI 应用开发者(Developer)
|
||||||
|
- 负责接入模型与业务落地
|
||||||
|
3. 财务/运营负责人(Ops/FinOps)
|
||||||
|
- 负责成本追踪、对账与预算控制
|
||||||
|
|
||||||
|
## 2.2 JTBD
|
||||||
|
|
||||||
|
1. 作为平台管理员,我要在一个控制台里管理团队与 key,确保调用可控。
|
||||||
|
2. 作为开发者,我要用统一 API 调用多个模型,减少改造成本。
|
||||||
|
3. 作为运营负责人,我要按团队/项目看到成本和异常,便于追责与优化。
|
||||||
|
|
||||||
|
## 3. 价值主张与成功标准
|
||||||
|
|
||||||
|
## 3.1 价值主张
|
||||||
|
|
||||||
|
1. 更简单:统一 API 和统一控制台
|
||||||
|
2. 更可控:预算、限流、回退、审计
|
||||||
|
3. 更省钱:可追踪、可归因、可优化
|
||||||
|
|
||||||
|
## 3.2 成功标准(产品层)
|
||||||
|
|
||||||
|
1. 首次接入时间:新团队 1 天内完成接入
|
||||||
|
2. 成本可解释率:90%+ 调用可归因到团队/项目
|
||||||
|
3. 预算风控有效率:预算超限前告警命中率 95%+
|
||||||
|
4. 月活团队留存:30 天留存 >= 40%
|
||||||
|
|
||||||
|
## 4. 需求范围与优先级
|
||||||
|
|
||||||
|
## 4.1 P0(首发必须)
|
||||||
|
|
||||||
|
1. 统一 API 接入
|
||||||
|
- OpenAI 兼容调用
|
||||||
|
- 模型目录展示与选择
|
||||||
|
2. 基础路由与稳定性
|
||||||
|
- 多 provider 负载与 fallback
|
||||||
|
- 请求重试与错误可见
|
||||||
|
3. 身份与密钥管理
|
||||||
|
- 组织、团队、API Key 管理
|
||||||
|
- Key 状态与权限控制
|
||||||
|
4. 预算与配额
|
||||||
|
- 团队级预算阈值
|
||||||
|
- 基础限流策略
|
||||||
|
5. 成本看板
|
||||||
|
- 按团队/模型/时间维度查看调用成本
|
||||||
|
6. 告警与通知
|
||||||
|
- 预算、错误率、异常波动告警
|
||||||
|
7. 账单导出
|
||||||
|
- 周期账单汇总与 CSV 导出
|
||||||
|
|
||||||
|
## 4.2 P1(3-6 个月)
|
||||||
|
|
||||||
|
1. 多角色权限(管理员、开发者、只读)
|
||||||
|
2. 审计日志(策略与 key 变更)
|
||||||
|
3. 项目级成本归因
|
||||||
|
4. 路由策略模板(按场景)
|
||||||
|
5. 可观测增强(失败聚类、异常洞察)
|
||||||
|
|
||||||
|
## 4.3 P2(6-12 个月)
|
||||||
|
|
||||||
|
1. 企业身份集成(SSO/SAML/OIDC)
|
||||||
|
2. 合规能力包(审计报表、策略模板)
|
||||||
|
3. 更长周期账务与财务对接
|
||||||
|
4. 生态集成(工单/告警/数据平台)
|
||||||
|
|
||||||
|
## 5. 核心用户流程
|
||||||
|
|
||||||
|
## 5.1 流程 1:组织开通与初始化
|
||||||
|
|
||||||
|
1. 管理员创建组织
|
||||||
|
2. 创建团队与成员
|
||||||
|
3. 设置默认预算与告警阈值
|
||||||
|
4. 生成首个 API Key
|
||||||
|
|
||||||
|
验收标准:首次开通流程在 30 分钟内可完成。
|
||||||
|
|
||||||
|
## 5.2 流程 2:开发者接入调用
|
||||||
|
|
||||||
|
1. 开发者获取组织 API Key
|
||||||
|
2. 替换 base URL 与 key 完成调用
|
||||||
|
3. 在控制台查看请求结果与成本
|
||||||
|
|
||||||
|
验收标准:开发者无需修改业务协议即可发起调用。
|
||||||
|
|
||||||
|
## 5.3 流程 3:预算与告警
|
||||||
|
|
||||||
|
1. 管理员设置预算阈值
|
||||||
|
2. 系统实时监测并触发告警
|
||||||
|
3. 管理员查看告警上下文并调整策略
|
||||||
|
|
||||||
|
验收标准:阈值触发后 1 分钟内完成通知。
|
||||||
|
|
||||||
|
## 5.4 流程 4:账单与复盘
|
||||||
|
|
||||||
|
1. 运营负责人按周期查看账单
|
||||||
|
2. 按团队/模型拆分成本
|
||||||
|
3. 导出账单用于财务核对
|
||||||
|
|
||||||
|
验收标准:账单数据和调用数据一致性可验证。
|
||||||
|
|
||||||
|
## 6. 关键产品规则
|
||||||
|
|
||||||
|
1. 所有请求必须可追踪到组织与团队。
|
||||||
|
2. 预算阈值触发后必须有可操作建议(降级/限流/告警升级)。
|
||||||
|
3. 用户可在同一控制台查看“调用结果 + 成本 + 告警”。
|
||||||
|
4. 策略变更必须可审计(谁、何时、改了什么)。
|
||||||
|
|
||||||
|
## 7. 指标体系(KPI)
|
||||||
|
|
||||||
|
## 7.1 增长指标
|
||||||
|
|
||||||
|
1. 新增组织数
|
||||||
|
2. 激活团队数(周活/月活)
|
||||||
|
3. 试用转付费转化率
|
||||||
|
|
||||||
|
## 7.2 价值指标
|
||||||
|
|
||||||
|
1. 受管调用量
|
||||||
|
2. 预算告警命中率
|
||||||
|
3. 成本归因覆盖率
|
||||||
|
|
||||||
|
## 7.3 商业指标
|
||||||
|
|
||||||
|
1. MRR
|
||||||
|
2. 付费客户数
|
||||||
|
3. 续费率/扩容率
|
||||||
|
|
||||||
|
## 8. 发布计划
|
||||||
|
|
||||||
|
## 8.1 里程碑
|
||||||
|
|
||||||
|
1. Alpha(内部可用)
|
||||||
|
- 完成 P0 主流程闭环
|
||||||
|
2. Beta(设计合作伙伴)
|
||||||
|
- 引入 5-10 家团队试用
|
||||||
|
3. GA(正式发布)
|
||||||
|
- 推出 Free/Growth 套餐
|
||||||
|
|
||||||
|
## 8.2 发布门槛(Go/No-Go)
|
||||||
|
|
||||||
|
1. 关键流程无阻塞(开通、调用、预算、账单)
|
||||||
|
2. 数据可追踪与可归因达标
|
||||||
|
3. 客户试用反馈达到最低满意度阈值(内部定义)
|
||||||
|
|
||||||
|
## 9. 依赖与风险
|
||||||
|
|
||||||
|
## 9.1 关键依赖
|
||||||
|
|
||||||
|
1. 供应商接入稳定性
|
||||||
|
2. 计费与账单口径统一
|
||||||
|
3. 告警渠道与通知系统稳定
|
||||||
|
|
||||||
|
## 9.2 主要风险
|
||||||
|
|
||||||
|
1. 功能边界过宽导致首发延期
|
||||||
|
2. 缺少真实客户数据导致价值表达不足
|
||||||
|
3. 定价方案与客户价值感知不匹配
|
||||||
|
|
||||||
|
## 9.3 风险缓解
|
||||||
|
|
||||||
|
1. 坚持 P0 边界,P1/P2 延后
|
||||||
|
2. 以设计合作伙伴反馈驱动迭代
|
||||||
|
3. 发布前完成小规模定价验证访谈
|
||||||
|
|
||||||
|
## 10. 待决策问题(评审会需拍板)
|
||||||
|
|
||||||
|
1. 首发主打“成本治理”还是“治理合规”?
|
||||||
|
2. Growth 套餐核心计价指标采用“请求量”还是“受管成本”?
|
||||||
|
3. 企业版首批必须包含的三项能力是什么?
|
||||||
|
|
||||||
|
## 11. 术语表
|
||||||
|
|
||||||
|
1. 受管成本:通过本平台网关记录并可归因的模型调用成本。
|
||||||
|
2. 预算账户:具备独立预算阈值与告警规则的组织单元。
|
||||||
|
3. 成本归因:将调用成本映射到组织/团队/项目/模型维度。
|
||||||
231
docs/llm_gateway_prd_v1_2026-03-25.md
Normal file
231
docs/llm_gateway_prd_v1_2026-03-25.md
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
# 商用 LLM 通用转发网关 PRD v1(冻结版)
|
||||||
|
|
||||||
|
- 版本:v1.0(冻结稿)
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 文档类型:产品需求文档(冻结版,含执行映射)
|
||||||
|
- 关联文档:
|
||||||
|
- `llm_gateway_competitor_function_business_2026-03-16.md`
|
||||||
|
- `llm_gateway_product_strategy_roadmap_2026-03-16.md`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md`
|
||||||
|
- `supply_button_level_prd_v1_2026-03-25.md`
|
||||||
|
- `supply_api_contract_openapi_draft_v1_2026-03-25.yaml`
|
||||||
|
- `supply_ui_test_cases_executable_v1_2026-03-25.md`
|
||||||
|
|
||||||
|
## 1. 背景与问题定义
|
||||||
|
|
||||||
|
## 1.1 背景
|
||||||
|
|
||||||
|
企业与团队正在同时使用多个 LLM 供应商,常见问题是:
|
||||||
|
|
||||||
|
1. 调用入口分散,难以统一管理
|
||||||
|
2. 成本快速增长但不可解释
|
||||||
|
3. 供应商波动导致业务不稳定
|
||||||
|
4. 预算、权限、审计缺少统一控制面
|
||||||
|
|
||||||
|
## 1.2 目标问题
|
||||||
|
|
||||||
|
构建一个统一网关产品,帮助客户完成:
|
||||||
|
|
||||||
|
1. 统一接入(降低研发复杂度)
|
||||||
|
2. 统一路由(提升可用性与效率)
|
||||||
|
3. 统一治理(预算、权限、审计)
|
||||||
|
4. 统一经营(成本归因、告警、账单)
|
||||||
|
|
||||||
|
## 1.3 非目标(v0 阶段)
|
||||||
|
|
||||||
|
1. 不做模型训练与微调平台
|
||||||
|
2. 不做复杂 Agent 工作流编排
|
||||||
|
3. 不做行业化应用层模板市场
|
||||||
|
|
||||||
|
## 2. 用户画像与 JTBD
|
||||||
|
|
||||||
|
## 2.1 角色画像
|
||||||
|
|
||||||
|
1. 平台管理员(Admin)
|
||||||
|
- 负责组织级策略、预算、权限
|
||||||
|
2. AI 应用开发者(Developer)
|
||||||
|
- 负责接入模型与业务落地
|
||||||
|
3. 财务/运营负责人(Ops/FinOps)
|
||||||
|
- 负责成本追踪、对账与预算控制
|
||||||
|
|
||||||
|
## 2.2 JTBD
|
||||||
|
|
||||||
|
1. 作为平台管理员,我要在一个控制台里管理团队与 key,确保调用可控。
|
||||||
|
2. 作为开发者,我要用统一 API 调用多个模型,减少改造成本。
|
||||||
|
3. 作为运营负责人,我要按团队/项目看到成本和异常,便于追责与优化。
|
||||||
|
|
||||||
|
## 3. 价值主张与成功标准
|
||||||
|
|
||||||
|
## 3.1 价值主张
|
||||||
|
|
||||||
|
1. 更简单:统一 API 和统一控制台
|
||||||
|
2. 更可控:预算、限流、回退、审计
|
||||||
|
3. 更省钱:可追踪、可归因、可优化
|
||||||
|
|
||||||
|
## 3.2 成功标准(产品层)
|
||||||
|
|
||||||
|
1. 首次接入时间:新团队 1 天内完成接入
|
||||||
|
2. 成本可解释率:90%+ 调用可归因到团队/项目
|
||||||
|
3. 预算风控有效率:预算超限前告警命中率 95%+
|
||||||
|
4. 月活团队留存:30 天留存 >= 40%
|
||||||
|
|
||||||
|
## 4. 需求范围与优先级
|
||||||
|
|
||||||
|
## 4.1 P0(首发必须)
|
||||||
|
|
||||||
|
1. 统一 API 接入
|
||||||
|
- OpenAI 兼容调用
|
||||||
|
- 模型目录展示与选择
|
||||||
|
2. 基础路由与稳定性
|
||||||
|
- 多 provider 负载与 fallback
|
||||||
|
- 请求重试与错误可见
|
||||||
|
3. 身份与密钥管理
|
||||||
|
- 组织、团队、API Key 管理
|
||||||
|
- Key 状态与权限控制
|
||||||
|
4. 预算与配额
|
||||||
|
- 团队级预算阈值
|
||||||
|
- 基础限流策略
|
||||||
|
5. 成本看板
|
||||||
|
- 按团队/模型/时间维度查看调用成本
|
||||||
|
6. 告警与通知
|
||||||
|
- 预算、错误率、异常波动告警
|
||||||
|
7. 账单导出
|
||||||
|
- 周期账单汇总与 CSV 导出
|
||||||
|
|
||||||
|
## 4.2 P1(3-6 个月)
|
||||||
|
|
||||||
|
1. 多角色权限(管理员、开发者、只读)
|
||||||
|
2. 审计日志(策略与 key 变更)
|
||||||
|
3. 项目级成本归因
|
||||||
|
4. 路由策略模板(按场景)
|
||||||
|
5. 可观测增强(失败聚类、异常洞察)
|
||||||
|
|
||||||
|
## 4.3 P2(6-12 个月)
|
||||||
|
|
||||||
|
1. 企业身份集成(SSO/SAML/OIDC)
|
||||||
|
2. 合规能力包(审计报表、策略模板)
|
||||||
|
3. 更长周期账务与财务对接
|
||||||
|
4. 生态集成(工单/告警/数据平台)
|
||||||
|
|
||||||
|
## 5. 核心用户流程
|
||||||
|
|
||||||
|
## 5.1 流程 1:组织开通与初始化
|
||||||
|
|
||||||
|
1. 管理员创建组织
|
||||||
|
2. 创建团队与成员
|
||||||
|
3. 设置默认预算与告警阈值
|
||||||
|
4. 生成首个 API Key
|
||||||
|
|
||||||
|
验收标准:首次开通流程在 30 分钟内可完成。
|
||||||
|
|
||||||
|
## 5.2 流程 2:开发者接入调用
|
||||||
|
|
||||||
|
1. 开发者获取组织 API Key
|
||||||
|
2. 替换 base URL 与 key 完成调用
|
||||||
|
3. 在控制台查看请求结果与成本
|
||||||
|
|
||||||
|
验收标准:开发者无需修改业务协议即可发起调用。
|
||||||
|
|
||||||
|
## 5.3 流程 3:预算与告警
|
||||||
|
|
||||||
|
1. 管理员设置预算阈值
|
||||||
|
2. 系统实时监测并触发告警
|
||||||
|
3. 管理员查看告警上下文并调整策略
|
||||||
|
|
||||||
|
验收标准:阈值触发后 1 分钟内完成通知。
|
||||||
|
|
||||||
|
## 5.4 流程 4:账单与复盘
|
||||||
|
|
||||||
|
1. 运营负责人按周期查看账单
|
||||||
|
2. 按团队/模型拆分成本
|
||||||
|
3. 导出账单用于财务核对
|
||||||
|
|
||||||
|
验收标准:账单数据和调用数据一致性可验证。
|
||||||
|
|
||||||
|
## 6. 关键产品规则
|
||||||
|
|
||||||
|
1. 所有请求必须可追踪到组织与团队。
|
||||||
|
2. 预算阈值触发后必须有可操作建议(降级/限流/告警升级)。
|
||||||
|
3. 用户可在同一控制台查看“调用结果 + 成本 + 告警”。
|
||||||
|
4. 策略变更必须可审计(谁、何时、改了什么)。
|
||||||
|
|
||||||
|
## 7. 指标体系(KPI)
|
||||||
|
|
||||||
|
## 7.1 增长指标
|
||||||
|
|
||||||
|
1. 新增组织数
|
||||||
|
2. 激活团队数(周活/月活)
|
||||||
|
3. 试用转付费转化率
|
||||||
|
|
||||||
|
## 7.2 价值指标
|
||||||
|
|
||||||
|
1. 受管调用量
|
||||||
|
2. 预算告警命中率
|
||||||
|
3. 成本归因覆盖率
|
||||||
|
|
||||||
|
## 7.3 商业指标
|
||||||
|
|
||||||
|
1. MRR
|
||||||
|
2. 付费客户数
|
||||||
|
3. 续费率/扩容率
|
||||||
|
|
||||||
|
## 8. 发布计划
|
||||||
|
|
||||||
|
## 8.1 里程碑
|
||||||
|
|
||||||
|
1. Alpha(内部可用)
|
||||||
|
- 完成 P0 主流程闭环
|
||||||
|
2. Beta(设计合作伙伴)
|
||||||
|
- 引入 5-10 家团队试用
|
||||||
|
3. GA(正式发布)
|
||||||
|
- 推出 Free/Growth 套餐
|
||||||
|
|
||||||
|
## 8.2 发布门槛(Go/No-Go)
|
||||||
|
|
||||||
|
1. 关键流程无阻塞(开通、调用、预算、账单)
|
||||||
|
2. 数据可追踪与可归因达标
|
||||||
|
3. 客户试用反馈达到最低满意度阈值(内部定义)
|
||||||
|
|
||||||
|
## 9. 依赖与风险
|
||||||
|
|
||||||
|
## 9.1 关键依赖
|
||||||
|
|
||||||
|
1. 供应商接入稳定性
|
||||||
|
2. 计费与账单口径统一
|
||||||
|
3. 告警渠道与通知系统稳定
|
||||||
|
|
||||||
|
## 9.2 主要风险
|
||||||
|
|
||||||
|
1. 功能边界过宽导致首发延期
|
||||||
|
2. 缺少真实客户数据导致价值表达不足
|
||||||
|
3. 定价方案与客户价值感知不匹配
|
||||||
|
|
||||||
|
## 9.3 风险缓解
|
||||||
|
|
||||||
|
1. 坚持 P0 边界,P1/P2 延后
|
||||||
|
2. 以设计合作伙伴反馈驱动迭代
|
||||||
|
3. 发布前完成小规模定价验证访谈
|
||||||
|
|
||||||
|
## 10. 已冻结决策(v1.0 生效)
|
||||||
|
|
||||||
|
1. 首发主轴:`成本治理 + 合规可审计` 双主轴并行,不做二选一。
|
||||||
|
2. Growth 套餐核心计价指标:以“受管成本”为主,请求量作为辅助观测指标。
|
||||||
|
3. 企业版首批必含能力(三项):
|
||||||
|
- SSO/SAML/OIDC 企业身份接入
|
||||||
|
- 审计报表与策略留痕导出
|
||||||
|
- 账务争议 SLA 与补偿闭环
|
||||||
|
4. 凭证边界:用户A仅向平台供给上游凭证,用户B仅使用平台凭证,不得获取上游凭证。
|
||||||
|
|
||||||
|
## 11. 需求到执行映射(P0)
|
||||||
|
|
||||||
|
| Requirement ID | 需求 | 接口契约 | 测试用例 | 门禁指标 |
|---|---|---|---|---|
| PRD-P0-01 | 供应账号挂载与验证 | `POST /api/v1/supply/accounts/verify`、`POST /api/v1/supply/accounts` | `UI-SUP-ACC-001~006` | M-013/M-014 |
| PRD-P0-02 | 套餐发布与上下架 | `POST /api/v1/supply/packages/*` | `UI-SUP-PKG-001~006` | M-011 |
| PRD-P0-03 | 收益结算与提现 | `POST /api/v1/supply/settlements/withdraw` 等 | `UI-SUP-SET-001~005` | 账务差错率 M-004 |
| PRD-P0-04 | 凭证边界强制约束 | 鉴权与导出相关全部接口 | `SEC-SUP-001~002`、`CB-001~004` | M-013~M-016 |
|
||||||
|
## 12. 术语表
|
||||||
|
|
||||||
|
1. 受管成本:通过本平台网关记录并可归因的模型调用成本。
|
||||||
|
2. 预算账户:具备独立预算阈值与告警规则的组织单元。
|
||||||
|
3. 成本归因:将调用成本映射到组织/团队/项目/模型维度。
|
||||||
234
docs/llm_gateway_product_strategy_roadmap_2026-03-16.md
Normal file
234
docs/llm_gateway_product_strategy_roadmap_2026-03-16.md
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
# 商用 LLM 通用转发网关产品战略与路线图(2026)
|
||||||
|
|
||||||
|
- 版本:v1.0(可评审版)
|
||||||
|
- 日期:2026-03-16
|
||||||
|
- 阶段:市场与产品规划(非实现设计)
|
||||||
|
|
||||||
|
## 1. 北极星与战略目标
|
||||||
|
|
||||||
|
## 1.1 北极星指标(North Star)
|
||||||
|
|
||||||
|
`月度受管 Token 成本(GMV)` 与 `受管预算账户数` 的同步增长。
|
||||||
|
|
||||||
|
原因:
|
||||||
|
|
||||||
|
1. 受管成本代表平台真实业务控制面价值。
|
||||||
|
2. 预算账户数代表组织级渗透,而非单一开发者调用。
|
||||||
|
|
||||||
|
## 1.2 12 个月战略目标
|
||||||
|
|
||||||
|
1. 成为“企业 AI 成本与治理控制面”而非“转发工具”。
|
||||||
|
2. 完成从“开发者采用”到“组织采购”的路径打通。
|
||||||
|
3. 形成“自助增长 + 企业合同”双引擎收入结构。
|
||||||
|
|
||||||
|
## 2. ICP(理想客户画像)分层
|
||||||
|
|
||||||
|
## 2.1 ICP-A:成长型 AI 产品团队(优先级 P1)
|
||||||
|
|
||||||
|
- 规模:20-200 人,已有线上 AI 功能。
|
||||||
|
- 核心痛点:
|
||||||
|
1. 多模型调用杂乱、成本不可控
|
||||||
|
2. 团队无统一预算与归因
|
||||||
|
3. 模型故障导致线上波动
|
||||||
|
- 采购特征:技术负责人主导,自助试用后小单订阅。
|
||||||
|
- 价值主张:`一周内上线统一治理,30 天看到成本可解释与可下降`。
|
||||||
|
|
||||||
|
## 2.2 ICP-B:中大型企业平台团队(优先级 P2)
|
||||||
|
|
||||||
|
- 规模:500+ 人,跨 BU 使用 AI。
|
||||||
|
- 核心痛点:
|
||||||
|
1. 合规与审计要求高
|
||||||
|
2. 采购与安全审批严格
|
||||||
|
3. 各团队各自接模型,治理碎片化
|
||||||
|
- 采购特征:安全/平台/采购多方决策,周期长。
|
||||||
|
- 价值主张:`统一策略、统一审计、统一成本责任`。
|
||||||
|
|
||||||
|
## 2.3 ICP-C:服务商/集成商(优先级 P3)
|
||||||
|
|
||||||
|
- 规模:项目制交付团队。
|
||||||
|
- 核心痛点:
|
||||||
|
1. 多客户多环境重复搭建
|
||||||
|
2. 成本归因与账单拆分复杂
|
||||||
|
- 采购特征:以项目交付驱动。
|
||||||
|
- 价值主张:`标准化交付模板 + 多租户经营后台`。
|
||||||
|
|
||||||
|
## 3. 产品定位与价值主张
|
||||||
|
|
||||||
|
## 3.1 定位陈述
|
||||||
|
|
||||||
|
“面向企业与 AI 团队的 LLM 治理控制面:统一接入、动态路由、预算审计、合规可追溯。”
|
||||||
|
|
||||||
|
## 3.2 三层价值
|
||||||
|
|
||||||
|
1. 业务价值:可控成本、稳定 SLA、减少故障损失。
|
||||||
|
2. 管理价值:预算与责任到团队/项目可追溯。
|
||||||
|
3. 组织价值:支持安全、财务、平台多角色协同。
|
||||||
|
|
||||||
|
## 4. 产品版本路线图(0-12 月)
|
||||||
|
|
||||||
|
## 4.1 M0(0-3 个月):可售卖的最小商业版本
|
||||||
|
|
||||||
|
目标:拿下首批 5-10 家付费客户。
|
||||||
|
|
||||||
|
P0 能力:
|
||||||
|
|
||||||
|
1. 统一 API 接入与模型目录
|
||||||
|
2. 基础路由(负载 + fallback + 限流)
|
||||||
|
3. 多租户 key 管理
|
||||||
|
4. 预算阈值告警
|
||||||
|
5. 成本归因(租户/团队/模型)
|
||||||
|
6. 运营看板与基础账单导出
|
||||||
|
|
||||||
|
商业动作:
|
||||||
|
|
||||||
|
1. 推出 `Free + Growth` 双层计划
|
||||||
|
2. 建立 2 周 PoC 模板
|
||||||
|
3. 签下首个设计合作伙伴(Design Partner)
|
||||||
|
|
||||||
|
里程碑(业务):
|
||||||
|
|
||||||
|
1. 激活团队数 >= 30
|
||||||
|
2. 付费团队数 >= 5
|
||||||
|
3. 30 天留存 >= 40%
|
||||||
|
|
||||||
|
## 4.2 M1(3-6 个月):从“可用”到“可控”
|
||||||
|
|
||||||
|
目标:证明“治理能力”可替代自建。
|
||||||
|
|
||||||
|
P1 能力:
|
||||||
|
|
||||||
|
1. 组织管理(多团队权限)
|
||||||
|
2. 审计日志与策略变更历史
|
||||||
|
3. 更细粒度预算(项目/环境)
|
||||||
|
4. 模型策略模板(按场景)
|
||||||
|
5. 可观测增强(错误归因、异常趋势)
|
||||||
|
|
||||||
|
商业动作:
|
||||||
|
|
||||||
|
1. 上线 `Enterprise` 预售包(年约)
|
||||||
|
2. 引入 1-2 个行业模板(如客服/代码助手)
|
||||||
|
3. 建立渠道合作(咨询/集成商)
|
||||||
|
|
||||||
|
里程碑(业务):
|
||||||
|
|
||||||
|
1. 付费客户 >= 20
|
||||||
|
2. MRR 达到目标区间(内部设定)
|
||||||
|
3. NRR >= 100%
|
||||||
|
|
||||||
|
## 4.3 M2(6-12 个月):企业化与规模化
|
||||||
|
|
||||||
|
目标:从产品增长走向组织级采购。
|
||||||
|
|
||||||
|
P2 能力:
|
||||||
|
|
||||||
|
1. 企业身份集成(SSO/SAML/OIDC)
|
||||||
|
2. 合规能力包(审计报表、数据策略)
|
||||||
|
3. 多环境治理(dev/staging/prod)
|
||||||
|
4. 财务对账增强(账单分摊、异常单追踪)
|
||||||
|
5. 生态集成(主流观测/告警/工单系统)
|
||||||
|
|
||||||
|
商业动作:
|
||||||
|
|
||||||
|
1. 建立企业销售漏斗(线索 -> PoC -> 合同)
|
||||||
|
2. 发布客户案例与 ROI 模板
|
||||||
|
3. 推动行业纵深(2 个重点行业)
|
||||||
|
|
||||||
|
里程碑(业务):
|
||||||
|
|
||||||
|
1. 年约客户 >= 8
|
||||||
|
2. 企业客户续约率 >= 85%
|
||||||
|
3. 头部客户单客 ARR 达标(内部目标)
|
||||||
|
|
||||||
|
## 5. 商业化策略(Packaging & Pricing)
|
||||||
|
|
||||||
|
## 5.1 建议套餐结构
|
||||||
|
|
||||||
|
1. Free(开发者试用)
|
||||||
|
- 目标:降低上手门槛
|
||||||
|
- 价值:快速验证接入与基础路由
|
||||||
|
2. Growth(团队订阅)
|
||||||
|
- 目标:承接可持续增长
|
||||||
|
- 价值:预算治理、告警、看板、协作
|
||||||
|
3. Enterprise(合同年约)
|
||||||
|
- 目标:构建高毛利与稳定收入
|
||||||
|
- 价值:合规、审计、私有化/专属支持、SLA
|
||||||
|
|
||||||
|
## 5.2 定价指标建议
|
||||||
|
|
||||||
|
1. 主指标:组织级调用规模(请求量/受管成本)
|
||||||
|
2. 辅指标:团队数量、日志保留、企业功能包
|
||||||
|
3. 禁忌:只按“基础转发”计价,容易陷入价格战
|
||||||
|
|
||||||
|
## 5.3 收入结构目标
|
||||||
|
|
||||||
|
1. 0-6 个月:订阅收入占主导
|
||||||
|
2. 6-12 个月:企业合同占比提升到可持续区间
|
||||||
|
3. 保留 BYOK 路径,降低采购与合规阻力
|
||||||
|
|
||||||
|
## 6. GTM(Go-To-Market)与渠道
|
||||||
|
|
||||||
|
## 6.1 获取路径
|
||||||
|
|
||||||
|
1. 开发者自助(文档 + demo + 免费版)
|
||||||
|
2. 社区传播(开源生态、技术内容)
|
||||||
|
3. 设计合作伙伴(案例驱动)
|
||||||
|
|
||||||
|
## 6.2 转化路径
|
||||||
|
|
||||||
|
1. PQL(产品合格线索)识别:
|
||||||
|
- 多团队使用
|
||||||
|
- 出现预算告警
|
||||||
|
- 月调用规模超过阈值
|
||||||
|
2. 从自助到销售介入:
|
||||||
|
- 提供 PoC 计划与迁移支持
|
||||||
|
3. 从团队订阅到企业年约:
|
||||||
|
- 引入安全/财务角色
|
||||||
|
|
||||||
|
## 6.3 护城河建设方向
|
||||||
|
|
||||||
|
1. 成本治理算法与策略模板
|
||||||
|
2. 行业合规模板与审计资产
|
||||||
|
3. 运营数据资产(跨模型性能/成本基线)
|
||||||
|
|
||||||
|
## 7. 关键经营指标(非技术 KPI)
|
||||||
|
|
||||||
|
## 7.1 增长指标
|
||||||
|
|
||||||
|
1. 激活组织数(WAU/MAU)
|
||||||
|
2. 新增付费转化率
|
||||||
|
3. 试用到付费周期
|
||||||
|
|
||||||
|
## 7.2 留存与扩张指标
|
||||||
|
|
||||||
|
1. 30/90 天留存
|
||||||
|
2. NRR、扩容率
|
||||||
|
3. 团队内席位渗透率
|
||||||
|
|
||||||
|
## 7.3 经营健康指标
|
||||||
|
|
||||||
|
1. CAC 回收周期
|
||||||
|
2. 毛利率(按套餐)
|
||||||
|
3. 企业赢单率与销售周期
|
||||||
|
|
||||||
|
## 8. 主要风险与缓解策略
|
||||||
|
|
||||||
|
| 风险 | 影响 | 缓解策略 |
|---|---|---|
| 基础转发同质化 | 价格战、利润被压缩 | 强化治理和合规价值,弱化“转发”叙事 |
| 开源替代快速迭代 | 免费替代压力 | 走“开源友好 + 企业升级”路线 |
| 大厂平台挤压 | 客户被云厂商锁定 | 强调跨云中立与统一治理 |
| 企业采购周期长 | 现金流压力 | 先抓成长型团队,形成自助现金流 |
| 合规诉求复杂 | 交付风险高 | 模板化合规能力与标准化交付 |
|
||||||
|
|
||||||
|
## 9. 三个战略决策点(需管理层拍板)
|
||||||
|
|
||||||
|
1. **定价主轴**:以“请求量”为主,还是以“受管成本”为主。
|
||||||
|
2. **客户优先级**:先做成长型 AI 团队,还是直接冲企业平台团队。
|
||||||
|
3. **首发边界**:首发强调“成本治理”还是“合规治理”。
|
||||||
|
|
||||||
|
## 10. 本文档与下游文档关系
|
||||||
|
|
||||||
|
1. 本文档回答“做什么、先做哪类客户、怎么商业化”。
|
||||||
|
2. `PRD v0` 将把路线图拆解为可验收需求。
|
||||||
|
3. 技术蓝图应在 PRD 评审通过后再细化。
|
||||||
|
|
||||||
318
docs/llm_gateway_product_technical_blueprint_v1_2026-03-16.md
Normal file
318
docs/llm_gateway_product_technical_blueprint_v1_2026-03-16.md
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
# 商用 LLM 通用转发网关技术蓝图(v1)
|
||||||
|
|
||||||
|
- 阶段备注:本文件为阶段性技术草案,需在 `PRD v0` 与产品路线图评审通过后,再进入正式技术评审与实现排期。
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-16
|
||||||
|
- 适用阶段:0 到 1 立项与首版工程落地
|
||||||
|
- 目标读者:CTO、架构师、后端负责人、平台工程团队
|
||||||
|
|
||||||
|
## 1. 产品目标与范围
|
||||||
|
|
||||||
|
## 1.1 产品目标
|
||||||
|
|
||||||
|
构建一个可商用的多模型网关平台,向企业与开发者提供:
|
||||||
|
|
||||||
|
1. 统一接入:屏蔽上游 LLM 厂商差异,统一 OpenAI/Anthropic/Gemini 入口。
|
||||||
|
2. 智能路由:基于成本、质量、延迟、配额、可用性做实时调度。
|
||||||
|
3. 治理与合规:多租户权限、预算控制、审计留痕、策略管控。
|
||||||
|
4. 成本可控:预扣+结算+退款的可对账计费链路,支持 FinOps 分析。
|
||||||
|
|
||||||
|
## 1.2 明确不做(v1)
|
||||||
|
|
||||||
|
1. 不做模型训练和微调托管平台。
|
||||||
|
2. 不做复杂 Agent 编排引擎(只做网关层能力)。
|
||||||
|
3. 不做全行业模板市场(先聚焦平台能力和企业接入)。
|
||||||
|
|
||||||
|
## 2. 目标客户与核心场景
|
||||||
|
|
||||||
|
## 2.1 目标客户
|
||||||
|
|
||||||
|
1. 有多模型调用需求的 B 端 SaaS 厂商。
|
||||||
|
2. 有合规与审计要求的中大型企业内部平台团队。
|
||||||
|
3. 需要统一计费与预算管理的 AI 应用开发团队。
|
||||||
|
|
||||||
|
## 2.2 核心场景
|
||||||
|
|
||||||
|
1. 多供应商故障切换:主模型失败自动回退,保障 SLA。
|
||||||
|
2. 成本优化路由:在质量阈值内优先低成本模型。
|
||||||
|
3. 多租户预算治理:团队/项目/API Key 级预算、限流、告警。
|
||||||
|
4. 运营可视化:按租户、模型、供应商查看成本与成功率。
|
||||||
|
|
||||||
|
## 3. 非功能性指标(必须)
|
||||||
|
|
||||||
|
1. 可用性:核心转发 API 月可用性 >= 99.95%。
|
||||||
|
2. 时延:网关额外开销 P95 <= 60ms(不含上游模型推理时间)。
|
||||||
|
3. 并发:单集群稳态支持 5k 并发请求(可横向扩展)。
|
||||||
|
4. 数据一致性:计费账实差异率 <= 0.1%。
|
||||||
|
5. 可追踪性:100% 请求可关联 request_id 与审计日志。
|
||||||
|
|
||||||
|
## 4. 总体架构
|
||||||
|
|
||||||
|
采用“控制面(Control Plane)+ 数据面(Data Plane)”双平面架构。
|
||||||
|
|
||||||
|
```mermaid
flowchart LR
    A[Client SDK/Apps] --> B[API Gateway Ingress]
    B --> C[Data Plane Router]
    C --> D1[Provider Adapter OpenAI]
    C --> D2[Provider Adapter Anthropic]
    C --> D3[Provider Adapter Gemini]
    D1 --> E[Upstream Providers]
    D2 --> E
    D3 --> E

    F[Control Plane API] --> G[Policy Engine]
    F --> H[Billing Engine]
    F --> I[Tenant/Auth Service]
    F --> J[Admin Console]

    C --> K[(Redis)]
    C --> L[(PostgreSQL)]
    C --> M[(Message Queue)]
    C --> N[(Metrics/Logs/Traces)]

    H --> L
    G --> L
    I --> L
```
|
||||||
|
|
||||||
|
## 4.1 数据面职责
|
||||||
|
|
||||||
|
1. 北向协议解析与标准化。
|
||||||
|
2. 路由决策与重试/回退。
|
||||||
|
3. 上游适配、流式转发、超时与熔断控制。
|
||||||
|
4. 请求级计费事件写入与异步落库。
|
||||||
|
|
||||||
|
## 4.2 控制面职责
|
||||||
|
|
||||||
|
1. 租户、团队、Key、角色权限管理。
|
||||||
|
2. 策略配置(路由、限流、模型白名单、预算)。
|
||||||
|
3. 账单与结算、对账、报表。
|
||||||
|
4. 运维后台(告警、审计、配置发布)。
|
||||||
|
|
||||||
|
## 5. 核心模块设计
|
||||||
|
|
||||||
|
## 5.1 协议接入层(Northbound API)
|
||||||
|
|
||||||
|
v1 支持:
|
||||||
|
|
||||||
|
1. OpenAI 兼容:`/v1/chat/completions`、`/v1/embeddings`、`/v1/models`。
|
||||||
|
2. Anthropic 兼容:`/v1/messages`。
|
||||||
|
3. Gemini 兼容:`/v1beta/models/*`(优先核心生成路径)。
|
||||||
|
4. Realtime:先灰度,仅在 OpenAI 兼容路径上线。
|
||||||
|
|
||||||
|
设计要求:
|
||||||
|
|
||||||
|
1. 每个请求必须注入全局 `request_id`。
|
||||||
|
2. 统一请求上下文结构(tenant_id/team_id/key_id/model_group)。
|
||||||
|
3. 统一错误码体系,向上兼容 OpenAI 风格。
|
||||||
|
|
||||||
|
## 5.2 路由引擎(Router Core)
|
||||||
|
|
||||||
|
路由采用“三阶段过滤 + 多因子打分”:
|
||||||
|
|
||||||
|
1. 硬过滤:模型支持、租户策略、预算可用、健康状态。
|
||||||
|
2. 软排序:分值 = `w1*cost + w2*latency + w3*success_rate + w4*load + w5*quality`。
|
||||||
|
3. 选择策略:TopK + 加权随机,避免单节点长期垄断。
|
||||||
|
|
||||||
|
失败处理链路:
|
||||||
|
|
||||||
|
1. 同供应商重试(指数退避,限定最大重试次数)。
|
||||||
|
2. 跨供应商回退(按策略优先级)。
|
||||||
|
3. 熔断器打开后跳过异常线路,半开探活恢复。
|
||||||
|
|
||||||
|
## 5.3 Provider Adapter 层
|
||||||
|
|
||||||
|
每个 Provider Adapter 对外统一接口:
|
||||||
|
|
||||||
|
1. `prepare_request()`
|
||||||
|
2. `send()`
|
||||||
|
3. `normalize_response()`
|
||||||
|
4. `normalize_error()`
|
||||||
|
5. `extract_usage()`
|
||||||
|
|
||||||
|
保证点:
|
||||||
|
|
||||||
|
1. 上游差异只在 Adapter 内部消化。
|
||||||
|
2. 统一输出 usage(prompt/completion/total tokens、model、provider)。
|
||||||
|
|
||||||
|
## 5.4 计费与账务引擎(Billing Ledger)
|
||||||
|
|
||||||
|
采用“预扣-结算-退款”三段式:
|
||||||
|
|
||||||
|
1. 预扣:请求入站后按估算额度冻结。
|
||||||
|
2. 结算:响应完成按真实 usage 结算差额。
|
||||||
|
3. 退款:失败或中断按规则退还。
|
||||||
|
|
||||||
|
关键要求:
|
||||||
|
|
||||||
|
1. 所有账务事件必须带幂等键(`request_id + stage`)。
|
||||||
|
2. 账务流水不可变(append-only ledger)。
|
||||||
|
3. 支持异步补偿任务与对账任务。
|
||||||
|
|
||||||
|
## 5.5 租户权限与安全(Auth + Governance)
|
||||||
|
|
||||||
|
1. 多租户隔离:租户 > 团队 > Key > 用户。
|
||||||
|
2. RBAC:管理员、计费管理员、开发者、只读审计。
|
||||||
|
3. Key 策略:过期、IP 白名单、模型白名单、速率限制。
|
||||||
|
4. 数据安全:敏感字段加密存储,日志脱敏。
|
||||||
|
|
||||||
|
## 5.6 可观测与运维(Observability)
|
||||||
|
|
||||||
|
1. Metrics:QPS、成功率、P95/P99、错误码分布、每模型成本。
|
||||||
|
2. Logs:结构化日志,统一 request_id 贯穿。
|
||||||
|
3. Traces:跨服务链路追踪,定位路由与上游瓶颈。
|
||||||
|
4. 告警:错误率、延迟突增、预算超限、上游可用性下降。
|
||||||
|
|
||||||
|
## 6. 数据模型(v1 最小集)
|
||||||
|
|
||||||
|
建议存储:PostgreSQL + Redis + 对象存储(导出报表)
|
||||||
|
|
||||||
|
核心表:
|
||||||
|
|
||||||
|
1. `tenants`:租户基础信息、套餐、状态。
|
||||||
|
2. `teams`:团队与租户映射、预算策略。
|
||||||
|
3. `users`:账号主体、角色、状态。
|
||||||
|
4. `api_keys`:Key、权限、限流、过期策略。
|
||||||
|
5. `providers`:供应商元数据(OpenAI/Anthropic/Gemini...)。
|
||||||
|
6. `provider_accounts`:上游账号、凭证、并发、优先级、健康状态。
|
||||||
|
7. `model_catalog`:模型映射、上下文、价格快照、能力标签。
|
||||||
|
8. `routing_policies`:路由规则、回退链、权重配置。
|
||||||
|
9. `requests`:请求索引(request_id、租户、模型、状态、时延)。
|
||||||
|
10. `usage_events`:usage 明细(token、成本、provider、model)。
|
||||||
|
11. `billing_ledger`:账务流水(pre/settle/refund)。
|
||||||
|
12. `budgets`:租户/团队/Key 预算与周期窗口。
|
||||||
|
13. `audit_logs`:配置变更与敏感操作审计。
|
||||||
|
|
||||||
|
Redis 用途:
|
||||||
|
|
||||||
|
1. 热路径限流计数(RPM/TPM)。
|
||||||
|
2. 路由短期状态(负载、错误率、熔断状态)。
|
||||||
|
3. 幂等键与去重锁。
|
||||||
|
|
||||||
|
## 7. API 设计(对外与控制面)
|
||||||
|
|
||||||
|
## 7.1 数据面 API(对业务方)
|
||||||
|
|
||||||
|
1. `POST /v1/chat/completions`
|
||||||
|
2. `POST /v1/embeddings`
|
||||||
|
3. `GET /v1/models`
|
||||||
|
4. `POST /v1/messages`
|
||||||
|
5. `POST /v1beta/models/{model}:{action}`
|
||||||
|
|
||||||
|
统一响应头:
|
||||||
|
|
||||||
|
1. `x-request-id`
|
||||||
|
2. `x-provider`
|
||||||
|
3. `x-upstream-model`
|
||||||
|
4. `x-route-policy`
|
||||||
|
5. `x-billing-estimate`(可选)
|
||||||
|
|
||||||
|
## 7.2 控制面 API(后台与自动化)
|
||||||
|
|
||||||
|
1. 租户与团队:`/admin/tenants/*`、`/admin/teams/*`
|
||||||
|
2. Key 管理:`/admin/keys/*`
|
||||||
|
3. 路由策略:`/admin/routing/policies/*`
|
||||||
|
4. 预算策略:`/admin/budgets/*`
|
||||||
|
5. 账务查询:`/admin/billing/*`
|
||||||
|
6. 审计查询:`/admin/audits/*`
|
||||||
|
|
||||||
|
## 8. 技术栈建议(面向商用)
|
||||||
|
|
||||||
|
后端建议:
|
||||||
|
|
||||||
|
1. 语言:Go(数据面)+ Go/Java(控制面可同构)。
|
||||||
|
2. 框架:Gin/Fiber(数据面高性能)+ 标准化中间件。
|
||||||
|
3. 数据库:PostgreSQL(主存储)+ Redis(状态缓存)。
|
||||||
|
4. 队列:Kafka 或 NATS(账务与日志异步化)。
|
||||||
|
5. 可观测:Prometheus + Loki + Tempo 或 ELK + Jaeger。
|
||||||
|
|
||||||
|
部署建议:
|
||||||
|
|
||||||
|
1. K8s 部署,数据面无状态横向扩展。
|
||||||
|
2. 控制面与数据面独立扩缩容策略。
|
||||||
|
3. 灰度发布与回滚(Canary + Feature Flag)。
|
||||||
|
|
||||||
|
## 9. 90 天周级落地计划
|
||||||
|
|
||||||
|
## 阶段 1(W1-W4):可用 MVP
|
||||||
|
|
||||||
|
1. W1:项目骨架、统一上下文、请求链路与错误码标准化。
|
||||||
|
2. W2:OpenAI 兼容核心接口 + 两个上游 Adapter。
|
||||||
|
3. W3:基础路由(健康检查 + 重试 + 回退)+ Redis 限流。
|
||||||
|
4. W4:预扣/结算最小账务闭环 + 基础监控看板。
|
||||||
|
|
||||||
|
里程碑:
|
||||||
|
|
||||||
|
1. 单租户端到端可用。
|
||||||
|
2. 支持最小商用 PoC。
|
||||||
|
|
||||||
|
## 阶段 2(W5-W8):企业治理能力
|
||||||
|
|
||||||
|
1. W5:多租户、团队、Key 与 RBAC。
|
||||||
|
2. W6:预算策略(租户/团队/Key)与超限拦截。
|
||||||
|
3. W7:路由评分引擎 v1(cost/latency/success/load)。
|
||||||
|
4. W8:审计日志、告警规则、运营后台基础页。
|
||||||
|
|
||||||
|
里程碑:
|
||||||
|
|
||||||
|
1. 支持真实客户灰度。
|
||||||
|
2. 可输出成本归因报表。
|
||||||
|
|
||||||
|
## 阶段 3(W9-W12):稳定性与商业化增强
|
||||||
|
|
||||||
|
1. W9:熔断半开与自动恢复、故障演练体系。
|
||||||
|
2. W10:账务对账任务、补偿任务、异常工单闭环。
|
||||||
|
3. W11:控制面 API 对接企业内部系统(Webhook/报表导出)。
|
||||||
|
4. W12:性能压测、SLA 验收、上线手册与值班手册。
|
||||||
|
|
||||||
|
里程碑:
|
||||||
|
|
||||||
|
1. 首个企业客户可生产试运行。
|
||||||
|
2. 具备可销售的企业版能力说明。
|
||||||
|
|
||||||
|
## 10. 与开源竞品的“借鉴/自研”边界
|
||||||
|
|
||||||
|
建议原则:
|
||||||
|
|
||||||
|
1. 借鉴架构思路,不直接耦合高法律风险组件到核心路径。
|
||||||
|
2. 核心账务、租户、策略、审计建议自研,确保可控。
|
||||||
|
|
||||||
|
建议映射:
|
||||||
|
|
||||||
|
1. 路由策略设计:可借鉴 `litellm`。
|
||||||
|
2. 调度中台化与运行时反馈:可借鉴 `sub2api`。
|
||||||
|
3. 快速协议覆盖清单:可参考 `new-api` 路由组织。
|
||||||
|
4. 入门与兼容思路:可参考 `one-api`。
|
||||||
|
|
||||||
|
## 11. 风险清单与应对
|
||||||
|
|
||||||
|
1. 法律风险(License 不兼容)
|
||||||
|
应对:核心链路只使用 MIT/Apache 友好组件,法务前置审查。
|
||||||
|
|
||||||
|
2. 账务风险(多重试导致对账偏差)
|
||||||
|
应对:幂等账本 + 日对账 + 异常自动补偿。
|
||||||
|
|
||||||
|
3. 性能风险(策略过重影响网关时延)
|
||||||
|
应对:策略分层,热路径仅保留 O(1)/O(logN) 逻辑。
|
||||||
|
|
||||||
|
4. 运维风险(上游大面积故障)
|
||||||
|
应对:多供应商冗余 + 熔断 + 降级模板 + 演练机制。
|
||||||
|
|
||||||
|
## 12. 立项验收标准(Go/No-Go)
|
||||||
|
|
||||||
|
上线前必须全部满足:
|
||||||
|
|
||||||
|
1. 连续 7 天灰度环境可用性 >= 99.9%。
|
||||||
|
2. 账务差错率 <= 0.1%,且可追溯。
|
||||||
|
3. 单租户压测达到目标并发阈值。
|
||||||
|
4. 审计日志覆盖 100% 管理操作。
|
||||||
|
5. 至少 1 家试点客户完成可用性验收。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 13. 下一步执行建议(立即)
|
||||||
|
|
||||||
|
1. 一周内冻结 v1 领域模型与 API 契约(避免边做边漂移)。
|
||||||
|
2. 两周内完成 W1-W2 的可运行最小链路(先连通再优化)。
|
||||||
|
3. 并行建立“账务正确性测试集”和“故障演练脚本”,从第一天就纳入 CI。
|
||||||
305
docs/llm_gateway_subapi_evolution_plan_v2_2026-03-17.md
Normal file
305
docs/llm_gateway_subapi_evolution_plan_v2_2026-03-17.md
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
# 商用 LLM 通用转发网关演进方案(Subapi First)
|
||||||
|
|
||||||
|
- 版本:v2.0(续规划稿)
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 基线文档:
|
||||||
|
- `llm_gateway_prd_v0_2026-03-16.md`
|
||||||
|
- `llm_gateway_product_technical_blueprint_v1_2026-03-16.md`
|
||||||
|
- `sub2api_integration_readiness_checklist_2026-03-16.md`
|
||||||
|
- 文档目标:在既有 PRD/技术蓝图基础上,明确“先集成 subapi,再逐步替代客户端 subapi,最终形成企业级控制面”的演进路径与边界。
|
||||||
|
|
||||||
|
## 1. 本轮续规划目标
|
||||||
|
|
||||||
|
围绕你提出的方向,形成四个连续目标:
|
||||||
|
|
||||||
|
1. 第一阶段以 `subapi` 作为集成底座,快速形成可售卖能力。
|
||||||
|
2. 逐步替代用户端 `subapi` 接入,统一到我方网关入口与控制面。
|
||||||
|
3. 后续增强“机器人客户(Bot Customer)”能力,支持自动化运营与客户成功流程。
|
||||||
|
4. 增加独立模块:
|
||||||
|
- LLM 供应商账号导入(BYOA/BYOK)
|
||||||
|
- 低成本 LLM 账号获取与治理(仅限合规来源)
|
||||||
|
- 合规与审计增强(ToS、隐私、风控)
|
||||||
|
|
||||||
|
## 2. 三种演进路径对比(先决策)
|
||||||
|
|
||||||
|
### 路径 A:`subapi` 外部服务模块化(推荐)
|
||||||
|
|
||||||
|
1. 做法:
|
||||||
|
- `subapi` 作为独立服务部署在数据面旁路。
|
||||||
|
- 我方网关只通过内部 API 调用 `subapi` 的能力。
|
||||||
|
- 核心账务、租户、权限、审计在我方控制面自研。
|
||||||
|
2. 优点:
|
||||||
|
- 集成最快,风险隔离最好,回滚简单。
|
||||||
|
- 可逐步替换,不会一次性重构核心链路。
|
||||||
|
3. 缺点:
|
||||||
|
- 双系统运维复杂度增加。
|
||||||
|
- 部分路由决策需跨服务,调优链路更长。
|
||||||
|
|
||||||
|
### 路径 B:反向代理链路串联(次选)
|
||||||
|
|
||||||
|
1. 做法:我方网关前置,`subapi` 作为后置中转层。
|
||||||
|
2. 优点:接入快,早期改造少。
|
||||||
|
3. 缺点:链路故障定位复杂,长期可维护性差。
|
||||||
|
|
||||||
|
### 路径 C:Fork 深度改造(谨慎)
|
||||||
|
|
||||||
|
1. 做法:维护私有 `subapi` 分支并深改为主系统。
|
||||||
|
2. 优点:短期控制力强。
|
||||||
|
3. 缺点:长期升级成本高,版本漂移和维护负担重。
|
||||||
|
|
||||||
|
结论:推荐路径 A,保持“快上线 + 可替换 + 可审计”的平衡。
|
||||||
|
|
||||||
|
## 3. 目标架构(控制面优先,数据面渐进替换)
|
||||||
|
|
||||||
|
## 3.1 架构原则
|
||||||
|
|
||||||
|
1. 控制面主权:租户、计费、权限、审计、合规全部沉淀在我方。
|
||||||
|
2. 数据面兼容:北向接口保持 OpenAI/Anthropic/Gemini 兼容,不强推客户改协议。
|
||||||
|
3. 双轨演进:短期借力 `subapi`,中期逐步把关键能力迁入自研 Router Core。
|
||||||
|
4. 风险前置:把合规和 ToS 校验纳入请求前置策略,而非事后补救。
|
||||||
|
|
||||||
|
## 3.2 分层模块
|
||||||
|
|
||||||
|
1. Ingress & Compatibility Layer
|
||||||
|
- 统一 `/v1/*` 入口
|
||||||
|
- 客户端兼容适配(历史 subapi 客户端参数映射)
|
||||||
|
2. Routing Orchestration Layer
|
||||||
|
- Provider Registry
|
||||||
|
- Policy Engine(预算、白名单、限流、回退)
|
||||||
|
- `subapi connector`(阶段 1-2)
|
||||||
|
3. Provider Adapter Layer
|
||||||
|
- OpenAI / Anthropic / Gemini / OpenRouter / 其他国产模型
|
||||||
|
- 统一 usage 与错误标准化
|
||||||
|
4. Control Plane
|
||||||
|
- Tenant/RBAC
|
||||||
|
- Billing Ledger(pre-settle-refund)
|
||||||
|
- Audit & Compliance
|
||||||
|
- Provider Account Vault
|
||||||
|
5. Bot Customer Layer(后续)
|
||||||
|
- 客户成功机器人
|
||||||
|
- 运维机器人
|
||||||
|
- 成本优化机器人
|
||||||
|
|
||||||
|
## 4. 分阶段路线图(以 2026-03-17 为起点)
|
||||||
|
|
||||||
|
## 阶段 S1:Subapi 集成上线(2026-03-17 至 2026-05-15)
|
||||||
|
|
||||||
|
目标:在 8 周内构建“可售卖 MVP”,验证付费与稳定性。
|
||||||
|
|
||||||
|
1. 产品能力
|
||||||
|
- 统一入口 + 基础路由 + fallback
|
||||||
|
- 租户/团队/Key 管理
|
||||||
|
- 预算阈值告警 + 成本归因
|
||||||
|
- 账单导出
|
||||||
|
2. 技术动作
|
||||||
|
- 采用路径 A:`subapi` 外部服务模块化接入
|
||||||
|
- 建立 `subapi connector` 与 `adapter registry`
|
||||||
|
- 接入监控基线:错误率、P95、成本突增、熔断次数
|
||||||
|
3. Go/No-Go
|
||||||
|
- 灰度 7 天可用性 >= 99.9%
|
||||||
|
- 成本差错率 <= 0.1%
|
||||||
|
- 回滚演练通过(30 分钟内)
|
||||||
|
|
||||||
|
## 阶段 S2:替代用户端 subapi(2026-05-16 至 2026-08-15)
|
||||||
|
|
||||||
|
目标:将客户“直接用 subapi”的模式迁到“统一用我方网关”。
|
||||||
|
|
||||||
|
1. 客户迁移机制
|
||||||
|
- SDK/配置迁移向导(base URL、key、模型映射)
|
||||||
|
- 兼容层保留旧参数与错误码映射
|
||||||
|
- 按租户分批迁移(10%/30%/60%/100%)
|
||||||
|
2. 技术动作
|
||||||
|
- 自研 Router Core 接管 >= 60% 主路径请求(全供应商口径)
|
||||||
|
- 国内 LLM 供应商主路径请求由自研 Router Core 接管率 = 100%
|
||||||
|
- `subapi` 仅保留长尾协议或备用回退
|
||||||
|
- 完成 Provider 扩容到至少 6 家可用供应商
|
||||||
|
3. 商业目标
|
||||||
|
- 新签客户默认走我方入口,不再直接依赖客户侧 subapi
|
||||||
|
- 迁移客户 30 天留存与成功率不低于迁移前
|
||||||
|
|
||||||
|
## 阶段 S3:机器人客户能力(2026-08-16 至 2026-10-31)
|
||||||
|
|
||||||
|
目标:把“人肉运维与客户成功”升级为“机器人辅助闭环”。
|
||||||
|
|
||||||
|
1. Bot Customer 能力包
|
||||||
|
- 成本优化机器人:给出模型替换建议与预算调优建议
|
||||||
|
- 运维机器人:异常解释、回滚建议、故障摘要
|
||||||
|
- 客户成功机器人:新客户接入引导、健康巡检、续费风险提示
|
||||||
|
2. 边界
|
||||||
|
- 机器人只提供建议和自动化执行草案,关键动作仍需审批
|
||||||
|
- 保留审计轨迹(谁触发、依据什么策略、最终执行结果)
|
||||||
|
3. 技术依赖
|
||||||
|
- 事件总线与规则引擎
|
||||||
|
- 对话上下文存储
|
||||||
|
- 可观测数据聚合与指标解释层
|
||||||
|
|
||||||
|
## 阶段 S4:供应商账号导入 + 低成本账号模块 + 合规增强(2026-11-01 至 2027-03-31)
|
||||||
|
|
||||||
|
目标:形成差异化护城河,但严格遵循合规边界。
|
||||||
|
|
||||||
|
1. 供应商账号导入(BYOA/BYOK)
|
||||||
|
- 支持客户导入 OpenAI/Anthropic/Gemini/其他账号凭证
|
||||||
|
- Vault 加密存储、轮换、最小权限与可撤销
|
||||||
|
- 账号健康状态、额度状态与风险状态统一可视化
|
||||||
|
2. 低成本账号模块(仅合规来源)
|
||||||
|
- 仅允许官方渠道或授权分销渠道
|
||||||
|
- 建立账号来源证明、合同/授权凭证、风控评分
|
||||||
|
- 禁止任何违反上游 ToS 的灰色接入方式
|
||||||
|
3. 合规增强
|
||||||
|
- ToS Policy Engine:按供应商、区域、模型动态拦截
|
||||||
|
- 数据与隐私策略:PII 脱敏、日志保留策略、跨境策略
|
||||||
|
- 审计报表:面向法务/安全/财务的统一合规报表
|
||||||
|
|
||||||
|
## 5. 核心模块设计补充(本轮新增)
|
||||||
|
|
||||||
|
## 5.1 Provider Account Vault
|
||||||
|
|
||||||
|
1. 凭证存储:KMS + 字段级加密 + 审计日志。
|
||||||
|
2. 生命周期:导入、校验、轮换、停用、吊销。
|
||||||
|
3. 访问控制:仅策略引擎短时解密,禁止控制台明文展示。
|
||||||
|
|
||||||
|
## 5.2 ToS & Compliance Engine
|
||||||
|
|
||||||
|
1. 规则维度:供应商条款、区域限制、模型限制、使用场景限制。
|
||||||
|
2. 执行位置:请求前置硬拦截 + 请求后审计与告警。
|
||||||
|
3. 输出:
|
||||||
|
- 合规判定结果
|
||||||
|
- 拦截原因码
|
||||||
|
- 审计证据链
|
||||||
|
|
||||||
|
## 5.3 Bot Customer Orchestrator
|
||||||
|
|
||||||
|
1. 事件输入:预算超阈值、错误率突增、客户健康分下降。
|
||||||
|
2. 决策输出:建议动作(降级模型、加权切换、限流、回滚)。
|
||||||
|
3. 安全机制:高风险动作必须人工审批。
|
||||||
|
|
||||||
|
## 6. 指标与验收(续规划版)
|
||||||
|
|
||||||
|
## 6.1 产品与商业指标
|
||||||
|
|
||||||
|
1. 从“客户侧 subapi”迁移到“我方网关入口”的迁移完成率。
|
||||||
|
2. 单客户月度受管成本(GMV)与毛利率。
|
||||||
|
3. 预算超限前告警命中率与执行闭环率。
|
||||||
|
|
||||||
|
## 6.2 技术与稳定性指标
|
||||||
|
|
||||||
|
1. 网关附加时延 P95(不含上游推理) <= 60ms。
|
||||||
|
2. 请求可追踪率(request_id 全链路覆盖) = 100%。
|
||||||
|
3. 账务差错率 <= 0.1%,并可追溯到请求级。
|
||||||
|
4. S2 结束时主路径接管率(全供应商) >= 60%。
|
||||||
|
5. S2 结束时国内 LLM 供应商主路径接管率 = 100%。
|
||||||
|
|
||||||
|
## 6.3 合规与风控指标
|
||||||
|
|
||||||
|
1. 高风险策略误放行率接近 0(以红线规则为硬约束)。
|
||||||
|
2. 供应商 ToS 规则覆盖率(已接入供应商) = 100%。
|
||||||
|
3. 关键管理操作审计覆盖率 = 100%。
|
||||||
|
|
||||||
|
## 7. 风险与应对(针对本轮新增能力)
|
||||||
|
|
||||||
|
1. 低成本账号模块的法律风险
|
||||||
|
- 应对:只做合规来源,法务前置审批,建立可审计证据链。
|
||||||
|
2. 机器人误触发导致运营事故
|
||||||
|
- 应对:高风险动作审批制 + 演练 + 回滚预案。
|
||||||
|
3. 迁移期间双轨链路复杂度上升
|
||||||
|
- 应对:按租户灰度、双写观测、阶段性去耦目标明确。
|
||||||
|
4. `subapi` 快速迭代导致兼容性回归
|
||||||
|
- 应对:版本锁定 + 契约测试 + 周级升级窗口 + 自动回滚。
|
||||||
|
|
||||||
|
## 8. 本周可执行动作(2026-03-17 当周)
|
||||||
|
|
||||||
|
1. 冻结 S1 范围和不做清单(避免范围膨胀)。
|
||||||
|
2. 明确 `subapi connector` 契约:
|
||||||
|
- 请求标准化字段
|
||||||
|
- usage 对账字段
|
||||||
|
- 错误码映射表
|
||||||
|
- 契约文档:`subapi_connector_contract_v1_2026-03-17.md`
|
||||||
|
3. 建立迁移基线文档:
|
||||||
|
- 客户侧 subapi -> 我方网关 的迁移手册 v0
|
||||||
|
4. 和法务定义 S4 的“低成本账号模块”红线条款模板。
|
||||||
|
|
||||||
|
## 9. 已拍板决策(2026-03-17)
|
||||||
|
|
||||||
|
1. 采用路径 A:`subapi` 外部服务模块化。
|
||||||
|
2. S2 结束时:自研 Router Core 主路径接管率(全供应商) >= 60%。
|
||||||
|
3. S2 结束时:国内 LLM 供应商主路径接管率 = 100%。
|
||||||
|
|
||||||
|
## 10. 待拍板的 4 个决策
|
||||||
|
|
||||||
|
1. S3 机器人能力优先顺序(成本优化优先 or 运维优先)。
|
||||||
|
2. S4 是否把“低成本账号模块”作为独立付费包。
|
||||||
|
3. 合规策略默认是“严格拦截”还是“告警+人工复核”。
|
||||||
|
4. 迁移节奏是“行业试点优先”还是“全客户统一批次”。
|
||||||
|
|
||||||
|
## 11. `subapi` 快速迭代集成保障机制(新增)
|
||||||
|
|
||||||
|
1. 版本治理
|
||||||
|
- 生产环境固定 `subapi` 次版本(例如 `vX.Y.*` 锁定为 `vX.Y.Z`)。
|
||||||
|
- 仅在周级升级窗口内升级,不做临时线上直升。
|
||||||
|
2. 契约治理
|
||||||
|
- 建立 `subapi connector` 契约测试集(请求字段、错误码、usage、流式行为)。
|
||||||
|
- 每次升级必须通过“兼容矩阵”后才允许灰度。
|
||||||
|
3. 发布治理
|
||||||
|
- 灰度比例:5% -> 20% -> 50% -> 100%。
|
||||||
|
- 每阶段观察至少 2 小时关键指标(5xx、P95、token 成本偏差、fallback 比例)。
|
||||||
|
4. 回滚治理
|
||||||
|
- 保留上一稳定版本镜像和配置快照。
|
||||||
|
- 任一红线触发(错误率、延迟、成本偏差)立即自动回滚到上一稳定版本。
|
||||||
|
5. 变更治理
|
||||||
|
- 每周固定一次上游变更扫描(Release/Commit/Breaking Change)。
|
||||||
|
- 对潜在破坏性变更创建“影响单”,先评估再排期升级。
|
||||||
|
|
||||||
|
## 12. S2 执行基线文档(新增)
|
||||||
|
|
||||||
|
为落实你已确认的 S2 目标(全供应商 `>=60%`、国内供应商 `100%`),新增执行层文档:
|
||||||
|
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
|
||||||
|
该文档补齐了三类执行要素:
|
||||||
|
|
||||||
|
1. 接管率统一口径与计算公式(避免统计歧义)
|
||||||
|
2. 模块拆分 + 迁移优先级 + 批次节奏
|
||||||
|
3. 可验收测试矩阵与阶段门槛(质量/账务/接管率)
|
||||||
|
|
||||||
|
## 13. S2 执行附件(新增)
|
||||||
|
|
||||||
|
在 S2 阶段,除执行基线文档外,补充两份落地附件:
|
||||||
|
|
||||||
|
1. `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md`
|
||||||
|
- 定义接管率统计 SQL 与看板口径(overall/cn)。
|
||||||
|
2. `router_core_s2_acceptance_test_cases_v1_2026-03-17.md`
|
||||||
|
- 按模块展开验收测试清单与分波次门禁。
|
||||||
|
|
||||||
|
## 14. 兼容性、安全与运维可靠性设计附件(新增)
|
||||||
|
|
||||||
|
为回应“subapi 快速迭代 + 商用集成”的实施风险,补充设计文档:
|
||||||
|
|
||||||
|
1. `subapi_integration_compat_security_reliability_design_v1_2026-03-17.md`
|
||||||
|
- 定义兼容闸门、风险分级、强制配置硬化基线与运维可靠性机制。
|
||||||
|
|
||||||
|
## 15. 风险控制执行任务单(新增)
|
||||||
|
|
||||||
|
为避免“设计有了但执行失焦”,补充两周执行任务单:
|
||||||
|
|
||||||
|
1. `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md`
|
||||||
|
- 以绝对日期管理实施节奏(2026-03-18 至 2026-03-31)
|
||||||
|
- 绑定责任角色、验收标准和证据产物
|
||||||
|
- 纳入 daily/weekly gate 与回滚演练要求
|
||||||
|
|
||||||
|
## 16. 专家审核与博弈机制(新增)
|
||||||
|
|
||||||
|
为避免实施过程出现“团队内共识偏差”,补充独立专家审核机制:
|
||||||
|
|
||||||
|
1. `subapi_expert_review_wargame_plan_v1_2026-03-17.md`
|
||||||
|
- 明确专家构成与独立性约束(含安全/合规一票否决)
|
||||||
|
- 通过对抗式评审验证集成可行性、替换路径可行性与商用可靠性
|
||||||
|
- 输出 GO / CONDITIONAL GO / NO-GO 结论与整改闭环
|
||||||
|
|
||||||
|
## 17. 三角色联合评审与优化(新增,2026-03-18)
|
||||||
|
|
||||||
|
为进一步降低实施期“用户体验断层、质量门禁失效、网关替换不可逆”风险,新增:
|
||||||
|
|
||||||
|
1. `subapi_role_based_review_wargame_optimization_v1_2026-03-18.md`
|
||||||
|
- 角色范围:用户代表、测试专家、网关专家
|
||||||
|
- 输出三角色 Red/Blue 博弈结论与条件放行门槛
|
||||||
|
- 绑定新增任务:`UXR-001/002`、`TST-001/002/003`、`GAT-001/002/003`、`EXP-007`
|
||||||
18
docs/llm_gateway_subapi_evolution_plan_v3_2026-03-18.md
Normal file
18
docs/llm_gateway_subapi_evolution_plan_v3_2026-03-18.md
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# 商用 LLM 通用转发网关演进方案(Subapi First)
|
||||||
|
|
||||||
|
- 版本:v3.0(历史归档)
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 状态:已废弃(仅保留兼容引用)
|
||||||
|
|
||||||
|
## 说明
|
||||||
|
|
||||||
|
本文件已由 v4.1 基线版本替代,不再作为执行与验收依据。
|
||||||
|
|
||||||
|
请使用以下主基线文档:
|
||||||
|
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md`
|
||||||
|
|
||||||
|
## 变更治理
|
||||||
|
|
||||||
|
1. 从 2026-03-18 起,阶段日期、S2 接管率口径、验收门禁以 v4.1 主基线为唯一事实源。
|
||||||
|
2. 本文件保留的唯一用途是兼容旧文档的历史链接。
|
||||||
600
docs/llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md
Normal file
600
docs/llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md
Normal file
@@ -0,0 +1,600 @@
|
|||||||
|
# 商用 LLM 通用转发网关演进方案(Subapi First)
|
||||||
|
|
||||||
|
- 版本:v4.1(基线收敛版)
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 基线文档:
|
||||||
|
- `llm_gateway_prd_v0_2026-03-16.md`
|
||||||
|
- `llm_gateway_product_technical_blueprint_v1_2026-03-16.md`
|
||||||
|
- `subapi_integration_readiness_checklist_2026-03-16.md` → **统一使用 "subapi"**
|
||||||
|
- `supply_side_product_design_v1_2026-03-18.md`
|
||||||
|
- `s2_staged_verification_mechanism_v1_2026-03-18.md`
|
||||||
|
- `tos_compliance_engine_design_v1_2026-03-18.md`
|
||||||
|
- 文档目标:在既有 PRD/技术蓝图基础上,明确"先集成 subapi,再逐步替代客户端 subapi,最终形成企业级控制面"的演进路径与边界,整合评审新增内容。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. 术语字典(v3.1新增)
|
||||||
|
|
||||||
|
| 术语 | 英文 | 定义 | 备注 |
|
||||||
|
|------|------|------|------|
|
||||||
|
| **subapi** | Subapi | 开源 LLM 转发项目,本方案的核心集成对象 | 统一使用 "subapi",不再使用 "sub2api" |
|
||||||
|
| **供应方** | Provider | 在平台挂载多余LLM配额的个人或企业 | 平台的用户角色 |
|
||||||
|
| **供应商** | Supplier | LLM 服务提供商(如 OpenAI、Anthropic、百度等) | 上游账号来源 |
|
||||||
|
| **采购折扣系数** | Procurement Discount | 供应方获得官方价格的折扣比例 | 本方案定义为 60% |
|
||||||
|
| **毛利率** | Gross Margin | 平台销售收入与采购成本的差额比例 | 目标 15-50% |
|
||||||
|
| **接管率** | Takeover Rate | 自研 Router Core 处理请求的比例 | S2目标60%(全供应商) |
|
||||||
|
| **国内供应商** | Domestic Provider | 国内 LLM 服务商(百度、讯飞、腾讯等) | S2目标100%接管 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 本轮续规划目标
|
||||||
|
|
||||||
|
围绕你提出的方向,形成四个连续目标:
|
||||||
|
|
||||||
|
1. 第一阶段以 `subapi` 作为集成底座,快速形成可售卖能力。
|
||||||
|
2. 逐步替代用户端 `subapi` 接入,统一到我方网关入口与控制面。
|
||||||
|
3. 后续增强"机器人客户(Bot Customer)"能力,支持自动化运营与客户成功流程。
|
||||||
|
4. 增加独立模块:
|
||||||
|
- LLM 供应商账号导入(BYOA/BYOK)
|
||||||
|
- 低成本 LLM 账号获取与治理(仅限合规来源)
|
||||||
|
- 合规与审计增强(ToS、隐私、风控)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 三种演进路径对比(先决策)
|
||||||
|
|
||||||
|
### 路径 A:`subapi` 外部服务模块化(推荐)
|
||||||
|
|
||||||
|
1. 做法:
|
||||||
|
- `subapi` 作为独立服务部署在数据面旁路。
|
||||||
|
- 我方网关只通过内部 API 调用 `subapi` 的能力。
|
||||||
|
- 核心账务、租户、权限、审计在我方控制面自研。
|
||||||
|
2. 优点:
|
||||||
|
- 集成最快,风险隔离最好,回滚简单。
|
||||||
|
- 可逐步替换,不会一次性重构核心链路。
|
||||||
|
3. 缺点:
|
||||||
|
- 双系统运维复杂度增加。
|
||||||
|
- 部分路由决策需跨服务,调优链路更长。
|
||||||
|
|
||||||
|
### 路径 B:反向代理链路串联(次选)
|
||||||
|
|
||||||
|
1. 做法:我方网关前置,`subapi` 作为后置中转层。
|
||||||
|
2. 优点:接入快,早期改造少。
|
||||||
|
3. 缺点:链路故障定位复杂,长期可维护性差。
|
||||||
|
|
||||||
|
### 路径 C:Fork 深度改造(谨慎)
|
||||||
|
|
||||||
|
1. 做法:维护私有 `subapi` 分支并深改为主系统。
|
||||||
|
2. 优点:短期控制力强。
|
||||||
|
3. 缺点:长期升级成本高,版本漂移和维护负担重。
|
||||||
|
|
||||||
|
**结论:推荐路径 A,保持"快上线 + 可替换 + 可审计"的平衡。**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 目标架构(控制面优先,数据面渐进替换)
|
||||||
|
|
||||||
|
### 3.1 架构原则
|
||||||
|
|
||||||
|
1. 控制面主权:租户、计费、权限、审计、合规全部沉淀在我方。
|
||||||
|
2. 数据面兼容:北向接口保持 OpenAI/Anthropic/Gemini 兼容,不强推客户改协议。
|
||||||
|
3. 双轨演进:短期借力 `subapi`,中期逐步把关键能力迁入自研 Router Core。
|
||||||
|
4. 风险前置:把合规和 ToS 校验纳入请求前置策略,而非事后补救。
|
||||||
|
|
||||||
|
### 3.2 分层模块
|
||||||
|
|
||||||
|
1. Ingress & Compatibility Layer
|
||||||
|
- 统一 `/v1/*` 入口
|
||||||
|
- 客户端兼容适配(历史 subapi 客户端参数映射)
|
||||||
|
2. Routing Orchestration Layer
|
||||||
|
- Provider Registry
|
||||||
|
- Policy Engine(预算、白名单、限流、回退)
|
||||||
|
- `subapi connector`(阶段 1-2)
|
||||||
|
3. Provider Adapter Layer
|
||||||
|
- OpenAI / Anthropic / Gemini / OpenRouter / 其他国产模型
|
||||||
|
- 统一 usage 与错误标准化
|
||||||
|
4. Control Plane
|
||||||
|
- Tenant/RBAC
|
||||||
|
- Billing Ledger(pre-settle-refund)
|
||||||
|
- Audit & Compliance
|
||||||
|
- Provider Account Vault
|
||||||
|
5. Bot Customer Layer(后续)
|
||||||
|
- 客户成功机器人
|
||||||
|
- 运维机器人
|
||||||
|
- 成本优化机器人
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 分阶段路线图(以 2026-03-18 为起点)
|
||||||
|
|
||||||
|
### 阶段 S0:供应侧产品设计准备(2026-03-18 至 2026-06-08,12周)
|
||||||
|
|
||||||
|
**目标**:验证"用户分享多余LLM套餐"独特场景的产品化可行性
|
||||||
|
|
||||||
|
> 本阶段为评审建议新增,聚焦平台的核心差异化场景
|
||||||
|
|
||||||
|
1. 供应侧 MVP 能力
|
||||||
|
- 供应方入驻与实名认证
|
||||||
|
- 套餐验证引擎 v1(L1+L2)
|
||||||
|
- 手动定价机制
|
||||||
|
- 基础赔付机制
|
||||||
|
2. 验收标准
|
||||||
|
- 引入首批 10 家供应方
|
||||||
|
- 套餐验证成功率 >= 90%
|
||||||
|
|
||||||
|
### 阶段 S1:Subapi 集成上线(2026-03-18 至 2026-05-15,与 S0 并行)
|
||||||
|
|
||||||
|
目标:在 8 周内构建"可售卖 MVP",验证付费与稳定性。
|
||||||
|
|
||||||
|
1. 产品能力
|
||||||
|
- 统一入口 + 基础路由 + fallback
|
||||||
|
- 租户/团队/Key 管理
|
||||||
|
- 预算阈值告警 + 成本归因
|
||||||
|
- 账单导出
|
||||||
|
2. 技术动作
|
||||||
|
- 采用路径 A:`subapi` 外部服务模块化接入
|
||||||
|
- 建立 `subapi connector` 与 `adapter registry`
|
||||||
|
- 接入监控基线:错误率、P95、成本突增、熔断次数
|
||||||
|
3. Go/No-Go
|
||||||
|
- 灰度 7 天可用性 >= 99.9%
|
||||||
|
- 成本差错率 <= 0.1%
|
||||||
|
- 回滚演练通过(30 分钟内)
|
||||||
|
|
||||||
|
### 阶段 S2:替代用户端 subapi(2026-05-16 至 2026-08-15)
|
||||||
|
|
||||||
|
**目标**:将客户"直接用 subapi"的模式迁到"统一用我方网关"。
|
||||||
|
|
||||||
|
> ⚠️ 根据评审建议,增加40%中间检查点,分阶段验证
|
||||||
|
|
||||||
|
1. 客户迁移机制
|
||||||
|
- SDK/配置迁移向导(base URL、key、模型映射)
|
||||||
|
- 兼容层保留旧参数与错误码映射
|
||||||
|
- 按租户分批迁移(10%/30%/40%/60%/100%)
|
||||||
|
|
||||||
|
2. 技术动作
|
||||||
|
- 自研 Router Core 接管 >= 60% 主路径请求(全供应商口径)
|
||||||
|
- **国内 LLM 供应商主路径请求由自研 Router Core 接管率 = 100%**
|
||||||
|
- `subapi` 仅保留长尾协议或备用回退
|
||||||
|
- 完成 Provider 扩容到至少 6 家可用供应商
|
||||||
|
|
||||||
|
3. 分阶段验证(评审建议新增)
|
||||||
|
|
||||||
|
| 阶段 | 时间 | 接管率 | 验收方式 |
|
||||||
|
|------|------|--------|----------|
|
||||||
|
| S2-A | W1-W4 | 10% | Gate A 通过 |
|
||||||
|
| S2-B | W5-W8 | 30% | Gate B 通过 |
|
||||||
|
| **S2-C1** | **W9-W10** | **40%** | **Gate C1 中间检查点** |
|
||||||
|
| S2-C2 | W11-W13 | 60% | Gate C2 通过 |
|
||||||
|
|
||||||
|
4. 商业目标
|
||||||
|
- 新签客户默认走我方入口,不再直接依赖客户侧 subapi
|
||||||
|
- 迁移客户 30 天留存与成功率不低于迁移前
|
||||||
|
|
||||||
|
### 阶段 S3:机器人客户能力(2026-08-16 至 2026-10-31)
|
||||||
|
|
||||||
|
**目标**:把"人肉运维与客户成功"升级为"机器人辅助闭环"。
|
||||||
|
|
||||||
|
> ⚠️ 根据评审建议,机器人能力优先"运维优先",再扩展到成本优化
|
||||||
|
|
||||||
|
1. Bot Customer 能力包(优先级调整)
|
||||||
|
- **运维机器人(最高优先级)**:异常解释、回滚建议、故障摘要
|
||||||
|
- **成本优化机器人**:给出模型替换建议与预算调优建议
|
||||||
|
- **客户成功机器人**:新客户接入引导、健康巡检、续费风险提示
|
||||||
|
2. 边界
|
||||||
|
- 机器人只提供建议和自动化执行草案,关键动作仍需审批
|
||||||
|
- 保留审计轨迹(谁触发、依据什么策略、最终执行结果)
|
||||||
|
3. 技术依赖
|
||||||
|
- 事件总线与规则引擎
|
||||||
|
- 对话上下文存储
|
||||||
|
- 可观测数据聚合与指标解释层
|
||||||
|
|
||||||
|
### 阶段 S4:供应商账号导入 + 低成本账号模块 + 合规增强(2026-11-01 至 2027-03-31)
|
||||||
|
|
||||||
|
**目标**:形成差异化护城河,但严格遵循合规边界。
|
||||||
|
|
||||||
|
> ⚠️ 根据评审建议,合规策略默认采用"告警+人工复核"模式
|
||||||
|
|
||||||
|
1. 供应商账号导入(BYOA/BYOK)
|
||||||
|
- 支持客户导入 OpenAI/Anthropic/Gemini/其他账号凭证
|
||||||
|
- Vault 加密存储、轮换、最小权限与可撤销
|
||||||
|
- 账号健康状态、额度状态与风险状态统一可视化
|
||||||
|
2. 低成本账号模块(仅合规来源)
|
||||||
|
- 仅允许官方渠道或授权分销渠道
|
||||||
|
- 建立账号来源证明、合同/授权凭证、风控评分
|
||||||
|
- **禁止任何违反上游 ToS 的灰色接入方式**
|
||||||
|
3. 合规增强(评审建议新增详细设计)
|
||||||
|
- ToS Policy Engine:按供应商、区域、模型动态拦截
|
||||||
|
- 合规执行模式:**告警+人工复核**(默认)
|
||||||
|
- 数据与隐私策略:PII 脱敏、日志保留策略、跨境策略
|
||||||
|
- 审计报表:面向法务/安全/财务的统一合规报表
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 供应侧产品设计(评审建议新增章节)
|
||||||
|
|
||||||
|
> 本章节详细设计"用户分享多余LLM套餐"这一核心独特场景
|
||||||
|
|
||||||
|
### 5.1 供应侧业务模型
|
||||||
|
|
||||||
|
| 角色 | 定义 | 核心诉求 |
|
||||||
|
|------|------|---------|
|
||||||
|
| **供应方(Provider)** | 拥有多余LLM配额的个人或企业 | 将闲置配额变现,回笼资金 |
|
||||||
|
| **平台(Platform)** | 统一网关平台 | 汇集供应方资源,提供稳定服务,赚取差价 |
|
||||||
|
| **需求方(Consumer)** | 需要LLM调用能力的企业/开发者 | 以优惠价格获取LLM服务,无需自建账号 |
|
||||||
|
|
||||||
|
### 5.2 统购统销定价模型
|
||||||
|
|
||||||
|
| 定价层级 | 定价方式 | 参数 |
|
||||||
|
|----------|----------|------|
|
||||||
|
| **采购价(P0)** | 平台统一定价收购 | **采购折扣系数 = 60%**(供应方获得官方价格60%) |
|
||||||
|
| **出售价(P1)** | 平台加价出售 | **毛利率目标 = 15-50%** |
|
||||||
|
|
||||||
|
### 5.3 套餐有效性验证机制
|
||||||
|
|
||||||
|
| 验证层级 | 验证内容 | 验证方式 |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| **L1 基础验证** | API Key格式、供应商连通性 | 自动调用供应商API检查有效性 |
|
||||||
|
| **L2 额度验证** | 剩余配额、账户状态 | 调用供应商账户API获取额度信息 |
|
||||||
|
| **L3 行为验证** | 账户历史行为、风险评分 | 平台风控模型评估 |
|
||||||
|
| **L4 持续监控** | 配额消耗异常、账户异常 | 实时监控+告警 |
|
||||||
|
|
||||||
|
### 5.4 风险控制
|
||||||
|
|
||||||
|
| 风险类型 | 防控措施 |
|
||||||
|
|----------|----------|
|
||||||
|
| 套餐有效性风险 | 实时监控+提前告警,自动切换备用套餐 |
|
||||||
|
| 滥用风险(薅羊毛) | 供应方需缴纳保证金(个人500/企业5000),实名认证 |
|
||||||
|
| ToS 合规风险 | ToS 合规引擎(详见第8章) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 核心模块设计补充
|
||||||
|
|
||||||
|
### 6.1 Provider Account Vault
|
||||||
|
|
||||||
|
1. 凭证存储:KMS + 字段级加密 + 审计日志。
|
||||||
|
2. 生命周期:导入、校验、轮换、停用、吊销。
|
||||||
|
3. 访问控制:仅策略引擎短时解密,禁止控制台明文展示。
|
||||||
|
|
||||||
|
### 6.2 ToS & Compliance Engine
|
||||||
|
|
||||||
|
1. 规则维度:供应商条款、区域限制、模型限制、使用场景限制。
|
||||||
|
2. 执行位置:请求前置硬拦截 + 请求后审计与告警。
|
||||||
|
3. 执行模式:**告警+人工复核**(默认),红线规则严格拦截。
|
||||||
|
4. 输出:
|
||||||
|
- 合规判定结果
|
||||||
|
- 拦截原因码
|
||||||
|
- 审计证据链
|
||||||
|
|
||||||
|
### 6.3 Bot Customer Orchestrator
|
||||||
|
|
||||||
|
1. 事件输入:预算超阈值、错误率突增、客户健康分下降。
|
||||||
|
2. 决策输出:建议动作(降级模型、加权切换、限流、回滚)。
|
||||||
|
3. 安全机制:高风险动作必须人工审批。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 指标与验收
|
||||||
|
|
||||||
|
> 唯一门禁口径来源:`acceptance_gate_single_source_v1_2026-03-18.md`。
|
||||||
|
> 若与其他文档阈值冲突,以该门禁表为准。
|
||||||
|
|
||||||
|
### 7.1 产品与商业指标
|
||||||
|
|
||||||
|
1. 从"客户侧 subapi"迁移到"我方网关入口"的迁移完成率。
|
||||||
|
2. 单客户月度受管成本(GMV)与毛利率:**15-50%**。
|
||||||
|
3. 预算超限前告警命中率与执行闭环率。
|
||||||
|
4. 供应侧指标:
|
||||||
|
- S0 结束时:供应方数量 >= 10
|
||||||
|
- S1 结束时:套餐验证成功率 >= 90%
|
||||||
|
|
||||||
|
### 7.2 技术与稳定性指标
|
||||||
|
|
||||||
|
1. 网关附加时延 P95(不含上游推理) <= 60ms。
|
||||||
|
2. 请求可追踪率(request_id 全链路覆盖) = 100%。
|
||||||
|
3. 账务差错率 <= 0.1%,并可追溯到请求级。
|
||||||
|
4. S2 结束时主路径接管率(全供应商) >= 60%。
|
||||||
|
5. **S2 结束时国内 LLM 供应商主路径接管率 = 100%**(硬性目标)
|
||||||
|
|
||||||
|
### 7.3 合规与风控指标
|
||||||
|
|
||||||
|
1. 高风险策略误放行率接近 0(以红线规则为硬约束)。
|
||||||
|
2. 供应商 ToS 规则覆盖率(已接入供应商) = 100%。
|
||||||
|
3. 关键管理操作审计覆盖率 = 100%。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. ToS 合规引擎设计(评审建议新增章节)
|
||||||
|
|
||||||
|
### 8.1 合规规则体系
|
||||||
|
|
||||||
|
```
|
||||||
|
ToS 规则体系
|
||||||
|
│
|
||||||
|
├── 🔴 红线规则(Red Line)- 严格拦截
|
||||||
|
│ ├── 账号共享禁令
|
||||||
|
│ ├── 转售禁令
|
||||||
|
│ ├── 商业用途限制
|
||||||
|
│ └── 地区访问限制
|
||||||
|
│
|
||||||
|
├── 🟡 黄线规则(Yellow Line)- 告警+人工复核
|
||||||
|
│ ├── 使用量异常
|
||||||
|
│ ├── 调用模式异常
|
||||||
|
│ ├── 新型使用场景
|
||||||
|
│ └── 未明确允许的用途
|
||||||
|
│
|
||||||
|
└── 🟢 绿线规则(Green Line)- 通过
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 执行模式
|
||||||
|
|
||||||
|
| 阶段 | 执行模式 | 说明 |
|
||||||
|
|------|----------|------|
|
||||||
|
| S1 | 告警+人工复核 | 积累经验,完善规则 |
|
||||||
|
| S2 | 告警+人工复核 | 持续优化 |
|
||||||
|
| S3 | 逐步切换 | 黄线告警+复核,红线拦截 |
|
||||||
|
| S4 | 分类执行 | **红线拦截,黄线复核,绿线放行** |
|
||||||
|
|
||||||
|
### 8.3 供应商合规矩阵
|
||||||
|
|
||||||
|
```
|
||||||
|
供应商 │ 账号共享 │ 转售 │ 代理 │ 地区限制
|
||||||
|
────────────────┼──────────┼──────┼──────┼─────────
|
||||||
|
OpenAI │ 🔴 │ 🔴 │ 🟡 │ 🔴
|
||||||
|
Anthropic │ 🔴 │ 🔴 │ 🔴 │ 🔴
|
||||||
|
Gemini │ 🔴 │ 🔴 │ 🟡 │ 🔴
|
||||||
|
Azure OpenAI │ 🔴 │ 🟢 │ 🟢 │ 🟢
|
||||||
|
国内供应商 │ 🟡 │ 🟡 │ 🟡 │ 🔴
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 风险与应对
|
||||||
|
|
||||||
|
1. **低成本账号模块的法律风险**
|
||||||
|
- 应对:只做合规来源,法务前置审批,建立可审计证据链。
|
||||||
|
|
||||||
|
2. **机器人误触发导致运营事故**
|
||||||
|
- 应对:高风险动作审批制 + 演练 + 回滚预案。
|
||||||
|
|
||||||
|
3. **迁移期间双轨链路复杂度上升**
|
||||||
|
- 应对:按租户灰度、双写观测、阶段性去耦目标明确。
|
||||||
|
|
||||||
|
4. **`subapi` 快速迭代导致兼容性回归**
|
||||||
|
- 应对:版本锁定 + 契约测试 + 周级升级窗口 + 自动回滚。
|
||||||
|
|
||||||
|
5. **国内供应商100%接管风险**(评审新增)
|
||||||
|
- 应对:S2阶段分步骤验证,40%中间检查点,未达标可回滚
|
||||||
|
|
||||||
|
6. **Subapi API Key 安全漏洞(P0)**
|
||||||
|
- 问题:Subapi 的 API Key 只验证算法,不验证来源,不同部署的 Key 可互相串用
|
||||||
|
- 应对:我们的系统必须自建 Key 体系,Key 必须包含平台标识(如 `lgw-`),必须数据库验证
|
||||||
|
- 详见:`security_api_key_vulnerability_analysis_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 本周可执行动作(2026-03-18 当周)
|
||||||
|
|
||||||
|
1. 冻结 S0 范围和不做清单(避免范围膨胀)。
|
||||||
|
2. 明确供应侧产品MVP范围:
|
||||||
|
- 供应方入驻流程
|
||||||
|
- 套餐验证L1+L2
|
||||||
|
- 统购统销定价(采购60%,毛利15-50%)
|
||||||
|
3. 明确 `subapi connector` 契约。
|
||||||
|
4. 和法务定义 S4 的"低成本账号模块"红线条款模板。
|
||||||
|
5. 启动 S2 分阶段验证机制的技术设计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. 已拍板决策
|
||||||
|
|
||||||
|
| 编号 | 决策项 | 决策结果 | 日期 |
|
||||||
|
|------|--------|----------|------|
|
||||||
|
| D0-1 | 演进路径 | 路径 A:subapi 外部服务模块化 | 2026-03-17 |
|
||||||
|
| D0-2 | S2 全供应商接管率 | >= 60% | 2026-03-17 |
|
||||||
|
| D0-3 | S2 国内供应商接管率 | = 100% | 2026-03-17 |
|
||||||
|
| D1 | 采购折扣系数 | **60%** | 2026-03-18 |
|
||||||
|
| D2 | 毛利率目标 | **15-50%** | 2026-03-18 |
|
||||||
|
| D3 | 合规策略默认模式 | **告警+人工复核** | 2026-03-18 |
|
||||||
|
| D4 | S3 机器人能力优先级 | **运维优先** | 2026-03-18 |
|
||||||
|
| D5 | S2 中间检查点 | **40%** | 2026-03-18 |
|
||||||
|
| D6 | S2 接管率过程预警区间 | **40-60%(仅过程预警,不作为终验口径)** | 2026-03-18 |
|
||||||
|
| D7 | S0 周期 | **12周** | 2026-03-18 |
|
||||||
|
| D8 | S2 周期 | **13周** | 2026-03-18 |
|
||||||
|
|
||||||
|
## 12. 待拍板的决策
|
||||||
|
|
||||||
|
1. ~~S3 机器人能力优先顺序~~ → **已拍板:运维优先**
|
||||||
|
2. ~~合规策略默认模式~~ → **已拍板:告警+人工复核**
|
||||||
|
3. ~~S4 低成本账号模块~~ → **已拍板:不作为独立付费包,并入Enterprise,属于阶段性供应链功能增强**
|
||||||
|
4. ~~迁移节奏~~ → **已拍板:优先用户迁移**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 13. `subapi` 快速迭代集成保障机制
|
||||||
|
|
||||||
|
1. **版本治理**
|
||||||
|
- 生产环境固定 `subapi` 次版本(例如 `vX.Y.*` 锁定为 `vX.Y.Z`)。
|
||||||
|
- 仅在周级升级窗口内升级,不做临时线上直升。
|
||||||
|
|
||||||
|
2. **契约治理**
|
||||||
|
- 建立 `subapi connector` 契约测试集(请求字段、错误码、usage、流式行为)。
|
||||||
|
- 每次升级必须通过"兼容矩阵"后才允许灰度。
|
||||||
|
|
||||||
|
3. **发布治理**
|
||||||
|
- 灰度比例:5% -> 20% -> 50% -> 100%。
|
||||||
|
- 每阶段观察至少 2 小时关键指标(5xx、P95、token 成本偏差、fallback 比例)。
|
||||||
|
|
||||||
|
4. **回滚治理**
|
||||||
|
- 保留上一稳定版本镜像和配置快照。
|
||||||
|
- 任一红线触发(错误率、延迟、成本偏差)立即自动回滚到上一稳定版本。
|
||||||
|
|
||||||
|
5. **变更治理**
|
||||||
|
- 每周固定一次上游变更扫描(Release/Commit/Breaking Change)。
|
||||||
|
- 对潜在破坏性变更创建"影响单",先评估再排期升级。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 14. S2 分阶段验证机制(评审建议新增)
|
||||||
|
|
||||||
|
详见独立文档:`s2_staged_verification_mechanism_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
核心要点:
|
||||||
|
- 四阶段推进:10% → 30% → 40% → 60%
|
||||||
|
- **40% 中间检查点**作为关键决策点
|
||||||
|
- 明确的 Gate 验收标准和红灯阈值
|
||||||
|
- 回滚机制和责任矩阵
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 15. 关联文档清单
|
||||||
|
|
||||||
|
| 文档 | 状态 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| `llm_gateway_prd_v0_2026-03-16.md` | 已有 | 产品需求文档 |
|
||||||
|
| `llm_gateway_product_technical_blueprint_v1_2026-03-16.md` | 已有 | 技术蓝图 |
|
||||||
|
| `supply_side_product_design_v1_2026-03-18.md` | **新增** | 供应侧产品设计 |
|
||||||
|
| `s2_staged_verification_mechanism_v1_2026-03-18.md` | **新增** | S2分阶段验证机制 |
|
||||||
|
| `tos_compliance_engine_design_v1_2026-03-18.md` | **新增** | ToS合规引擎设计 |
|
||||||
|
| `business_model_profitability_design_v1_2026-03-18.md` | **新增** | 商业模式与盈利能力设计 |
|
||||||
|
| `supply_feature_technical_analysis_v1_2026-03-18.md` | **新增** | Subapi技术能力分析 |
|
||||||
|
| `supply_detailed_design_v1_2026-03-18.md` | **新增** | 用户供应完整详细设计 |
|
||||||
|
| `security_api_key_vulnerability_analysis_v1_2026-03-18.md` | **新增** | API Key安全漏洞分析 |
|
||||||
|
| `resource_assessment_plan_v1_2026-03-18.md` | **新增** | 资源评估与补充方案 |
|
||||||
|
| `s2_takeover_buffer_strategy_v1_2026-03-18.md` | **新增** | S2接管率目标Buffer策略 |
|
||||||
|
| `acceptance_gate_single_source_v1_2026-03-18.md` | **新增** | 唯一验收门禁表(Single Source) |
|
||||||
|
| `test_plan_go_aligned_v1_2026-03-18.md` | **新增** | Go主测试链路对齐方案 |
|
||||||
|
| `technical_architecture_optimized_v2_2026-03-18.md` | **新增** | 优化技术架构(最小栈+触发式扩容) |
|
||||||
|
| `tos_legal_communication_plan_v1_2026-03-18.md` | **新增** | ToS合规法务前置沟通方案 |
|
||||||
|
| `security_solution_v1_2026-03-18.md` | **新增** | 安全解决方案(P0修复) |
|
||||||
|
| `architecture_solution_v1_2026-03-18.md` | **新增** | 架构解决方案(P0修复) |
|
||||||
|
| `api_solution_v1_2026-03-18.md` | **新增** | API设计解决方案(P0修复) |
|
||||||
|
| `business_solution_v1_2026-03-18.md` | **新增** | 业务解决方案(P0修复) |
|
||||||
|
| `p1_optimization_solution_v1_2026-03-18.md` | **新增** | P1优化问题解决方案 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 16. P0/P1问题修复汇总
|
||||||
|
|
||||||
|
### 16.1 P0问题修复状态
|
||||||
|
|
||||||
|
| 问题ID | 问题 | 解决方案 | 状态 |
|
||||||
|
|--------|------|----------|------|
|
||||||
|
| S-01 | 计费数据防篡改缺失 | 双重记账 + 审计日志 | ✅ |
|
||||||
|
| S-02 | 跨租户隔离不完善 | RLS + 强制验证 | ✅ |
|
||||||
|
| S-03 | 密钥轮换机制缺失 | 生命周期管理 | ✅ |
|
||||||
|
| A-01 | Router Core自研风险 | 增加40%中间检查点与分阶段止损(终验目标仍为>=60%) | ✅ |
|
||||||
|
| A-02 | subapi耦合风险 | Adapter抽象层 | ✅ |
|
||||||
|
| A-03 | 数据一致性风险 | 同步预扣+异步确认 | ✅ |
|
||||||
|
| API-01 | API版本管理缺失 | URL版本策略 | ✅ |
|
||||||
|
| API-02 | 错误码体系不完善 | 完整错误码设计 | ✅ |
|
||||||
|
| API-03 | SDK规划缺失 | Python/Node SDK | ✅ |
|
||||||
|
| B-01 | 资金池合规风险 | 资金托管+税务合规 | ✅ |
|
||||||
|
| B-02 | 计费精度风险 | Decimal精确计算 | ✅ |
|
||||||
|
| B-03 | 供应方结算风险 | 对账+保证金+阶梯 | ✅ |
|
||||||
|
|
||||||
|
### 16.2 P1问题修复状态
|
||||||
|
|
||||||
|
| 问题ID | 问题 | 解决方案 | 状态 |
|
||||||
|
|--------|------|----------|------|
|
||||||
|
| S-04 | ToS合规检测不完整 | 动态监控 | ✅ |
|
||||||
|
| S-05 | 激活码安全强度不足 | HMAC-SHA256 | ✅ |
|
||||||
|
| A-04 | 缺乏容量规划 | 基线测试+公式 | ✅ |
|
||||||
|
| A-05 | 故障隔离不完善 | 断路器+舱壁 | ✅ |
|
||||||
|
| A-06 | 可观测性不足 | SLI/SLO体系 | ✅ |
|
||||||
|
| API-04 | 限流设计不足 | 多维度限流 | ✅ |
|
||||||
|
| API-05 | 缺乏批量操作 | Batch API | ✅ |
|
||||||
|
| API-06 | Webhooks缺失 | Webhook机制 | ✅ |
|
||||||
|
| B-04 | 毛利率不稳定 | 动态定价引擎 | ✅ |
|
||||||
|
| B-05 | 风控覆盖不完整 | 需求方风控 | ✅ |
|
||||||
|
| B-06 | 定价模型不清晰 | 明确定价公式 | ✅ |
|
||||||
|
|
||||||
|
> 详见独立文档:
|
||||||
|
> - `supply_feature_technical_analysis_v1_2026-03-18.md` - 技术评估
|
||||||
|
> - `supply_side_product_design_v1_2026-03-18.md` - 产品设计
|
||||||
|
> - `supply_detailed_design_v1_2026-03-18.md` - 完整详细设计
|
||||||
|
|
||||||
|
## 17. Subapi 技术评估与用户供应功能设计

### 17.1 Subapi 技术评估结论
|
||||||
|
|
||||||
|
| 维度 | 评估 |
|
||||||
|
|------|------|
|
||||||
|
| 技术了解深度 | **充分** - 已有详细 Connector 契约 |
|
||||||
|
| 供应商覆盖 | **10+ 供应商**,100+ 模型 |
|
||||||
|
| 集成可行性 | **可行** - 路径 A 风险可控 |
|
||||||
|
|
||||||
|
### 17.2 Subapi 供应商能力
|
||||||
|
|
||||||
|
| 类别 | 供应商 |
|
||||||
|
|------|--------|
|
||||||
|
| 海外主力 | OpenAI, Anthropic, Gemini, Antigravity, Sora, Bedrock |
|
||||||
|
| 国内支持 | 百度文心, 讯飞星火, 腾讯混元 |
|
||||||
|
|
||||||
|
### 17.3 Subapi 安全能力评估
|
||||||
|
|
||||||
|
| 能力 | 支持情况 | 说明 |
|
||||||
|
|------|----------|------|
|
||||||
|
| API Key 鉴权 | ✅ | 平台统一管理 Key |
|
||||||
|
| Token 级计费 | ✅ | 精确追踪 |
|
||||||
|
| 并发/速率限制 | ✅ | 可配置 |
|
||||||
|
| IP 白名单 | ⚠️ | 有限支持 |
|
||||||
|
| ToS 合规检测 | ❌ | 无专门检测 |
|
||||||
|
|
||||||
|
### 17.4 关键发现:用户供应功能
|
||||||
|
|
||||||
|
⚠️ **Subapi 不支持"用户分享LLM供应"功能**
|
||||||
|
|
||||||
|
- Subapi 模式:平台方统一管理上游账号 → 分发给用户使用
|
||||||
|
- 用户需求:用户可挂载自己账号 → 平台售卖 → 收益分成
|
||||||
|
|
||||||
|
**结论**:用户供应功能需平台层自研,不在 subapi 范围内
|
||||||
|
|
||||||
|
### 17.5 用户供应功能详细设计
|
||||||
|
|
||||||
|
#### 17.5.1 安全机制
|
||||||
|
|
||||||
|
| 模块 | 功能 |
|
||||||
|
|------|------|
|
||||||
|
| 账号挂载 | 格式校验 → 有效性验证 → 额度查询 → ToS检查 → 风险评估 → 加密存储 |
|
||||||
|
| 调用验证 | API Key → 套餐有效性 → 额度检查 → 风控 → ToS合规 |
|
||||||
|
| 防欺诈 | 额度异常/短时大量/新账号高额/跨地区/账号共享检测 |
|
||||||
|
| 保证金 | 个人¥500 / 企业¥5,000 |
|
||||||
|
|
||||||
|
#### 17.5.2 核心数据模型
|
||||||
|
|
||||||
|
| 表名 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| `supply_accounts` | 供应方账号表 |
|
||||||
|
| `supply_packages` | 供应套餐表 |
|
||||||
|
| `supply_orders` | 订单表 |
|
||||||
|
| `supply_usage_records` | 使用记录表(每次调用) |
|
||||||
|
| `supply_earnings` | 供应方收益表 |
|
||||||
|
| `supply_settlements` | 结算记录表 |
|
||||||
|
|
||||||
|
#### 17.5.3 调度策略
|
||||||
|
|
||||||
|
| 策略 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| 最低价格 | 选择售价最低的套餐 |
|
||||||
|
| 负载均衡 | 选择负载最低的套餐 |
|
||||||
|
| 轮询 | 依次选择各套餐 |
|
||||||
|
| 供应商偏好 | 优先特定供应商 |
|
||||||
|
|
||||||
|
### 17.6 用户供应功能阶段规划
|
||||||
|
|
||||||
|
| 阶段 | 时间 | 任务 |
|
||||||
|
|------|------|------|
|
||||||
|
| S0-a | W1-W2 | 账号挂载模块(挂载/验证/下架) |
|
||||||
|
| S0-b | W3-W4 | 套餐发布模块(上架/定价/展示) |
|
||||||
|
| S0-c | W5-W6 | 调度与计费模块(实时调度/扣减/账单) |
|
||||||
|
| S0-d | W7-W8 | 风控模块(健康监控/欺诈检测/合规) |
|
||||||
|
| S0-e | W9-W10 | 内部测试 |
|
||||||
|
| S0-f | W11-W12 | 首批供应方引入(10家) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档版本**:v4.1(整合评审建议 + 基线收敛版)
|
||||||
|
**下次评审**:S0 阶段结束(预计2026-06-08)
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:v4.1(整合评审建议 + 基线收敛版)
|
||||||
|
**下次评审**:S0 阶段结束(预计2026-06-08)
|
||||||
138
docs/llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md
Normal file
138
docs/llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
# 商用 LLM 通用转发网关演进方案(Subapi First)
|
||||||
|
|
||||||
|
- 版本:v4.2(凭证边界澄清版)
|
||||||
|
- 日期:2026-03-24
|
||||||
|
- 基线性质:SSOT(单一事实源)
|
||||||
|
- 适用阶段:S0-S4
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. 本版修订目的
|
||||||
|
|
||||||
|
本版仅做一件事:明确并冻结“用户分享 token 的边界”。
|
||||||
|
|
||||||
|
**本方案认可并采用以下业务链路:**
|
||||||
|
|
||||||
|
`用户A供给 -> 平台 -> 用户B购买服务`
|
||||||
|
|
||||||
|
但该链路必须满足以下前提:
|
||||||
|
|
||||||
|
1. 用户A分享对象是平台,不是用户B。
|
||||||
|
2. 用户B购买的是平台服务能力,不是用户A的上游账号凭证。
|
||||||
|
3. 平台承担统一鉴权、审计、计费、合规责任。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 术语与边界(强制)
|
||||||
|
|
||||||
|
## 1.1 术语
|
||||||
|
|
||||||
|
| 术语 | 定义 |
|
||||||
|
|---|---|
|
||||||
|
| 供应方(用户A) | 向平台提供可调度配额的用户/企业 |
|
||||||
|
| 平台 | 统一网关与控制面,持有上游凭证托管能力 |
|
||||||
|
| 需求方(用户B) | 购买平台服务能力的用户/企业 |
|
||||||
|
| 上游凭证 | 供应商 API Key/OAuth Token 等原始凭证 |
|
||||||
|
| 平台凭证 | 平台签发给用户B的调用凭证(租户级/项目级) |
|
||||||
|
|
||||||
|
## 1.2 红线约束(MUST/MUST NOT)
|
||||||
|
|
||||||
|
1. `MUST`:上游凭证只能由平台密态托管(Vault/KMS),不得明文外发。
|
||||||
|
2. `MUST`:用户B只能使用平台凭证访问平台入口,不得直接调用供应方上游账号。
|
||||||
|
3. `MUST NOT`:任何 API/控制台/导出能力向用户B返回供应方上游凭证。
|
||||||
|
4. `MUST NOT`:日志、报表、告警中出现可复用的上游凭证片段。
|
||||||
|
5. `MUST`:平台在审计中可证明“每次请求均由平台凭证入站 + 平台侧代持上游凭证出站”。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 架构口径(v4.2)
|
||||||
|
|
||||||
|
## 2.1 入口鉴权
|
||||||
|
|
||||||
|
1. 北向入口只接受平台凭证(平台签发)。
|
||||||
|
2. 上游凭证不作为北向接口参数,不作为用户可见字段。
|
||||||
|
3. query key 外部一律拒绝,内部仅允许受控改写并全量审计。
|
||||||
|
|
||||||
|
## 2.2 出站调用
|
||||||
|
|
||||||
|
1. 平台在 Provider Adapter 层按策略选择供给池账号。
|
||||||
|
2. 上游调用由平台运行时短时解密,调用完成即销毁明文态。
|
||||||
|
3. 供应方账号仅用于平台代调度,不形成“用户B直连上游”的能力。
|
||||||
|
|
||||||
|
## 2.3 计费与结算
|
||||||
|
|
||||||
|
1. 用户B账单只面向平台服务计费口径。
|
||||||
|
2. 供应方结算在平台内部完成,不向用户B暴露可复用凭证或上游账户标识。
|
||||||
|
3. 争议处理以平台 request_id 为唯一追踪主键。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. ToS 合规执行(v4.2 统一口径)
|
||||||
|
|
||||||
|
1. “禁止账号共享/转售”的供应商条款,必须映射为可执行规则,不允许文档口头豁免。
|
||||||
|
2. 供应商策略分层:
|
||||||
|
- 允许平台代理模式:可进入供给池。
|
||||||
|
- 仅允许 BYOK:仅支持客户自持账号,不进入供给池。
|
||||||
|
- 不允许代理/转售:默认禁入并阻断。
|
||||||
|
3. 未取得法务书面结论前,相关供应商默认按“禁入供给池”处理。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 验收门禁补充(新增硬指标)
|
||||||
|
|
||||||
|
在原 `acceptance_gate_single_source_v1_2026-03-18.md` 基础上增加以下指标:
|
||||||
|
|
||||||
|
| 指标ID | 指标名 | 目标值 | 阻断阈值 | 说明 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| M-013 | `supplier_credential_exposure_events` | 0 | >0 即 P0 | 供应方上游凭证泄露事件数 |
|
||||||
|
| M-014 | `platform_credential_ingress_coverage_pct` | 100% | <100% 即阻断 | 入站请求使用平台凭证覆盖率 |
|
||||||
|
| M-015 | `direct_supplier_call_by_consumer_events` | 0 | >0 即 P0 | 用户B绕过平台直连供应方事件 |
|
||||||
|
| M-016 | `query_key_external_reject_rate_pct` | 100% | <100% 即阻断 | 外部 query key 拒绝率 |
|
||||||
|
|
||||||
|
红线触发动作:
|
||||||
|
|
||||||
|
1. 立即停止升波。
|
||||||
|
2. 触发自动回切。
|
||||||
|
3. 24小时内提交根因与修复证据。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. P0/P1 整改清单(v4.2 收敛版)
|
||||||
|
|
||||||
|
## 5.1 P0(开工前必须完成)
|
||||||
|
|
||||||
|
| ID | 问题 | v4.2 处置 |
|
||||||
|
|---|---|---|
|
||||||
|
| P0-1 | 凭证边界表达歧义 | 通过第1章冻结“服务可售、凭证不外发”的硬约束 |
|
||||||
|
| P0-2 | ToS 红线与商业口径冲突 | 通过第3章引入供应商分层准入,未确认默认禁入 |
|
||||||
|
| P0-3 | 法务结论未闭环 | 未签署前禁止进入相关上线 Gate |
|
||||||
|
|
||||||
|
## 5.2 P1(两周内完成)
|
||||||
|
|
||||||
|
| ID | 问题 | v4.2 处置 |
|
||||||
|
|---|---|---|
|
||||||
|
| P1-1 | 折扣/毛利/S0周期多口径 | 统一以本文件 + 唯一门禁表为准 |
|
||||||
|
| P1-2 | PostgreSQL 与 MySQL 方言混用 | SQL 示例收敛为 PostgreSQL 可执行版本 |
|
||||||
|
| P1-3 | 安全标准并存(MD5/HMAC) | 统一 HMAC-SHA256,旧方案标注废弃 |
|
||||||
|
| P1-4 | 证据目录缺失 | 补齐 `tests/`、`evidence/` 与可复跑报告链路 |
|
||||||
|
|
||||||
|
参数冻结补充(用于消除 P1-1 多口径):
|
||||||
|
1. 采购折扣系数:`60%`。
|
||||||
|
2. 毛利率目标区间:`15%-50%`。
|
||||||
|
3. S0 周期:以 `acceptance_gate_single_source_v1_2026-03-18.md` 与执行任务单里程碑为准。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 72小时落地计划
|
||||||
|
|
||||||
|
1. `T+24h`:完成全部文档中的凭证边界替换(删除“用户B获取供应方Key”措辞)。
|
||||||
|
2. `T+48h`:更新唯一门禁表并加入 M-013~M-016,接入 CI 阻断。
|
||||||
|
3. `T+72h`:提交法务签署状态、门禁演练报告、一次凭证泄露演练报告。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 与 v4.1 关系
|
||||||
|
|
||||||
|
1. v4.1 作为历史基线保留。
|
||||||
|
2. 从本文件发布起,凡与“凭证边界”冲突的 v4.1 表述全部失效。
|
||||||
|
3. 后续实施、评审、验收均以 v4.2 为准。
|
||||||
144
docs/open_source_gateway_codebase_deep_dive_v1_2026-03-17.md
Normal file
144
docs/open_source_gateway_codebase_deep_dive_v1_2026-03-17.md
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
# 开源网关代码深度理解(v1)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 范围:`sub2api-tar`、`new-api`、`one-api`、`litellm`
|
||||||
|
- 目标:为“Subapi First + 自研 Router Core 接管”提供源码级认知基线。
|
||||||
|
|
||||||
|
## 1. 目录与迁移确认
|
||||||
|
|
||||||
|
当前开源仓库已位于统一项目根:
|
||||||
|
|
||||||
|
- `/home/long/project/立交桥/llm-gateway-competitors`
|
||||||
|
|
||||||
|
核心仓库路径:
|
||||||
|
|
||||||
|
1. `/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar`
|
||||||
|
2. `/home/long/project/立交桥/llm-gateway-competitors/new-api`
|
||||||
|
3. `/home/long/project/立交桥/llm-gateway-competitors/one-api`
|
||||||
|
4. `/home/long/project/立交桥/llm-gateway-competitors/litellm`
|
||||||
|
|
||||||
|
## 2. 法务与商用风险快照
|
||||||
|
|
||||||
|
1. `sub2api-tar`:MIT(可商用,仍需遵守上游 ToS)
|
||||||
|
2. `new-api`:AGPLv3(对闭源商用有强约束)
|
||||||
|
3. `one-api`:MIT
|
||||||
|
4. `litellm`:仓库主体 MIT,`enterprise/` 子目录有独立授权边界
|
||||||
|
|
||||||
|
结论:你的主线“商用闭源控制面”不应以 `new-api` 作为核心代码基座。
|
||||||
|
|
||||||
|
## 3. 架构入口(代码级)
|
||||||
|
|
||||||
|
## 3.1 sub2api-tar(推荐重点)
|
||||||
|
|
||||||
|
主入口与启动装配:
|
||||||
|
|
||||||
|
1. `/backend/cmd/server/main.go`(setup 模式 + 主服务启动)
|
||||||
|
2. `/backend/internal/server/routes/gateway.go`(协议路由汇总)
|
||||||
|
|
||||||
|
核心特点:
|
||||||
|
|
||||||
|
1. 明确的多协议入口:`/v1`、`/v1beta`、`responses/chat` 别名
|
||||||
|
2. 账号调度和 failover 逻辑深(适合借鉴路由中台能力)
|
||||||
|
3. 并发槽位、等待队列、流式错误处理比较完整
|
||||||
|
|
||||||
|
建议你优先深读:
|
||||||
|
|
||||||
|
1. `/backend/internal/handler/openai_gateway_handler.go`
|
||||||
|
2. `/backend/internal/handler/gateway_handler.go`
|
||||||
|
3. `/backend/internal/handler/gemini_v1beta_handler.go`
|
||||||
|
4. `/backend/internal/service/openai_gateway_service.go`
|
||||||
|
5. `/backend/internal/service/gateway_service.go`
|
||||||
|
|
||||||
|
## 3.2 new-api(功能广但法务受限)
|
||||||
|
|
||||||
|
主入口:
|
||||||
|
|
||||||
|
1. `/main.go`
|
||||||
|
2. `/router/relay-router.go`
|
||||||
|
3. `/controller/relay.go`
|
||||||
|
|
||||||
|
核心特点:
|
||||||
|
|
||||||
|
1. 协议覆盖广(OpenAI/Claude/Gemini/Responses/Realtime)
|
||||||
|
2. `middleware.Distribute()` + `controller.Relay()` 主干清晰
|
||||||
|
3. 计费预扣/结算逻辑完善,但 AGPL 影响商用策略
|
||||||
|
|
||||||
|
可借鉴点:
|
||||||
|
|
||||||
|
1. RelayFormat 分发模型
|
||||||
|
2. 路由组织和中间件链条设计
|
||||||
|
|
||||||
|
## 3.3 one-api(经典轻量基线)
|
||||||
|
|
||||||
|
主入口:
|
||||||
|
|
||||||
|
1. `/main.go`
|
||||||
|
2. `/router/relay.go`
|
||||||
|
3. `/middleware/distributor.go`
|
||||||
|
|
||||||
|
核心特点:
|
||||||
|
|
||||||
|
1. 架构简单,易读
|
||||||
|
2. 渠道选择以“可用通道 + 随机”为主,策略深度有限
|
||||||
|
3. 适合作为最小网关实现参考,不适合直接承载企业治理能力
|
||||||
|
|
||||||
|
## 3.4 litellm(生态和工程成熟度最高)
|
||||||
|
|
||||||
|
架构说明文件:
|
||||||
|
|
||||||
|
1. `/ARCHITECTURE.md`
|
||||||
|
|
||||||
|
关键目录:
|
||||||
|
|
||||||
|
1. `/litellm`(SDK 核心)
|
||||||
|
2. `/proxy`(AI Gateway)
|
||||||
|
3. `/router.py` + `/router_strategy`(路由策略)
|
||||||
|
4. `/tests`(测试资产非常完整)
|
||||||
|
|
||||||
|
核心特点:
|
||||||
|
|
||||||
|
1. SDK 与 Gateway 分层清晰
|
||||||
|
2. Proxy 在 SDK 上叠加 auth/rate-limit/budget/routing
|
||||||
|
3. 作为“多供应商适配能力参考”价值高
|
||||||
|
|
||||||
|
## 4. 代码体量画像(按文件后缀)
|
||||||
|
|
||||||
|
1. `sub2api-tar`:Go 1004,Vue 165,TS 139
|
||||||
|
2. `new-api`:Go 511,JSX 321
|
||||||
|
3. `one-api`:Go 235,JS 242
|
||||||
|
4. `litellm`:Python 3500,TSX 844,Markdown 869
|
||||||
|
|
||||||
|
解读:
|
||||||
|
|
||||||
|
1. `sub2api` 在 Go 侧中台能力深,适合做 S1/S2 接入基座。
|
||||||
|
2. `litellm` 在多 Provider 抽象和测试体系更强,适合策略与适配层借鉴。
|
||||||
|
|
||||||
|
## 5. 与你当前路线图的对齐建议
|
||||||
|
|
||||||
|
1. S1-S2 直接对齐 `subapi connector`:以 `sub2api-tar` 路由和 handler/service 为准做契约测试。
|
||||||
|
2. S2 国内供应商 100% 接管:优先把国内 Provider 的路由与计费从 `subapi` 迁出到自研 Router Core。
|
||||||
|
3. S3 机器人能力:优先消费你自研控制面的统一事件,不直接耦合 `subapi` 内部事件格式。
|
||||||
|
|
||||||
|
## 6. 下一轮深挖计划(v2)
|
||||||
|
|
||||||
|
建议按以下顺序继续深挖并输出第二版文档:
|
||||||
|
|
||||||
|
1. `sub2api` 调度链路时序图(选账号 -> 并发槽 -> failover -> usage 入账)
|
||||||
|
2. `sub2api` 计费链路字段表(请求级 idempotency 与 ledger 对齐)
|
||||||
|
3. `litellm` Router 策略可移植点(cost/latency/success 权重)
|
||||||
|
4. `new-api/one-api` 的可借鉴中间件清单(仅借鉴,不引入 AGPL 污染)
|
||||||
|
|
||||||
|
## 7. v2 文档已完成
|
||||||
|
|
||||||
|
已完成并落盘的 v2 深挖文档:
|
||||||
|
|
||||||
|
- `/home/long/project/立交桥/docs/sub2api_scheduler_billing_flow_deep_dive_v2_2026-03-17.md`
|
||||||
|
|
||||||
|
覆盖内容:
|
||||||
|
|
||||||
|
1. `SelectAccountWithScheduler` 三层调度策略与评分机制
|
||||||
|
2. 用户/账号并发槽位获取、等待队列与释放保障
|
||||||
|
3. Failover 触发条件、同账号重试、流式 no-replay 边界
|
||||||
|
4. `submitUsageRecordTask -> UsageRecordWorkerPool -> RecordUsage -> applyUsageBilling` 全链路
|
||||||
|
5. request_id + fingerprint 幂等扣费机制与冲突语义
|
||||||
756
docs/p1_optimization_solution_v1_2026-03-18.md
Normal file
756
docs/p1_optimization_solution_v1_2026-03-18.md
Normal file
@@ -0,0 +1,756 @@
|
|||||||
|
# P1优化问题解决方案
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:系统性解决评审发现的P1优化问题
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. ToS合规动态监控
|
||||||
|
|
||||||
|
### 1.1 问题
|
||||||
|
当前只检查静态规则,未考虑ToS动态变更
|
||||||
|
|
||||||
|
### 1.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ToSChangeMonitor:
    """Polls each configured LLM provider's Terms of Service and alerts on changes.

    Per cycle: fetch the current ToS -> diff against the stored snapshot ->
    analyze the changes and assess impact -> alert the security team ->
    persist the new snapshot. The helper methods used below
    (fetch_provider_tos, get_previous_tos, has_changes, analyze_changes,
    assess_impact, alert_security_team, save_tos_snapshot) are defined
    elsewhere — presumably in the full implementation; TODO confirm.
    """

    def __init__(self):
        # Providers whose ToS documents are monitored.
        self.providers = ['openai', 'anthropic', 'google', 'azure']
        # Polling interval in seconds (check hourly).
        self.monitoring_interval = 3600

    async def start_monitoring(self):
        """Run the monitoring loop forever, checking every provider each cycle."""
        while True:
            for provider in self.providers:
                try:
                    await self.check_provider_tos(provider)
                except Exception:
                    # Bug fix: the original called
                    # logger.error(f"ToS监控失败: {provider}", e), passing the
                    # exception as a %-format argument with no placeholder in
                    # the message, which breaks log record formatting.
                    # logger.exception logs the message plus the traceback.
                    logger.exception("ToS监控失败: %s", provider)

            await asyncio.sleep(self.monitoring_interval)

    async def check_provider_tos(self, provider: str):
        """Check one provider's ToS for changes and handle any detected diff."""
        # 1. Fetch the provider's current ToS document.
        current_tos = await self.fetch_provider_tos(provider)

        # 2. Compare against the previously stored snapshot.
        previous_tos = await self.get_previous_tos(provider)

        if self.has_changes(current_tos, previous_tos):
            # 3. Determine what exactly changed.
            changes = self.analyze_changes(current_tos, previous_tos)

            # 4. Assess the business/compliance impact of the changes.
            impact = self.assess_impact(provider, changes)

            # 5. Notify the security team.
            await self.alert_security_team(provider, changes, impact)

            # 6. Persist the new snapshot for the next comparison.
            await self.save_tos_snapshot(provider, current_tos)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 容量规划
|
||||||
|
|
||||||
|
### 2.1 问题
|
||||||
|
缺乏具体容量规划
|
||||||
|
|
||||||
|
### 2.2 解决方案
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# 容量规划模型
|
||||||
|
|
||||||
|
## 单实例基线(实测)
|
||||||
|
- QPS: 500-1000
|
||||||
|
- 延迟P99: 50-100ms
|
||||||
|
- 内存: 512MB
|
||||||
|
- CPU: 1核
|
||||||
|
|
||||||
|
## 容量公式
|
||||||
|
实例数 = ceil(峰值QPS / 单实例QPS * 冗余系数)
|
||||||
|
|
||||||
|
冗余系数 = 1.5 # 应对突发流量
|
||||||
|
|
||||||
|
## 阶段规划
|
||||||
|
S0:
|
||||||
|
- 峰值QPS: 100
|
||||||
|
- 推荐实例: 2
|
||||||
|
- Redis: 2GB
|
||||||
|
- DB: 10GB
|
||||||
|
|
||||||
|
S1:
|
||||||
|
- 峰值QPS: 500
|
||||||
|
- 推荐实例: 4
|
||||||
|
- Redis: 10GB
|
||||||
|
- DB: 50GB
|
||||||
|
|
||||||
|
S2:
|
||||||
|
- 峰值QPS: 2000
|
||||||
|
- 推荐实例: 8-10
|
||||||
|
- Redis: 50GB
|
||||||
|
- DB: 200GB
|
||||||
|
|
||||||
|
S3:
|
||||||
|
- 峰值QPS: 10000
|
||||||
|
- 推荐实例: 20+
|
||||||
|
- Redis: 200GB
|
||||||
|
- DB: 1TB
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 故障隔离
|
||||||
|
|
||||||
|
### 3.1 问题
|
||||||
|
缺乏故障隔离设计
|
||||||
|
|
||||||
|
### 3.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class FaultIsolation:
    """Fault-isolation mechanisms: circuit breaker + bulkhead for provider calls."""

    def __init__(self):
        # Per-provider circuit-breaker state.
        self.circuit_breakers = {}
        # Per-group concurrency guards (asyncio.Semaphore), created lazily.
        self.bulkheads = {}

    async def call_provider(
        self,
        provider: str,
        request: Request
    ) -> Response:
        """Call a provider behind its circuit breaker.

        Raises:
            CircuitOpenError: if the provider's circuit is currently open
                (fail fast instead of waiting on a known-bad provider).
        """
        # 1. Fail fast while the circuit is open.
        if self.is_circuit_open(provider):
            raise CircuitOpenError(provider)

        try:
            # 2. Perform the actual upstream call.
            response = await self.do_call(provider, request)
        except Exception as e:
            # 4. Record the failure and open the circuit if the failure
            # pattern crosses the threshold, then propagate to the caller.
            self.record_failure(provider, e)

            if self.should_open_circuit(provider):
                self.open_circuit(provider)

            raise
        else:
            # 3. Success: feed the breaker so it can close / stay closed.
            self.record_success(provider)
            return response

    def should_open_circuit(self, provider: str) -> bool:
        """Decide whether the provider's circuit should be opened.

        Trips on 5 consecutive failures OR a failure rate above 50%.
        """
        stats = self.get_failure_stats(provider)

        return stats.consecutive_failures >= 5 or stats.failure_rate > 0.5

    async def bulkhead_execute(
        self,
        group: str,
        func: callable,
        *args, **kwargs
    ):
        """Run *func* under the group's bulkhead (max 10 concurrent calls).

        The semaphore is created lazily on first use. The original used
        ``setdefault(group, asyncio.Semaphore(10))``, which constructs and
        discards a fresh Semaphore on *every* call even when the group
        already has one; the explicit lookup below avoids that.
        """
        semaphore = self.bulkheads.get(group)
        if semaphore is None:
            semaphore = self.bulkheads[group] = asyncio.Semaphore(10)  # max 10 concurrent

        async with semaphore:
            return await func(*args, **kwargs)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 可观测性体系
|
||||||
|
|
||||||
|
### 4.1 问题
|
||||||
|
缺乏具体SLI/SLO设计
|
||||||
|
|
||||||
|
### 4.2 解决方案
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# 可观测性体系设计
|
||||||
|
|
||||||
|
## SLI (Service Level Indicators)
|
||||||
|
slis:
|
||||||
|
availability:
|
||||||
|
- name: request_success_rate
|
||||||
|
description: 请求成功率
|
||||||
|
method: sum(rate(requests_total{service="router",status=~"2.."}[5m])) / sum(rate(requests_total{service="router"}[5m]))
|
||||||
|
objective: 99.95%
|
||||||
|
|
||||||
|
latency:
|
||||||
|
- name: latency_p99
|
||||||
|
description: P99延迟
|
||||||
|
method: histogram_quantile(0.99, rate(requests_duration_seconds_bucket{service="router"}[5m]))
|
||||||
|
objective: < 200ms
|
||||||
|
|
||||||
|
accuracy:
|
||||||
|
- name: billing_accuracy
|
||||||
|
description: 计费准确率
|
||||||
|
method: 1 - (billing_discrepancies / total_billing_records)
|
||||||
|
objective: 99.99%
|
||||||
|
|
||||||
|
## SLO (Service Level Objectives)
|
||||||
|
slos:
|
||||||
|
- name: gateway_availability
|
||||||
|
sli: request_success_rate
|
||||||
|
target: 99.95%
|
||||||
|
period: 30d
|
||||||
|
error_budget: 0.05%
|
||||||
|
|
||||||
|
- name: gateway_latency
|
||||||
|
sli: latency_p99
|
||||||
|
target: 99%
|
||||||
|
period: 30d
|
||||||
|
|
||||||
|
## 告警规则
|
||||||
|
alerts:
|
||||||
|
- name: AvailabilityBelowSLO
|
||||||
|
condition: availability < 99.9%
|
||||||
|
severity: P1
|
||||||
|
message: "网关可用性低于SLO,当前{{value}}%,目标99.95%"
|
||||||
|
|
||||||
|
- name: LatencyP99High
|
||||||
|
condition: latency_p99 > 500ms
|
||||||
|
severity: P1
|
||||||
|
message: "延迟过高,当前P99 {{value}}ms"
|
||||||
|
|
||||||
|
- name: BillingDiscrepancy
|
||||||
|
condition: billing_discrepancy_rate > 0.1%
|
||||||
|
severity: P0
|
||||||
|
message: "计费差异率异常,当前{{value}}%"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 多维度限流
|
||||||
|
|
||||||
|
### 5.1 问题
|
||||||
|
限流设计不足
|
||||||
|
|
||||||
|
### 5.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class MultiDimensionalRateLimiter:
    """Multi-dimensional rate limiting, evaluated most-global-first."""

    def __init__(self, redis: Redis):
        self.redis = redis

    async def check_rate_limit(self, request: Request) -> RateLimitResult:
        """Evaluate every applicable limit for *request*; first rejection wins."""
        limits = [
            # Global limit
            GlobalRateLimit(
                key='global',
                max_requests=100000,
                window=60
            ),
            # Per-tenant limit
            TenantRateLimit(
                key=f"tenant:{request.tenant_id}",
                max_requests=10000,
                window=60,
                burst=1500
            ),
            # Per-API-key limit
            APIKeyRateLimit(
                key=f"apikey:{request.api_key_id}",
                max_requests=1000,
                window=60,
                max_tokens=100000,
                window_tokens=60
            ),
            # Per-method limit
            MethodRateLimit(
                key=f"method:{request.method}",
                max_requests=500,
                window=60
            )
        ]

        for limit in limits:
            result = await self.check(limit, request)
            if not result.allowed:
                return result

        return RateLimitResult(allowed=True)

    async def check(self, limit, request):
        """Check one limit against its fixed-window counter in Redis.

        Uses INCR-then-EXPIRE instead of the original GET/SETEX/INCR
        sequence: read-then-write is not atomic, so two concurrent
        requests could both observe a missing key (undercounting) or
        both pass the threshold check before either incremented,
        allowing bursts past ``max_requests``. INCR is atomic, so the
        returned count is exact under concurrency; the allowed budget
        per window (``max_requests``) is unchanged.
        """
        key = f"ratelimit:{limit.key}"

        # Atomically bump the window counter; 1 means this call created the key.
        current = await self.redis.incr(key)
        if current == 1:
            # First hit in this window: start the window's TTL.
            await self.redis.expire(key, limit.window)

        if current > limit.max_requests:
            # Over budget: report when the window resets.
            ttl = await self.redis.ttl(key)
            return RateLimitResult(
                allowed=False,
                retry_after=ttl,
                limit=limit.max_requests,
                remaining=0
            )

        return RateLimitResult(
            allowed=True,
            limit=limit.max_requests,
            remaining=limit.max_requests - current
        )
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 批量操作API
|
||||||
|
|
||||||
|
### 6.1 问题
|
||||||
|
缺乏批量操作支持
|
||||||
|
|
||||||
|
### 6.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class BatchAPI:
    """Batch operation API (batch chat + batch key management)."""

    async def batch_chat(self, requests: List[ChatRequest]) -> List[ChatResponse]:
        """Run all chat requests concurrently, mapping failures to error responses."""
        # Fan out every request at once; exceptions come back as values.
        outcomes = await asyncio.gather(
            *(self.chat(req) for req in requests),
            return_exceptions=True,
        )

        responses = []
        for req, outcome in zip(requests, outcomes):
            if isinstance(outcome, Exception):
                # Preserve the request id so the caller can correlate errors.
                responses.append(ChatResponse(
                    error=str(outcome),
                    request_id=req.request_id
                ))
            else:
                responses.append(outcome)

        return responses

    async def batch_key_management(
        self,
        operations: List[KeyOperation]
    ) -> BatchKeyResult:
        """Execute key operations sequentially, collecting per-op outcomes."""
        results = []

        for op in operations:
            try:
                outcome = await self.execute_key_operation(op)
            except Exception as e:
                results.append({
                    'key_id': op.key_id,
                    'status': 'failed',
                    'error': str(e)
                })
            else:
                results.append({
                    'key_id': op.key_id,
                    'status': 'success',
                    'result': outcome
                })

        succeeded = sum(1 for r in results if r['status'] == 'success')
        return BatchKeyResult(
            total=len(operations),
            succeeded=succeeded,
            failed=sum(1 for r in results if r['status'] == 'failed'),
            results=results
        )
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Webhooks
|
||||||
|
|
||||||
|
### 7.1 问题
|
||||||
|
缺乏Webhook机制
|
||||||
|
|
||||||
|
### 7.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class WebhookManager:
    """Webhook manager: registration, fan-out, and signed delivery of events."""

    # Supported event types -> human-readable description (runtime values kept verbatim).
    WEBHOOK_EVENTS = {
        'billing.low_balance': '余额低于阈值',
        'billing.balance_depleted': '余额耗尽',
        'key.created': 'Key创建',
        'key.expiring': 'Key即将过期',
        'key.disabled': 'Key被禁用',
        'account.status_changed': '账户状态变更',
        'provider.quota_exhausted': '供应商配额耗尽',
        'settlement.completed': '结算完成',
    }

    async def register_webhook(
        self,
        tenant_id: int,
        url: str,
        events: List[str],
        secret: str
    ) -> Webhook:
        """Register and persist an active webhook subscription for *events*."""
        webhook = Webhook(
            tenant_id=tenant_id,
            url=url,
            events=events,
            secret=secret,
            status='active'
        )
        await self.save(webhook)
        return webhook

    async def trigger_webhook(self, event: str, data: dict):
        """Deliver *event* with *data* to every subscriber."""
        # 1. Look up subscriptions for this event type.
        webhooks = await self.get_subscribers(event)

        # 2. Deliver sequentially; per-hook failures are handled in send_event.
        for webhook in webhooks:
            await self.send_event(webhook, event, data)

    async def send_event(self, webhook: Webhook, event: str, data: dict):
        """HMAC-sign the payload and POST it to the subscriber's URL."""
        # 1. Sign the exact bytes we send so receivers can verify integrity.
        payload = json.dumps({'event': event, 'data': data})
        signature = hmac.new(
            webhook.secret.encode(),
            payload.encode(),
            hashlib.sha256
        ).hexdigest()

        # 2. Deliver.
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    webhook.url,
                    content=payload,
                    headers={
                        'Content-Type': 'application/json',
                        'X-Webhook-Signature': signature,
                        'X-Webhook-Event': event
                    },
                    timeout=10.0
                )
                # Treat non-2xx receiver responses as delivery failures.
                # The original discarded the response object, so a
                # subscriber returning 4xx/5xx was silently counted as
                # a successful delivery.
                response.raise_for_status()
        except Exception:
            # The original called logger.error(msg, e), which treats the
            # exception as a %-format argument with no placeholder and
            # corrupts log formatting; logger.exception logs the same
            # message plus the traceback.
            logger.exception(f"Webhook发送失败: {webhook.url}")
            await self.handle_failure(webhook, event, data)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 定价模型细化
|
||||||
|
|
||||||
|
### 8.1 问题
|
||||||
|
毛利率15-50%范围过大
|
||||||
|
|
||||||
|
### 8.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DynamicPricingEngine:
    """Dynamic pricing engine: cost-plus pricing with tier/model/supply factors."""

    BASE_MARGIN = 0.25  # base gross margin 25%
    # Guardrails: the realized margin is clamped into [MIN_MARGIN, MAX_MARGIN].
    MIN_MARGIN = 0.15
    MAX_MARGIN = 0.50

    # Pricing factors
    FACTORS = {
        # Customer tier
        'customer_tier': {
            'free': 0.15,
            'growth': 0.25,
            'enterprise': 0.40
        },
        # Model type
        'model_type': {
            'gpt-4': 1.2,      # high margin
            'gpt-3.5': 1.0,    # standard
            'claude': 1.1,     # slightly higher
            'domestic': 0.9    # slightly lower
        },
        # Supply/demand balance
        'supply_demand': {
            'surplus': 0.8,    # oversupply
            'balanced': 1.0,
            'scarce': 1.3      # undersupply
        }
    }

    def calculate_price(self, cost: Money, context: PricingContext) -> Money:
        """Compute the customer-facing price for *cost* under *context*.

        Works in Decimal end-to-end: the original mixed float factors
        with ``Decimal.quantize()``, which fails whichever type
        ``cost.amount`` actually is (Decimal/float multiplication raises
        TypeError; float has no ``quantize``).
        """
        amount = Decimal(str(cost.amount))

        # 1. Cost-plus base price at the base margin.
        base_price = amount / (1 - Decimal(str(self.BASE_MARGIN)))

        # 2. Apply the pricing factors.
        tier_factor = Decimal(str(self.FACTORS['customer_tier'][context.tier]))
        model_factor = Decimal(str(self.FACTORS['model_type'][context.model_type]))
        sd_factor = Decimal(str(self.FACTORS['supply_demand'][context.supply_demand]))

        # 3. Final price.
        final_price = base_price * tier_factor * model_factor * sd_factor

        # 4. Clamp the realized margin into the allowed band.
        # The original delegated to adjust_to_target_margin but passed the
        # *out-of-range* margin instead of a target margin, so it could
        # not bring the price back into range; reprice at the nearest
        # bound directly instead.
        actual_margin = (final_price - amount) / final_price
        if actual_margin < Decimal(str(self.MIN_MARGIN)):
            final_price = amount / (1 - Decimal(str(self.MIN_MARGIN)))
        elif actual_margin > Decimal(str(self.MAX_MARGIN)):
            final_price = amount / (1 - Decimal(str(self.MAX_MARGIN)))

        return Money(amount=final_price.quantize(Decimal('0.01')), currency=cost.currency)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 完善需求方风控
|
||||||
|
|
||||||
|
### 9.1 问题
|
||||||
|
需求方风控不足
|
||||||
|
|
||||||
|
### 9.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ConsumerRiskController:
    """Consumer-side risk control: score rule hits and decide an action."""

    RISK_RULES = [
        # Abnormal velocity
        RiskRule(
            name='high_velocity',
            condition=lambda ctx: ctx.tokens_per_minute > 1000,
            score=30,
            action='flag'
        ),
        # Suspected account sharing
        RiskRule(
            name='account_sharing',
            condition=lambda ctx: ctx.unique_ips > 10,
            score=50,
            action='block'
        ),
        # Unusual usage pattern
        RiskRule(
            name='unusual_pattern',
            condition=lambda ctx: ctx.is_anomalous(),
            score=40,
            action='review'
        ),
        # New account with high spend
        RiskRule(
            name='new_account_high_value',
            condition=lambda ctx: ctx.account_age_days < 7 and ctx.daily_spend > 100,
            score=35,
            action='flag'
        )
    ]

    async def evaluate(self, context: RequestContext) -> RiskDecision:
        """Sum the scores of every triggered rule and map the total to a decision."""
        hits = [rule for rule in self.RISK_RULES if rule.condition(context)]
        total_score = sum(rule.score for rule in hits)
        triggers = [rule.name for rule in hits]

        # Decision thresholds: >=70 block, >=40 manual review, else allow.
        if total_score >= 70:
            action = 'BLOCK'
        elif total_score >= 40:
            action = 'REVIEW'
        else:
            action = 'ALLOW'

        return RiskDecision(action=action, score=total_score, triggers=triggers)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 用户体验增强
|
||||||
|
|
||||||
|
### 10.1 迁移自助切换工具
|
||||||
|
|
||||||
|
```python
|
||||||
|
class MigrationSelfService:
    """Migration self-service (fixes U-D-01): status, one-click switch, rollback."""

    def __init__(self):
        # Known gateway entry points users can switch between.
        self.endpoints = {
            'primary': 'https://api.lgateway.com',
            'backup': 'https://backup.lgateway.com'
        }

    async def get_migration_status(self, user_id: int) -> MigrationStatus:
        """Report the user's current migration state."""
        endpoint = self.get_current_endpoint(user_id)
        return MigrationStatus(
            current_endpoint=endpoint,
            is_migrated=True,
            migration_progress=100,
            health_status='healthy'
        )

    async def switch_endpoint(
        self,
        user_id: int,
        target: str
    ) -> SwitchResult:
        """One-click switch of the user's entry point to *target*.

        Raises:
            EndpointUnavailableError: when the target fails its availability probe.
        """
        # 1. Refuse to switch onto an endpoint that is not reachable.
        if not await self.is_endpoint_available(target):
            raise EndpointUnavailableError()

        # 2. Persist the switch for audit/rollback purposes.
        await self.record_switch(user_id, target)

        # 3. Report the outcome to the caller.
        return SwitchResult(
            success=True,
            target_endpoint=target,
            switch_time=datetime.now(),
            estimated_completion=30  # seconds
        )

    async def emergency_rollback(self, user_id: int) -> RollbackResult:
        """Fail back to the backup entry point in a single call."""
        return await self.switch_endpoint(user_id, 'backup')
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.2 SLA承诺模板
|
||||||
|
|
||||||
|
```python
|
||||||
|
class SLATemplate:
    """SLA template (fixes U-D-02): per-tier commitments and compensation."""

    # SLA tiers (runtime values kept verbatim).
    TIERS = {
        'free': {
            'availability': 0.99,
            'latency_p99': 5000,
            'support': 'community',
            'compensation': None
        },
        'growth': {
            'availability': 0.999,
            'latency_p99': 2000,
            'support': 'email',
            'compensation': {'credit': 0.1}  # 10% credit compensation
        },
        'enterprise': {
            'availability': 0.9999,
            'latency_p99': 1000,
            'support': 'dedicated',
            'compensation': {'credit': 0.25, 'refund': 0.05}  # 25% credit + 5% refund
        }
    }

    def calculate_compensation(
        self,
        tier: str,
        downtime_minutes: int,
        affected_requests: int
    ) -> Compensation:
        """Compute the compensation owed for an SLA breach on *tier*.

        Raises:
            KeyError: for an unknown tier name.
        """
        config = self.TIERS[tier]

        # Tiers without a compensation policy (e.g. 'free') get nothing.
        if not config['compensation']:
            return Compensation(type='none', amount=0)

        # Default both components to 0. The original left them unassigned
        # when the corresponding key was absent, so any tier missing
        # 'credit' or 'refund' (e.g. 'growth' has no 'refund') raised
        # UnboundLocalError at the max(...) below.
        credit_amount = 0
        refund_amount = 0

        if config['compensation'].get('credit'):
            credit_amount = affected_requests * 0.01 * config['compensation']['credit']

        if config['compensation'].get('refund'):
            refund_amount = affected_requests * 0.01 * config['compensation']['refund']

        return Compensation(
            type='credit' if credit_amount else 'refund',
            amount=max(credit_amount, refund_amount)
        )
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.3 用户状态面板
|
||||||
|
|
||||||
|
```python
|
||||||
|
class UserStatusDashboard:
    """User status dashboard (fixes U-D-03)."""

    async def get_status(self, user_id: int) -> UserStatus:
        """Assemble the user's status view.

        NOTE(review): every value below is hard-coded placeholder data —
        presumably to be replaced by real lookups; confirm before relying
        on this in production.
        """
        account_info = {
            'status': 'active',
            'tier': 'growth',
            'balance': 100.0,
            'quota': 10000
        }

        service_health = [
            {
                'name': 'API Gateway',
                'status': 'healthy',
                'latency_p99': 150,
                'uptime': 0.9999
            },
            {
                'name': 'Router Core',
                'status': 'healthy',
                'latency_p99': 80,
                'uptime': 0.9995
            }
        ]

        recent_incidents = [
            {
                'id': 'INC-001',
                'title': '延迟增加',
                'status': 'resolved',
                'resolved_at': datetime.now() - timedelta(hours=2)
            }
        ]

        migration_info = {
            'current': 'v2',
            'progress': 100,
            'health': 'healthy'
        }

        return UserStatus(
            account=account_info,
            services=service_health,
            incidents=recent_incidents,
            migrations=migration_info
        )
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. 实施计划
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 截止 |
|
||||||
|
|------|--------|------|
|
||||||
|
| ToS动态监控 | 安全 | S1 |
|
||||||
|
| 容量规划 | 架构 | S0-M1 |
|
||||||
|
| 故障隔离 | SRE | S1 |
|
||||||
|
| 可观测性体系 | SRE | S1 |
|
||||||
|
| 限流实现 | 后端 | S0-M1 |
|
||||||
|
| 批量API | 后端 | S1 |
|
||||||
|
| Webhooks | 后端 | S1 |
|
||||||
|
| 动态定价 | 产品 | S0-M2 |
|
||||||
|
| 需求方风控 | 风控 | S0-M1 |
|
||||||
|
| 迁移自助工具 | 产品 | S1 |
|
||||||
|
| SLA模板 | 产品 | S1 |
|
||||||
|
| 用户状态面板 | 前端 | S1 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:P1优化方案(增强版)
|
||||||
114
docs/plans/2026-03-25-superpowers-execution-tasklist-v1.md
Normal file
114
docs/plans/2026-03-25-superpowers-execution-tasklist-v1.md
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
# 规划设计闭环执行任务清单(Superpowers 版)
|
||||||
|
|
||||||
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||||
|
|
||||||
|
**Goal:** 关闭当前规划设计文档中的 P0/P1 缺口,完成从“设计对齐”到“执行可发布”的全链路闭环。
|
||||||
|
**Architecture:** 以 SSOT 为唯一约束源,先锁定需求与契约,再打通执行环境与证据链,最后完成门禁签署与发布决策。
|
||||||
|
**Tech Stack:** Markdown/CSV/OpenAPI YAML/Bash 脚本(`scripts/supply-gate`)/CI Gate。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. 需求与功能理解确认(执行前)
|
||||||
|
|
||||||
|
1. 业务模型:`用户A供给 -> 平台 -> 用户B购买平台服务`,上游凭证只托管在平台。
|
||||||
|
2. 核心功能:账号挂载、套餐发布、收益结算、凭证边界审计与门禁。
|
||||||
|
3. 最高优先级:M-013~M-016 达标 + SUP-004~SUP-007 真实证据。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 总体任务分组
|
||||||
|
|
||||||
|
1. `WG-A` 需求与功能冻结(P0)
|
||||||
|
2. `WG-B` 接口契约与技术规范对齐(P0)
|
||||||
|
3. `WG-C` 测试追踪矩阵与门禁一致化(P1)
|
||||||
|
4. `WG-D` 执行环境解锁与联调证据(P0)
|
||||||
|
5. `WG-E` 报告签署与发布决策(P1)
|
||||||
|
6. `WG-F` 全局功能映射补齐与一致性收尾(P1/P2)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 最细子任务清单(原子步骤)
|
||||||
|
|
||||||
|
| Step ID | Workstream | 原子动作(2-5分钟) | 输入 | 输出 | Owner | DoD |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| A-001 | WG-A | 打开按钮级 PRD 并定位“草案”标记 | `docs/supply_button_level_prd_v1_2026-03-25.md` | 标记行号记录 | 产品 | 已记录证据 |
|
||||||
|
| A-002 | WG-A | 打开按钮级 PRD 并定位“待拍板项” | 同上 | 待决议项清单 | 产品 | 4条待决议被提取 |
|
||||||
|
| A-003 | WG-A | 建立“待拍板->决议”映射表 | A-002 | 决议表草案 | 产品+ARCH | 每条有决议动作 |
|
||||||
|
| A-004 | WG-A | 召开 30 分钟决议会确认 4 条待拍板项 | A-003 | 会议纪要 | 产品+ARCH+FIN+QA | 4条全定稿 |
|
||||||
|
| A-005 | WG-A | 将按钮级 PRD 状态改为“冻结” | A-004 | 文档状态更新 | 产品 | 文档不再含“草案” |
|
||||||
|
| A-006 | WG-A | 删除/替换“待拍板项”章节为“已决议项” | A-004 | 已决议章节 | 产品 | 无未决事项残留 |
|
||||||
|
| A-007 | WG-A | 在任务单中引用最新冻结 PRD | A-005 | 引用链更新 | PMO | 任务单链接可追踪 |
|
||||||
|
| A-008 | WG-A | 在复核报告中更新 P0-01 关闭状态 | A-005/A-006 | 复核报告修订 | ARCH | P0-01 标注 Closed |
|
||||||
|
| B-001 | WG-B | 在 OpenAPI 中新增 `X-Request-Id` 参数定义 | `docs/supply_api_contract_openapi_draft_v1_2026-03-25.yaml` | 参数组件 | PLAT | 参数 schema 完整 |
|
||||||
|
| B-002 | WG-B | 在 OpenAPI 中新增 `Idempotency-Key` 参数定义 | 同上 | 参数组件 | PLAT | 参数 schema 完整 |
|
||||||
|
| B-003 | WG-B | 将两类 header 挂到 `POST /supply/accounts` | B-001/B-002 | 路径参数更新 | PLAT | required=true |
|
||||||
|
| B-004 | WG-B | 将两类 header 挂到 `POST /packages/{id}/publish` | B-001/B-002 | 路径参数更新 | PLAT | required=true |
|
||||||
|
| B-005 | WG-B | 将两类 header 挂到 `POST /packages/batch-price` | B-001/B-002 | 路径参数更新 | PLAT | required=true |
|
||||||
|
| B-006 | WG-B | 将两类 header 挂到 `POST /settlements/withdraw` | B-001/B-002 | 路径参数更新 | PLAT | required=true |
|
||||||
|
| B-007 | WG-B | 将两类 header 挂到 `POST /settlements/{id}/cancel` | B-001/B-002 | 路径参数更新 | PLAT | required=true |
|
||||||
|
| B-008 | WG-B | 为幂等冲突补充 `409` 错误码示例 | B-003~B-007 | 错误响应示例 | PLAT | 含 payload mismatch 场景 |
|
||||||
|
| B-009 | WG-B | 为处理中重放补充 `202` 示例 | B-003~B-007 | 错误响应示例 | PLAT | 含 retry_after_ms |
|
||||||
|
| B-010 | WG-B | 运行 OpenAPI lint 校验 | B-001~B-009 | lint 结果 | PLAT | 无语法错误 |
|
||||||
|
| B-011 | WG-B | 更新技术增强稿中“契约已落地”状态 | B-010 | 文档状态更新 | ARCH | XR-001 对齐 |
|
||||||
|
| B-012 | WG-B | 在复核报告中更新 P0-02 关闭状态 | B-010 | 复核报告修订 | ARCH | P0-02 Closed |
|
||||||
|
| C-001 | WG-C | 打开测试增强文档,提取短路径条目 | `docs/supply_test_plan_enhanced_v1_2026-03-25.md` | 待改路径清单 | QA | 清单完整 |
|
||||||
|
| C-002 | WG-C | 将 `/accounts` 替换为完整 `/api/v1/supply/accounts` | C-001 | 路径修正 | QA | 与 OpenAPI 一致 |
|
||||||
|
| C-003 | WG-C | 将 `/packages` 类路径替换为完整路径 | C-001 | 路径修正 | QA | 与 OpenAPI 一致 |
|
||||||
|
| C-004 | WG-C | 将 `/settlements` 类路径替换为完整路径 | C-001 | 路径修正 | QA | 与 OpenAPI 一致 |
|
||||||
|
| C-005 | WG-C | 在矩阵中新增 `api_alias` 列(如需兼容) | C-002~C-004 | 矩阵字段扩展 | QA | 自动匹配无歧义 |
|
||||||
|
| C-006 | WG-C | 同步更新 `reports/supply_traceability_matrix_2026-03-25.csv` | C-002~C-005 | CSV 更新 | QA | 行列一致 |
|
||||||
|
| C-007 | WG-C | 补充追踪矩阵生成规则文档 | C-006 | 生成说明 | QA | 新人可复跑 |
|
||||||
|
| C-008 | WG-C | 在 XR-002 验收项加入“路径一致性检查” | C-006 | 任务单更新 | ARCH+QA | 验收标准明确 |
|
||||||
|
| D-001 | WG-D | 与平台确认可达 staging 域名 | Preflight 报告 | 可达域名 | PLAT/SRE | 域名可解析 |
|
||||||
|
| D-002 | WG-D | 在 `.env` 更新 `API_BASE_URL` | `scripts/supply-gate/.env` | 新地址配置 | PLAT | 配置已保存 |
|
||||||
|
| D-003 | WG-D | 向平台申请短期 owner token | 安全流程 | token 值 | SEC+PLAT | 获取成功 |
|
||||||
|
| D-004 | WG-D | 向平台申请短期 viewer token | 安全流程 | token 值 | SEC+PLAT | 获取成功 |
|
||||||
|
| D-005 | WG-D | 向平台申请短期 admin token | 安全流程 | token 值 | SEC+PLAT | 获取成功 |
|
||||||
|
| D-006 | WG-D | 写入 `.env` 三类 token | D-003~D-005 | 完整 `.env` | PLAT | 无占位值 |
|
||||||
|
| D-007 | WG-D | 执行 `sup004_accounts.sh` | D-006 | `tests/supply/artifacts/sup004/*` | QA | 产物齐全 |
|
||||||
|
| D-008 | WG-D | 校验 `sup004` 关键字段(account_id/status) | D-007 | 校验记录 | QA | 断言通过 |
|
||||||
|
| D-009 | WG-D | 执行 `sup005_packages.sh` | D-006 | `tests/supply/artifacts/sup005/*` | QA | 产物齐全 |
|
||||||
|
| D-010 | WG-D | 校验 `sup005` 批量调价明细 | D-009 | 校验记录 | QA | success+failed=total |
|
||||||
|
| D-011 | WG-D | 执行 `sup006_settlements.sh` | D-006 | `tests/supply/artifacts/sup006/*` | QA+FIN | 产物齐全 |
|
||||||
|
| D-012 | WG-D | 校验提现状态机与金额回退 | D-011 | 校验记录 | FIN+QA | 无跳态/无双扣 |
|
||||||
|
| D-013 | WG-D | 执行 `sup007_boundary.sh` | D-006 | `tests/supply/artifacts/sup007/*` | SEC+QA | 产物齐全 |
|
||||||
|
| D-014 | WG-D | 校验 M-013 脱敏扫描结果 | D-013 | 指标断言 | SEC | 必须为 0 |
|
||||||
|
| D-015 | WG-D | 校验 M-014 入站覆盖率 | D-013 | 指标断言 | SEC+PLAT | 必须 100% |
|
||||||
|
| D-016 | WG-D | 校验 M-015 直连绕过事件 | D-013 | 指标断言 | SEC+SRE | 必须 0 |
|
||||||
|
| D-017 | WG-D | 校验 M-016 query key 拒绝率 | D-013 | 指标断言 | SEC+PLAT | 必须 100% |
|
||||||
|
| D-018 | WG-D | 生成新 preflight 报告(PASS) | D-007~D-017 | preflight 更新 | QA | 状态 PASS |
|
||||||
|
| E-001 | WG-E | 回填 ACC 报告每条用例 PASS/FAIL | D-007/D-008 | ACC 报告 | QA | 6条有结论 |
|
||||||
|
| E-002 | WG-E | 回填 PKG 报告每条用例 PASS/FAIL | D-009/D-010 | PKG 报告 | QA | 6条有结论 |
|
||||||
|
| E-003 | WG-E | 回填 SET 报告每条用例 PASS/FAIL | D-011/D-012 | SET 报告 | QA+FIN | 5条有结论 |
|
||||||
|
| E-004 | WG-E | 回填 SEC 报告与 M-013~M-016 实测值 | D-013~D-017 | SEC 报告 | SEC+QA | 指标完整 |
|
||||||
|
| E-005 | WG-E | 更新 SUP 汇总报告分项结论 | E-001~E-004 | 汇总表更新 | ARCH+QA | 4项闭环 |
|
||||||
|
| E-006 | WG-E | 勾选汇总结论(通过/有条件通过/不通过) | E-005 | 结论勾选 | ARCH | 单选且合理 |
|
||||||
|
| E-007 | WG-E | 回填实名 Owner(非“待实名”) | E-005 | Owner 完整 | PMO | 全实名 |
|
||||||
|
| E-008 | WG-E | 回填 4 方签署人 + 日期 +签署编号 | E-007 | 签署页完整 | PMO | 可审计 |
|
||||||
|
| E-009 | WG-E | 更新复核报告未关闭风险章节 | E-006~E-008 | 复核报告更新 | ARCH | 状态同步 |
|
||||||
|
| E-010 | WG-E | 在任务单标记 SUP-004~SUP-008 状态 | E-006 | 任务单更新 | PMO | 状态一致 |
|
||||||
|
| F-001 | WG-F | 创建“全局 P0 -> 供应侧/平台侧”映射表 | `llm_gateway_prd_v1_2026-03-25.md` | 新映射文档 | 产品 | 覆盖 PRD 全量 P0 |
|
||||||
|
| F-002 | WG-F | 将“预算/告警/账单导出”映射到按钮或页面入口 | F-001 | 映射补充 | 产品+UIUX | 无遗漏 |
|
||||||
|
| F-003 | WG-F | 将映射表接入测试追踪矩阵 | F-001/F-002 | 矩阵扩展 | QA | 需求可测 |
|
||||||
|
| F-004 | WG-F | 定义 `/supply` vs `/supplier` 命名策略 | OpenAPI | 命名决议 | ARCH+PLAT | 有兼容方案 |
|
||||||
|
| F-005 | WG-F | 若保留双路径,补充 alias/重定向规则 | F-004 | 契约补充 | PLAT | 客户端可迁移 |
|
||||||
|
| F-006 | WG-F | 更新 API 规范注释与变更日志 | F-004/F-005 | 变更记录 | PLAT | 变更可追溯 |
|
||||||
|
| F-007 | WG-F | 在复核报告追加 P1/P2 收敛状态 | F-001~F-006 | 复核更新 | ARCH | 收敛结论明确 |
|
||||||
|
| G-001 | 全局 | 执行一次跨文档链接完整性检查 | 全部更新文档 | 检查记录 | PMO | 无失效引用 |
|
||||||
|
| G-002 | 全局 | 执行一次门禁指标与报告一致性检查 | 指标报告 | 对齐记录 | QA+SEC | 指标一致 |
|
||||||
|
| G-003 | 全局 | 输出最终“GO/CONDITIONAL GO/NO-GO”决议稿 | E/F/G 完成项 | 决议文档 | ARCH | 管理层可签发 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 里程碑与退出标准
|
||||||
|
|
||||||
|
1. 里程碑 M-A(需求冻结完成):A-001~A-008 全完成。
|
||||||
|
2. 里程碑 M-B(契约对齐完成):B-001~B-012、C-001~C-008 全完成。
|
||||||
|
3. 里程碑 M-C(执行证据完成):D-001~D-018、E-001~E-010 全完成。
|
||||||
|
4. 里程碑 M-D(一致性收尾):F-001~F-007、G-001~G-003 全完成。
|
||||||
|
|
||||||
|
退出标准(可判定 GO):
|
||||||
|
1. 所有 P0 项已关闭。
|
||||||
|
2. `SUP-004~SUP-007` 真实执行证据完整。
|
||||||
|
3. M-013~M-016 全达标且有签署。
|
||||||
|
4. 汇总评审从“不通过/阻塞”转为“通过/有条件通过(带关闭计划)”。
|
||||||
70
docs/product/owner_sla_dispute_compensation_rules_v1.md
Normal file
70
docs/product/owner_sla_dispute_compensation_rules_v1.md
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# 业主 SLA/申诉/赔付规则(XR-004)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 状态:生效(门禁验收条款)
|
||||||
|
- 目的:将业主侧交付承诺、异常申诉与赔付策略转化为可执行标准
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 适用范围
|
||||||
|
|
||||||
|
1. 供应侧三页链路导致的业主体验异常。
|
||||||
|
2. 计费、结算、提现、导出相关争议。
|
||||||
|
3. 平台凭证与安全边界导致的访问阻断争议。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. SLA 分级
|
||||||
|
|
||||||
|
| 等级 | 场景 | 首次响应时限 | 处置时限 | 升级条件 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| S0 | 资金错误、凭证泄露、越权操作 | 15 分钟 | 4 小时临时止血,24 小时根因报告 | 逾期或影响多个租户 |
|
||||||
|
| S1 | 提现延迟、账单错误、关键功能不可用 | 30 分钟 | 8 小时恢复,48 小时复盘 | 连续 2 次超时 |
|
||||||
|
| S2 | 非关键体验问题、展示问题 | 2 小时 | 3 个工作日修复 | 重复投诉 >=3 次 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 申诉流程
|
||||||
|
|
||||||
|
1. 入口:控制台工单 + 客服入口 + 邮件入口(统一工单 ID)。
|
||||||
|
2. 必填字段:租户 ID、request_id、发生时间、影响描述、截图或日志。
|
||||||
|
3. 处理阶段:
|
||||||
|
1. 受理:校验信息完整性并分级。
|
||||||
|
2. 调查:拉取审计日志、账务流水、网关日志。
|
||||||
|
3. 结论:给出责任归因、影响范围、恢复动作。
|
||||||
|
4. 关闭:用户确认或超时自动关闭(保留复开入口)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 赔付规则
|
||||||
|
|
||||||
|
| 事件类型 | 赔付基线 | 上限 | 备注 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| 资金多扣/错扣 | 全额返还 + 额外 10% 服务补偿 | 单事件不超过当月服务费 50% | 先返还后复盘 |
|
||||||
|
| 提现超时(SLA 逾期) | 每逾期 24h 赔付 2% 服务费减免 | 累计不超过当月服务费 20% | 法务可调整 |
|
||||||
|
| 平台故障导致不可用 | 按不可用时长折算服务费减免 | 不超过当月服务费 30% | 以监控证据为准 |
|
||||||
|
| 凭证边界误拦截 | 免责恢复 + 工单优先通道 | 不适用 | 以恢复时效考核为主 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 可观测与验收
|
||||||
|
|
||||||
|
1. 指标:
|
||||||
|
1. `owner_sla_first_response_on_time_pct >= 99%`
|
||||||
|
2. `owner_sla_resolution_on_time_pct >= 98%`
|
||||||
|
3. `owner_dispute_reopen_rate <= 3%`
|
||||||
|
2. 证据:
|
||||||
|
1. 工单系统导出
|
||||||
|
2. 审计日志链路
|
||||||
|
3. 赔付记录与审批记录
|
||||||
|
3. 门禁:
|
||||||
|
1. 任一 S0 超时视为 P0。
|
||||||
|
2. 周期内 S1 超时率 >2% 视为 P1。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 对齐关系
|
||||||
|
|
||||||
|
1. 本文与 `supply_test_plan_enhanced_v1_2026-03-25.md` 的“申诉与赔付可测性”章节一一对应。
|
||||||
|
2. 本文由 `XR-004` 任务验收并纳入 `XR-005` 综合复核。
|
||||||
186
docs/resource_assessment_plan_v1_2026-03-18.md
Normal file
186
docs/resource_assessment_plan_v1_2026-03-18.md
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
# 资源评估与补充方案
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:系统性解决评审提出的资源紧张问题
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 当前资源评估
|
||||||
|
|
||||||
|
### 1.1 各阶段人力需求
|
||||||
|
|
||||||
|
| 阶段 | 周期 | 原需求 | 评估 | 风险等级 |
|
||||||
|
|------|------|--------|------|----------|
|
||||||
|
| **S0** | 12周 | 5-8人 | ⚠️ 紧张 | 🔴 高 |
|
||||||
|
| **S1** | 8周 | 6-10人 | ⚠️ 紧张 | 🟡 中 |
|
||||||
|
| **S2** | 13周 | 8-12人 | ❌ 风险高 | 🔴 高 |
|
||||||
|
| **S3** | 11周 | 待评估 | - | 🟢 低 |
|
||||||
|
| **S4** | 5月 | 待评估 | - | 🟢 低 |
|
||||||
|
|
||||||
|
### 1.2 瓶颈分析
|
||||||
|
|
||||||
|
```
|
||||||
|
资源冲突示意图:
|
||||||
|
|
||||||
|
S0 周期(12周): |==========|--------|
|
||||||
|
↑ ↑
|
||||||
|
用户供应 Subapi
|
||||||
|
系统 集成
|
||||||
|
并行 并行
|
||||||
|
|
||||||
|
S1 周期(8周): |====|====|
|
||||||
|
↑ ↑
|
||||||
|
S1开发 S0收尾
|
||||||
|
重叠 冲突
|
||||||
|
```
|
||||||
|
|
||||||
|
**核心瓶颈**:
|
||||||
|
1. **S0/S1并行风险**:用户供应系统 + Subapi集成需要同时开发
|
||||||
|
2. **S2 Router Core自研**:需要资深Go开发,难度高
|
||||||
|
3. **全栈能力需求**:需要同时懂网关、计费、风控的复合型人才
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 解决方案
|
||||||
|
|
||||||
|
### 2.1 方案A:延长周期(推荐)
|
||||||
|
|
||||||
|
| 阶段 | 原周期 | 建议周期 | 增加周数 |
|
||||||
|
|------|--------|----------|----------|
|
||||||
|
| S0 | 12周 | **15周** | +3周 |
|
||||||
|
| S1 | 8周 | 10周 | +2周 |
|
||||||
|
| S2 | 13周 | **16周** | +3周 |
|
||||||
|
|
||||||
|
**优点**:
|
||||||
|
- 风险可控
|
||||||
|
- 团队压力减小
|
||||||
|
- 质量更有保障
|
||||||
|
|
||||||
|
**缺点**:
|
||||||
|
- 整体上线时间延后
|
||||||
|
- 市场竞争窗口期缩短
|
||||||
|
|
||||||
|
### 2.2 方案B:增加资源
|
||||||
|
|
||||||
|
| 阶段 | 原人力 | 建议人力 | 增加人数 |
|
||||||
|
|------|--------|----------|----------|
|
||||||
|
| S0 | 5-8人 | **8-10人** | +2-3人 |
|
||||||
|
| S1 | 6-10人 | 8-12人 | +2人 |
|
||||||
|
| S2 | 8-12人 | **12-15人** | +3-4人 |
|
||||||
|
|
||||||
|
**优点**:
|
||||||
|
- 周期不变
|
||||||
|
- 可以并行推进
|
||||||
|
|
||||||
|
**缺点**:
|
||||||
|
- 成本增加
|
||||||
|
- 管理复杂度上升
|
||||||
|
- 招聘难度大
|
||||||
|
|
||||||
|
### 2.3 方案C:分阶段交付(混合策略)
|
||||||
|
|
||||||
|
**策略**:将S0拆分为两个里程碑
|
||||||
|
|
||||||
|
| 里程碑 | 内容 | 周期 | 人力 |
|
||||||
|
|--------|------|------|------|
|
||||||
|
| S0-M1 | 供应方入驻 + 基础验证 | 8周 | 5人 |
|
||||||
|
| S0-M2 | 套餐发布 + 计费 + 风控 | 7周 | 6人 |
|
||||||
|
| S1 | Subapi集成 | 8周 | 8人 |
|
||||||
|
|
||||||
|
**优点**:
|
||||||
|
- 每个里程碑可独立验收
|
||||||
|
- 资源错峰配置
|
||||||
|
- 风险分散
|
||||||
|
|
||||||
|
**缺点**:
|
||||||
|
- 需要更好的项目管理
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 推荐方案
|
||||||
|
|
||||||
|
### 3.1 采用方案C(分阶段交付)
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
1. 平衡风险和进度
|
||||||
|
2. 避免资源剧烈波动
|
||||||
|
3. 每个阶段有明确交付物
|
||||||
|
|
||||||
|
### 3.2 人力配置建议
|
||||||
|
|
||||||
|
```
|
||||||
|
时间轴 →
|
||||||
|
|
||||||
|
S0-M1 (W1-W8): [供应方入驻+验证] 5人
|
||||||
|
S0-M2 (W6-W13): [套餐发布+计费] 6人 ← 重叠2周
|
||||||
|
S1 (W10-W18): [Subapi集成] 8人
|
||||||
|
S2 (W19-W34): [Router Core自研] 10人
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 角色配置
|
||||||
|
|
||||||
|
| 角色 | S0-M1 | S0-M2 | S1 | S2 |
|
||||||
|
|------|-------|-------|----|----|
|
||||||
|
| 后端Go开发 | 2 | 3 | 4 | 6 |
|
||||||
|
| 前端React | 1 | 1 | 2 | 2 |
|
||||||
|
| 架构师 | 1 | 1 | 1 | 1 |
|
||||||
|
| 测试QA | 1 | 1 | 1 | 2 |
|
||||||
|
| **合计** | **5** | **6** | **8** | **11** |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 招聘计划
|
||||||
|
|
||||||
|
### 4.1 关键岗位
|
||||||
|
|
||||||
|
| 岗位 | 数量 | 到岗时间 | 优先级 |
|
||||||
|
|------|------|----------|--------|
|
||||||
|
| 资深Go后端 | 2-3人 | S0启动前 | 🔴 P0 |
|
||||||
|
| 全栈工程师 | 1人 | S0-M2前 | 🟡 P1 |
|
||||||
|
| 测试工程师 | 1人 | S1启动前 | 🟡 P1 |
|
||||||
|
|
||||||
|
### 4.2 成本估算
|
||||||
|
|
||||||
|
| 人力方案 | 月薪成本 | 12周成本 | 24周成本 |
|
||||||
|
|----------|----------|----------|----------|
|
||||||
|
| 5人团队 | ¥15-25万 | ¥45-75万 | ¥90-150万 |
|
||||||
|
| 8人团队 | ¥24-40万 | ¥72-120万 | ¥144-240万 |
|
||||||
|
| 10人团队 | ¥30-50万 | ¥90-150万 | ¥180-300万 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 风险缓解措施
|
||||||
|
|
||||||
|
### 5.1 资源冲突缓解
|
||||||
|
|
||||||
|
| 风险 | 缓解措施 | 责任人 |
|
||||||
|
|------|----------|--------|
|
||||||
|
| S0/S1并行冲突 | 分阶段交付,里程碑验收 | 项目经理 |
|
||||||
|
| 招聘不及时 | 提前启动招聘,猎头渠道 | HR |
|
||||||
|
| 技术难度高 | 架构师深度参与,代码审查 | 技术负责人 |
|
||||||
|
|
||||||
|
### 5.2 备选方案
|
||||||
|
|
||||||
|
如果招聘困难:
|
||||||
|
1. **外包部分非核心功能**(如管理后台)
|
||||||
|
2. **使用云服务商托管服务**(如托管Redis、Kafka)
|
||||||
|
3. **延迟非关键功能**(如部分S3机器人能力)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 决策建议
|
||||||
|
|
||||||
|
| 决策项 | 选项 | 推荐 |
|
||||||
|
|--------|------|------|
|
||||||
|
| S0周期 | 12周 / 15周 | **15周** |
|
||||||
|
| S2周期 | 13周 / 16周 | **16周** |
|
||||||
|
| 人力配置 | 5人 / 8人 / 动态 | **动态配置** |
|
||||||
|
| 招聘策略 | 内招 / 外包 / 猎头 | **猎头+内招** |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:资源评估方案
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md`
|
||||||
|
- `s0_wbs_detailed_v1_2026-03-18.md`
|
||||||
136
docs/router_core_s2_acceptance_test_cases_v1_2026-03-17.md
Normal file
136
docs/router_core_s2_acceptance_test_cases_v1_2026-03-17.md
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
# Router Core S2 验收测试用例清单(按模块展开,v1.1)
|
||||||
|
|
||||||
|
- 版本:v1.1
|
||||||
|
- 日期:2026-03-24
|
||||||
|
- 适用阶段:S2(2026-05-16 至 2026-08-15)
|
||||||
|
- 关联文档:
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
- `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md`
|
||||||
|
|
||||||
|
## 1. 验收范围与统一门槛
|
||||||
|
|
||||||
|
统一质量/账务/迁移门槛(与 v3 对齐):
|
||||||
|
|
||||||
|
1. 网关附加时延 `P95 <= 60ms`。
|
||||||
|
2. 5xx 不高于基线 `+0.1%`。
|
||||||
|
3. 账务差错率 `<= 0.1%`。
|
||||||
|
4. 幂等冲突率 `<= 0.01%`。
|
||||||
|
5. `cn_takeover = 100%`。
|
||||||
|
6. `overall_takeover >= 60%`。
|
||||||
|
7. `supplier_credential_exposure_events = 0`。
|
||||||
|
8. `platform_credential_ingress_coverage_pct = 100%`。
|
||||||
|
9. `direct_supplier_call_by_consumer_events = 0`。
|
||||||
|
10. `query_key_external_reject_rate_pct = 100%`。
|
||||||
|
|
||||||
|
测试环境前置:
|
||||||
|
|
||||||
|
1. 测试库已包含 `usage_logs`、`usage_billing_dedup`、`ops_*` 表。
|
||||||
|
2. 已启用接管率统计口径(临时口径或验收口径至少一种)。
|
||||||
|
3. 准备至少 2 个账号/分组用于 failover 与并发争用验证。
|
||||||
|
|
||||||
|
## 2. Scheduler Core
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| SCH-001 | 同分组有 A/B 两账号;A 为 previous 命中账号 | 同一会话连续请求 50 次 | 1) 首次请求命中 A 2) 持续请求保持会话上下文 3) 记录命中账号分布 | previous/session 策略优先命中 A;命中率符合配置 | 调度日志(含策略层级)+ 请求明细导出 |
|
||||||
|
| SCH-002 | session TTL 已配置(如 3600s) | 跨 TTL 边界请求 | 1) TTL 内请求 2) 超过 TTL 后再次请求 | TTL 内保持粘性;超时后允许重新选路 | session key 变化记录 + 调度 trace |
|
||||||
|
| SCH-003 | 账号负载差异明显(A 高负载,B 低负载) | 500 并发短请求 | 1) 打压 A 形成高队列 2) 发起混合流量 | load 评分生效,流量向低负载账号倾斜 | 账号负载快照 + 命中分布图 |
|
||||||
|
| SCH-004 | 存在错误账号 C(持续 429/529) | 正常请求流量 | 1) 触发 C 错误 2) 持续观测调度 | 错误账号降权/隔离,不应持续被命中 | 账号状态变更日志 + 调度拒选原因 |
|
||||||
|
|
||||||
|
## 3. Concurrency Gate
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| CCG-001 | 用户并发上限=5 | 同用户 20 并发 | 1) 同时发起 20 请求 2) 统计状态码 | 超出上限请求进入等待或返回 429;无 500 扩散 | 压测报告 + 429 比例统计 |
|
||||||
|
| CCG-002 | 账号并发上限=3;队列上限已配置 | 多用户同打一个账号 | 1) 持续冲击单账号 2) 监控队列深度 | 账号槽位受控;队列深度不越界 | 队列深度曲线 + 账号并发快照 |
|
||||||
|
| CCG-003 | 队列超时阈值已配置 | 长耗时请求 + 短请求混流 | 1) 先占满槽位 2) 注入短请求 | 超时请求返回预期错误(429/超时码);无槽位泄漏 | 错误分布 + 槽位计数核对 |
|
||||||
|
| CCG-004 | 支持异常中断场景 | 中途断连/超时中止请求 | 1) 发起请求后主动断连 2) 重复多轮 | 槽位自动释放;后续请求可正常获取槽位 | 槽位释放日志 + 恢复后成功率 |
|
||||||
|
|
||||||
|
## 4. Failover Orchestrator
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| FO-001 | 配置同号重试次数=2 | 构造可重试错误(网络抖动) | 1) 注入瞬时网络错误 2) 观察重试轨迹 | 先同号重试,成功后不换号 | failover trace + 重试次数统计 |
|
||||||
|
| FO-002 | 配置换号重试上限=3 | 构造 429/529 持续错误 | 1) 让当前账号持续限流 2) 观察是否切换账号 | 按策略换号且不超过上限 | 账号切换链路日志 |
|
||||||
|
| FO-003 | 注入不可重试错误(4xx 业务错误) | 非法参数请求 | 1) 提交非法请求 2) 观察行为 | 不应触发重试/换号,直接返回标准错误 | 错误规范化结果 + 无重试证据 |
|
||||||
|
| FO-004 | 多账号可用;`max switches` 已配置 | 持续故障流量 | 1) 触发连续失败 2) 超过上限后继续请求 | 超过上限后停止切换并返回兜底错误 | failover 上限命中日志 + 告警事件 |
|
||||||
|
|
||||||
|
## 5. Stream Guard Layer
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| SG-001 | 流式链路可控注入错误 | 首包前上游失败 | 1) 在首 token 前注入可重试错误 2) 观察策略 | 可按策略重试,不产生重复输出 | 流式事件序列 + 重试日志 |
|
||||||
|
| SG-002 | 流式输出已开始 | 首 token 后注入上游失败 | 1) 首 token 已发送 2) 注入上游失败 | 禁止 replay;返回单次终止,不出现双流拼接 | SSE/WS 抓包 + stream guard 日志 |
|
||||||
|
| SG-003 | 客户端断流可观测 | 客户端中途取消 | 1) 流式进行中取消连接 2) 观察后端行为 | 后端及时收敛,不继续写流,不泄漏资源 | 连接生命周期日志 + 资源占用快照 |
|
||||||
|
|
||||||
|
## 6. Usage & Billing Core
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| UB-001 | 幂等键生效(`request_id + api_key_id`) | 相同 request_id 重放 2 次 | 1) 发送首次请求 2) 原样重放 | 只扣费一次;重复请求命中幂等 | 账务明细 + `usage_billing_dedup` 记录 |
|
||||||
|
| UB-002 | 启用指纹冲突检测 | 同 request_id 不同 payload | 1) 首次成功请求 2) 修改 payload 重放 | 触发冲突告警;不允许静默重复扣费 | 冲突告警事件 + 审计日志 |
|
||||||
|
| UB-003 | failover 打开 | 单请求多次尝试后成功 | 1) 前几次失败 2) 最终一次成功 | 全链路只产生一条有效扣费记录 | request 级账务对账报告 |
|
||||||
|
| UB-004 | 冷归档任务可执行 | 运行 dedup 归档任务 | 1) 执行归档 2) 抽样验证历史请求去重 | 归档后热表缩小且去重能力不退化 | 归档任务日志 + 抽样重放结果 |
|
||||||
|
|
||||||
|
## 7. CN Provider Adapter Pack
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| CN-001 | 已接入国内供应商账号集 | 标准文本请求 | 1) 发起兼容请求 2) 核对路由和鉴权 | 鉴权成功,路由到目标供应商,无协议回退 | adapter 请求日志 + 上游响应 |
|
||||||
|
| CN-002 | 模型映射表已配置 | 兼容模型名请求 | 1) 使用统一模型名 2) 检查映射后的上游模型 | 映射正确,计费口径不丢失 | 模型映射日志 + usage 样本 |
|
||||||
|
| CN-003 | 国内供应商多账号可用 | 注入单账号故障 | 1) 让主账号故障 2) 验证同平台切换 | 在国内供应商集合内切换成功,链路不回退到 subapi | failover trace + router_engine 统计 |
|
||||||
|
| CN-004 | 已开启 CN 全量灰度阶段 | 24h 连续流量 | 1) 持续运行 2) 统计接管率 | `cn_takeover=100%`,若非 100 立即阻断升波 | CN 接管率 SQL 输出 + 告警记录 |
|
||||||
|
|
||||||
|
## 8. Error Normalization Engine
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| EN-001 | OpenAI/Anthropic/Gemini 适配器可用 | 三平台等价限流错误 | 1) 各平台触发 429 2) 比较返回结构 | 统一 `category/code/retryable` 语义一致 | 契约测试报告 |
|
||||||
|
| EN-002 | 非重试错误映射规则就绪 | 参数错误/鉴权错误 | 1) 触发 400/401 2) 观察重试标记 | `retryable=false`,无误重试 | 错误返回样本 + failover 日志 |
|
||||||
|
| EN-003 | passthrough 白名单规则已配置 | 指定平台原始错误 | 1) 命中白名单规则 2) 校验透传字段 | 仅白名单字段透传,敏感信息不泄漏 | 规则命中日志 + 安全审计记录 |
|
||||||
|
|
||||||
|
## 9. Observability & Audit
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| OBS-001 | 全链路打点开启 | 正常/异常混合请求 | 1) 抽样 100 请求 2) 逐条回溯 request_id | request_id 可追踪率 100% | trace 检查清单 |
|
||||||
|
| OBS-002 | 接管率看板已上线 | 固定时间窗查询 | 1) 执行 SQL 2) 对照看板值 | SQL 与看板偏差在容忍范围内(建议 <=0.1pp) | SQL 输出存档 + 看板截图 |
|
||||||
|
| OBS-003 | 告警规则已启用 | 人工触发阈值场景 | 1) 构造超阈值 2) 观察告警生命周期 | 告警按规则触发、抑制、恢复 | `ops_alert_events` 记录 |
|
||||||
|
| OBS-004 | 审计日志不可篡改策略生效 | 管理操作与重试操作 | 1) 执行管理动作 2) 验证审计条目 | 关键动作均有审计证据,字段完整 | 审计导出报表 |
|
||||||
|
|
||||||
|
## 10. Credential Boundary(凭证边界)
|
||||||
|
|
||||||
|
| 用例ID | 前置条件 | 输入 | 测试步骤 | 预期结果 | 证据 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| CB-001 | 平台凭证鉴权已开启 | 使用平台凭证访问主路径接口 | 1) 用平台凭证发起请求 2) 校验日志字段 | 请求通过;记录平台凭证上下文;`platform_credential_ingress_coverage_pct` 统计为有效样本 | 鉴权日志 + 指标快照 |
|
||||||
|
| CB-002 | 已开启凭证脱敏与导出审计 | 触发常见错误/导出账单 | 1) 构造4xx/5xx错误 2) 导出账单和审计 | 错误体、报表、导出中无可复用供应方上游凭证;`supplier_credential_exposure_events=0` | 错误样本 + 导出样本 + 脱敏扫描报告 |
|
||||||
|
| CB-003 | 出网审计与告警已开启 | 构造需求方绕过平台直连上游尝试 | 1) 从需求方网络直接访问上游 2) 校验告警与阻断 | 直连被阻断并告警;`direct_supplier_call_by_consumer_events` 记录并闭环处置 | 出网策略命中日志 + 安全事件记录 |
|
||||||
|
| CB-004 | query key 外部拦截策略已开启 | 外部 query key 请求(含 `/v1beta/*`) | 1) 发送带 query key 请求 2) 校验响应与计数 | 外部 query key 全拒绝;`query_key_external_reject_rate_pct=100%` | 网关拦截日志 + 指标快照 |
|
||||||
|
|
||||||
|
## 11. 分波次 Gate(Wave-CN / Wave-Global)
|
||||||
|
|
||||||
|
| Wave | 流量目标 | 观察窗口 | Go 条件(全部满足) | Stop 条件(任一触发) |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| Wave-CN-1 | CN 10% | 24h | P0 模块用例全绿;`cn_takeover=100%`(窗口内);`route_mark_coverage>=99.9%`;5xx 与时延达标;`platform_credential_ingress_coverage_pct=100%` | 5xx > 基线+0.1% 或 账务差错率>0.1% 或 幂等冲突率>0.01% 或 `route_mark_coverage<99.9%` 或 `platform_credential_ingress_coverage_pct<100%` |
|
||||||
|
| Wave-CN-2 | CN 40% | 24h | 延续 Wave-CN-1 全绿;CN adapter 用例全绿;`route_mark_coverage>=99.9%`;`supplier_credential_exposure_events=0` | 同上,或出现协议不兼容回退,或 `supplier_credential_exposure_events>0` |
|
||||||
|
| Wave-CN-3 | CN 70% | 48h | Failover+Stream Guard 用例全绿;连续稳定;`route_mark_coverage>=99.9%`;`direct_supplier_call_by_consumer_events=0` | 同上,或出现流式 replay 异常,或 `direct_supplier_call_by_consumer_events>0` |
|
||||||
|
| Wave-CN-4 | CN 100% | 连续 7 天 | `cn_takeover=100%` 连续成立;账务核对通过;`route_mark_coverage>=99.9%`;`query_key_external_reject_rate_pct=100%` | 任一时段 `cn_takeover<100%` 或 `route_mark_coverage<99.9%` 或 `query_key_external_reject_rate_pct<100%` 立即降级 |
|
||||||
|
| Wave-Global-1 | 全量 20% | 24h | P0/P1 模块核心用例全绿;口径稳定;`route_mark_coverage>=99.9%`;`platform_credential_ingress_coverage_pct=100%` | 质量或账务任一红线触发,或 `route_mark_coverage<99.9%`,或 `platform_credential_ingress_coverage_pct<100%` |
|
||||||
|
| Wave-Global-2 | 全量 40% | 24h | 关键告警稳定;按租户拆分无异常尖刺;`route_mark_coverage>=99.9%`;`supplier_credential_exposure_events=0` | 同上,或 `supplier_credential_exposure_events>0` |
|
||||||
|
| Wave-Global-3 | 全量 60%+ | 连续 7 天 | `overall_takeover>=60%` 连续成立;抽样对账通过;`route_mark_coverage>=99.9%`;`supplier_credential_exposure_events=0`;`direct_supplier_call_by_consumer_events=0`;`query_key_external_reject_rate_pct=100%` | `overall_takeover<60%` 或任一红线触发,或 `route_mark_coverage<99.9%`,或任一凭证边界指标失效 |
|
||||||
|
|
||||||
|
## 12. 执行与出具报告要求
|
||||||
|
|
||||||
|
每轮 Wave 必交付以下证据包:
|
||||||
|
|
||||||
|
1. 模块测试报告(含失败重测记录)。
|
||||||
|
2. 接管率 SQL 原始结果(overall/cn + 趋势)。
|
||||||
|
3. 质量指标快照(P95、5xx、队列深度)。
|
||||||
|
4. 账务对账报告(请求级抽样,至少覆盖 Top 租户)。
|
||||||
|
5. 异常与回滚记录(如发生)。
|
||||||
|
|
||||||
|
验收结论模板:
|
||||||
|
|
||||||
|
1. 通过:满足当前 Wave 全部 Go 条件。
|
||||||
|
2. 有条件通过:存在已知风险但不触发 Stop,需明确补救期限。
|
||||||
|
3. 不通过:触发任一 Stop 条件,必须回切并复盘后重试。
|
||||||
252
docs/router_core_takeover_execution_plan_v3_2026-03-17.md
Normal file
252
docs/router_core_takeover_execution_plan_v3_2026-03-17.md
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
# Router Core 接管执行方案(v3)
|
||||||
|
|
||||||
|
- 版本:v3.0
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 目标阶段:S2(2026-05-16 至 2026-08-15)
|
||||||
|
- 关联文档:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md`
|
||||||
|
- `subapi_connector_contract_v1_2026-03-17.md`
|
||||||
|
- `sub2api_scheduler_billing_flow_deep_dive_v2_2026-03-17.md`
|
||||||
|
|
||||||
|
## 1. 目标与边界
|
||||||
|
|
||||||
|
本方案只解决 S2 的三件事:
|
||||||
|
|
||||||
|
1. 自研 Router Core 主路径接管率(全供应商)`>= 60%`。
|
||||||
|
2. 国内 LLM 供应商主路径接管率 `= 100%`。
|
||||||
|
3. 在不牺牲稳定性与计费正确性的前提下推进迁移(可灰度、可回滚、可审计)。
|
||||||
|
|
||||||
|
边界:
|
||||||
|
|
||||||
|
1. 不在 S2 内引入机器人客户能力(归属 S3)。
|
||||||
|
2. 不在 S2 内上线低成本账号采购模块(归属 S4)。
|
||||||
|
3. S2 允许保留 `subapi connector` 作为兜底与长尾协议承接层。
|
||||||
|
|
||||||
|
## 2. 接管率口径(统一计算,避免歧义)
|
||||||
|
|
||||||
|
### 2.1 主路径定义
|
||||||
|
|
||||||
|
“主路径请求”定义为:
|
||||||
|
|
||||||
|
1. 从统一入口进入并归一到 canonical 主路径端点集合(`/v1/chat/completions`、`/v1/messages`、`/v1/responses`、`/v1beta/*`)。
|
||||||
|
- 说明:`/responses` 等 alias 入口会在 Ingress 层归一到 `/v1/responses` 后再参与统计。
|
||||||
|
2. 经路由决策后实际发往上游模型供应商的在线请求(不含后台管理请求)。
|
||||||
|
3. 按请求数统计,不按 token 统计。
|
||||||
|
|
||||||
|
### 2.2 接管判定
|
||||||
|
|
||||||
|
当以下能力全部由自研 Router Core 执行,判定为“自研接管请求”:
|
||||||
|
|
||||||
|
1. 账号选择(scheduler)
|
||||||
|
2. 并发门控(user/account slot)
|
||||||
|
3. failover 决策
|
||||||
|
4. usage 记录与幂等计费落账
|
||||||
|
|
||||||
|
任一环节仍依赖 `subapi` 内部实现,则该请求记为“subapi 路径请求”。
|
||||||
|
|
||||||
|
### 2.3 公式
|
||||||
|
|
||||||
|
1. 全供应商接管率
|
||||||
|
|
||||||
|
`overall_takeover = self_built_main_path_requests / all_main_path_requests`
|
||||||
|
|
||||||
|
2. 国内供应商接管率
|
||||||
|
|
||||||
|
`cn_takeover = self_built_cn_provider_requests / all_cn_provider_requests`
|
||||||
|
|
||||||
|
S2 验收门槛:
|
||||||
|
|
||||||
|
1. `overall_takeover >= 60%`
|
||||||
|
2. `cn_takeover = 100%`
|
||||||
|
|
||||||
|
## 3. 模块拆分与迁移优先级
|
||||||
|
|
||||||
|
### 3.1 模块清单(按执行优先级)
|
||||||
|
|
||||||
|
| 优先级 | 模块 | S2 目标状态 | 是否必须自研 | 说明 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| P0 | Scheduler Core | GA | 是 | 三层选择(previous/session/load) |
|
||||||
|
| P0 | Concurrency Gate | GA | 是 | user/account 双槽位 + wait queue |
|
||||||
|
| P0 | Failover Orchestrator | GA | 是 | 同号重试/换号/流式 no-replay |
|
||||||
|
| P0 | Usage & Billing Core | GA | 是 | request 级幂等落账 |
|
||||||
|
| P0 | CN Provider Adapter Pack | GA | 是 | 国内供应商 100% 走自研路径 |
|
||||||
|
| P1 | Error Normalization Engine | GA | 是 | OpenAI/Anthropic/Gemini 统一错误语义 |
|
||||||
|
| P1 | Stream Guard Layer | GA | 是 | 已写出流内容后禁止 replay |
|
||||||
|
| P1 | Observability & Audit | GA | 是 | 接管率、扣费冲突、failover 可观测 |
|
||||||
|
| P2 | subapi Connector | 保留 | 否 | 长尾协议与兜底 |
|
||||||
|
| P2 | Non-critical Protocol Compat | Beta | 否 | 可延后到 S3/S4 |
|
||||||
|
|
||||||
|
### 3.2 模块执行顺序(建议 6 个批次)
|
||||||
|
|
||||||
|
1. 批次 A(第 1-2 周):
|
||||||
|
- Scheduler Core
|
||||||
|
- Concurrency Gate
|
||||||
|
- 基础 Observability
|
||||||
|
|
||||||
|
2. 批次 B(第 3-4 周):
|
||||||
|
- Failover Orchestrator
|
||||||
|
- Stream Guard Layer
|
||||||
|
|
||||||
|
3. 批次 C(第 5-6 周):
|
||||||
|
- Usage & Billing Core
|
||||||
|
- 幂等仓储与冲突告警
|
||||||
|
|
||||||
|
4. 批次 D(第 7-8 周):
|
||||||
|
- CN Provider Adapter Pack 全量接入
|
||||||
|
- 国内供应商流量灰度到 70%
|
||||||
|
|
||||||
|
5. 批次 E(第 9 周):
|
||||||
|
- 国内供应商流量 100% 自研接管
|
||||||
|
- `cn_takeover` 验收
|
||||||
|
|
||||||
|
6. 批次 F(第 10-12 周):
|
||||||
|
- 全供应商流量继续切换至 60%+
|
||||||
|
- `overall_takeover` 验收
|
||||||
|
|
||||||
|
## 4. 分阶段迁移策略(含灰度门槛)
|
||||||
|
|
||||||
|
### 4.1 国内供应商迁移(必须 100%)
|
||||||
|
|
||||||
|
1. Wave-CN-1:10%
|
||||||
|
- 前置条件:P0 模块全部可用
|
||||||
|
- 观察窗口:24h
|
||||||
|
- 红线:5xx、计费冲突率、超时率
|
||||||
|
|
||||||
|
2. Wave-CN-2:40%
|
||||||
|
- 前置条件:Wave-CN-1 全部指标通过
|
||||||
|
- 观察窗口:24h
|
||||||
|
|
||||||
|
3. Wave-CN-3:70%
|
||||||
|
- 前置条件:Failover 与 Stream Guard 指标通过
|
||||||
|
- 观察窗口:48h
|
||||||
|
|
||||||
|
4. Wave-CN-4:100%
|
||||||
|
- 前置条件:连续 7 天稳定
|
||||||
|
- 动作:将国内供应商主路径全部切至自研
|
||||||
|
|
||||||
|
### 4.2 全供应商迁移(目标 >=60%)
|
||||||
|
|
||||||
|
1. Wave-Global-1:20%
|
||||||
|
2. Wave-Global-2:40%
|
||||||
|
3. Wave-Global-3:60%+
|
||||||
|
|
||||||
|
每一波都必须具备:
|
||||||
|
|
||||||
|
1. 一键回切到 `subapi connector`
|
||||||
|
2. 独立观察看板(按 provider、tenant、endpoint)
|
||||||
|
3. 账务核对通过(请求级抽样)
|
||||||
|
|
||||||
|
## 5. 验收测试矩阵(可执行)
|
||||||
|
|
||||||
|
### 5.1 模块级验收矩阵
|
||||||
|
|
||||||
|
| 模块 | 测试类型 | 核心用例 | 通过标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| Scheduler Core | 单元+集成 | previous/session/load 三层选择 | 选择层命中率符合预期;错误账号不重复命中 | 调度决策日志 + 测试报告 |
|
||||||
|
| Concurrency Gate | 压测+集成 | user/account 双槽位争用 | 无槽位泄漏;等待队列上限生效;超限返回 429 | 并发压测报告 + Redis key 观测 |
|
||||||
|
| Failover Orchestrator | 集成 | 同号重试、换号重试、切换上限 | 不超过 max switches;重试策略符合配置 | failover trace |
|
||||||
|
| Stream Guard Layer | 集成+回归 | 流已写出后上游错误 | 禁止 replay;无双流拼接 | 流式回归用例结果 |
|
||||||
|
| Usage & Billing Core | 集成+一致性 | 重复 request_id、冲突 fingerprint | 重复不重复扣费;冲突可告警可追踪 | 账务对账报表 |
|
||||||
|
| CN Adapter Pack | 端到端 | 全国内供应商请求链路 | 路由、鉴权、错误映射、计费全通过 | provider e2e 报告 |
|
||||||
|
| Error Normalization | 契约 | OpenAI/Anthropic/Gemini 错误归一 | category/code/retryable 一致 | 契约测试报告 |
|
||||||
|
| Observability & Audit | 集成 | request_id 全链路、接管率统计 | 可追踪率 100%;接管率计算一致 | dashboard 截图 + SQL 校验 |
|
||||||
|
|
||||||
|
### 5.2 阶段性门槛
|
||||||
|
|
||||||
|
1. 质量门槛
|
||||||
|
- 网关附加时延 P95 <= 60ms
|
||||||
|
- 5xx 不高于基线 + 0.1%
|
||||||
|
|
||||||
|
2. 账务门槛
|
||||||
|
- 账务差错率 <= 0.1%
|
||||||
|
- 幂等冲突率 <= 0.01%(超阈值即阻断继续灰度)
|
||||||
|
|
||||||
|
3. 迁移门槛
|
||||||
|
- `cn_takeover = 100%`
|
||||||
|
- `overall_takeover >= 60%`
|
||||||
|
|
||||||
|
## 6. 里程碑与交付物
|
||||||
|
|
||||||
|
| 里程碑 | 时间窗 | 交付物 |
|
||||||
|
|---|---|---|
|
||||||
|
| M1 基础接管能力可用 | 第 2 周末 | P0 模块上线灰度 |
|
||||||
|
| M2 稳定 failover 与流式边界 | 第 4 周末 | failover/stream guard 回归通过 |
|
||||||
|
| M3 幂等计费闭环 | 第 6 周末 | 账务一致性报告 |
|
||||||
|
| M4 国内供应商 100% 接管 | 第 9 周末 | `cn_takeover` 验收报告 |
|
||||||
|
| M5 全供应商 60%+ 接管 | 第 12 周末 | `overall_takeover` 验收报告 |
|
||||||
|
|
||||||
|
## 7. 风险与应对
|
||||||
|
|
||||||
|
1. 风险:接管率统计口径前后不一致
|
||||||
|
- 应对:统一 SQL 统计脚本与看板口径,验收前做双系统对账
|
||||||
|
|
||||||
|
2. 风险:流式边界处理不一致导致客户端异常
|
||||||
|
- 应对:将“写出后禁止 replay”抽象为统一中间层并覆盖回归
|
||||||
|
|
||||||
|
3. 风险:国内供应商适配细节差异过大
|
||||||
|
- 应对:Provider Adapter Pack 先做最小公共面,再逐家补差异策略
|
||||||
|
|
||||||
|
4. 风险:计费幂等冲突上升
|
||||||
|
- 应对:request_id 生成策略收敛 + fingerprint 冲突报警 + 快速止血回切
|
||||||
|
|
||||||
|
## 8. 本周执行清单(可直接开工)
|
||||||
|
|
||||||
|
1. 固化接管率统计 SQL(overall/cn 两套)并接入 dashboard。
|
||||||
|
2. 拉出 P0 四模块的接口清单与 owner,建立每日燃尽图。
|
||||||
|
3. 为 Stream Guard 建立跨协议回归用例(OpenAI/Anthropic/Gemini)。
|
||||||
|
4. 为 Usage & Billing 建立“重复请求/冲突指纹”专项压测与告警规则。
|
||||||
|
5. 按国内供应商清单建立 Adapter 接入优先级(先高频模型再长尾)。
|
||||||
|
|
||||||
|
## 9. 与现有文档的关系
|
||||||
|
|
||||||
|
1. 本文档是 S2 执行层文档,补充了 v2 演进稿中“方向有了但执行口径不够细”的部分。
|
||||||
|
2. `subapi connector` 契约继续有效,S2 作为兜底与长尾承接;不再承担国内供应商主路径。
|
||||||
|
3. 本文档可作为每周项目例会的唯一追踪基线(接管率、质量、账务三条主线)。
|
||||||
|
|
||||||
|
## 10. 实施附件(新增)
|
||||||
|
|
||||||
|
为保证 S2 可执行与可验收,新增以下实施附件:
|
||||||
|
|
||||||
|
1. `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md`
|
||||||
|
- 固化接管率 SQL(overall/cn/趋势)
|
||||||
|
- 明确看板字段与告警阈值
|
||||||
|
2. `router_core_s2_acceptance_test_cases_v1_2026-03-17.md`
|
||||||
|
- 按模块展开验收用例
|
||||||
|
- 固化 Wave-CN / Wave-Global 的 stop/go 条件
|
||||||
|
|
||||||
|
## 11. 兼容与安全运维设计附件(新增)
|
||||||
|
|
||||||
|
为降低 S2 实施期兼容与安全事故风险,新增设计文档:
|
||||||
|
|
||||||
|
1. `subapi_integration_compat_security_reliability_design_v1_2026-03-17.md`
|
||||||
|
- 明确兼容三重 Gate(Schema/Behavior/Performance)
|
||||||
|
- 固化 subapi 集成安全风险台账与防护基线
|
||||||
|
- 补齐“运维简单 + 高可靠”目标架构与两周落地动作
|
||||||
|
|
||||||
|
## 12. 两周执行任务单(新增)
|
||||||
|
|
||||||
|
为确保上述风险控制设计可落地执行,新增:
|
||||||
|
|
||||||
|
1. `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md`
|
||||||
|
- 两周任务排期(里程碑、owner、截止日期)
|
||||||
|
- 明确任务级验收标准与证据包要求
|
||||||
|
- 固化 daily/weekly gate 与 P0/P1/P2 阻断规则
|
||||||
|
|
||||||
|
## 13. 专家审核与博弈机制(新增)
|
||||||
|
|
||||||
|
为确保“可实现集成 + 可实现替换 + 企业级可商用”目标可被独立验证,新增:
|
||||||
|
|
||||||
|
1. `subapi_expert_review_wargame_plan_v1_2026-03-17.md`
|
||||||
|
- 定义专家组成、独立性与回避规则
|
||||||
|
- 采用 Red vs Blue 对抗式评审与四轮审核流程
|
||||||
|
- 固化评分模型、GO/CONDITIONAL GO/NO-GO 决策与一票否决条件
|
||||||
|
|
||||||
|
## 14. 三角色联合评审输入(新增,2026-03-18)
|
||||||
|
|
||||||
|
为强化 S2 接管执行的用户可接受性、测试阻断能力与网关替换可逆性,新增:
|
||||||
|
|
||||||
|
1. `subapi_role_based_review_wargame_optimization_v1_2026-03-18.md`
|
||||||
|
- 用户代表:迁移通知与争议 SLA 门槛
|
||||||
|
- 测试专家:契约漂移/流式 failover/升波证据包门槛
|
||||||
|
- 网关专家:Provider 能力矩阵与降级策略门槛
|
||||||
|
- 相关新增任务:`UXR-*`、`TST-*`、`GAT-*`、`EXP-007`
|
||||||
441
docs/router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md
Normal file
441
docs/router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md
Normal file
@@ -0,0 +1,441 @@
|
|||||||
|
# Router Core 接管率统计 SQL 与看板字段定义(v1)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 适用阶段:S2(2026-05-16 至 2026-08-15)
|
||||||
|
- 关联文档:
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
- `sub2api_scheduler_billing_flow_deep_dive_v2_2026-03-17.md`
|
||||||
|
|
||||||
|
## 1. 口径与前提
|
||||||
|
|
||||||
|
S2 验收口径(与 v3 一致):
|
||||||
|
|
||||||
|
1. `overall_takeover >= 60%`
|
||||||
|
2. `cn_takeover = 100%`
|
||||||
|
|
||||||
|
说明:
|
||||||
|
|
||||||
|
1. 现有 `usage_logs` 已有 `request_type/openai_ws_mode/inbound_endpoint/upstream_endpoint`,但没有“本请求由谁执行主路径(自研 Router Core vs subapi 路径)”的显式字段。
|
||||||
|
2. 因此本文件给出两套 SQL:
|
||||||
|
- T+0 临时口径:基于 `gateway_route_marks` 标记表(可立即落地)。
|
||||||
|
- T+7 验收口径:在 `usage_logs` 增加 `router_engine` 后,直接从事实表统计(推荐作为最终验收口径)。
|
||||||
|
|
||||||
|
## 2. T+0 临时口径(可立即执行)
|
||||||
|
|
||||||
|
### 2.1 建立路由标记表(一次性 DDL)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE IF NOT EXISTS gateway_route_marks (
|
||||||
|
request_id VARCHAR(255) NOT NULL,
|
||||||
|
api_key_id BIGINT NOT NULL,
|
||||||
|
router_engine SMALLINT NOT NULL, -- 1=subapi_path, 2=router_core
|
||||||
|
marked_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
PRIMARY KEY (request_id, api_key_id),
|
||||||
|
CONSTRAINT gateway_route_marks_router_engine_check
|
||||||
|
CHECK (router_engine IN (1, 2))
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_gateway_route_marks_marked_at
|
||||||
|
ON gateway_route_marks (marked_at DESC);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_gateway_route_marks_engine_time
|
||||||
|
ON gateway_route_marks (router_engine, marked_at DESC);
|
||||||
|
```
|
||||||
|
|
||||||
|
写入约定(必须执行):
|
||||||
|
|
||||||
|
1. 每个主路径请求在最终完成 usage 记录前,写入一条 `gateway_route_marks`。
|
||||||
|
2. `router_engine=2` 表示四个关键环节(scheduler/concurrency/failover/billing)均由自研链路执行。
|
||||||
|
3. `router_engine=1` 表示任一关键环节仍依赖 subapi 路径。
|
||||||
|
|
||||||
|
### 2.2 全供应商接管率(overall)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH params AS (
|
||||||
|
SELECT
|
||||||
|
NOW() - INTERVAL '24 hours' AS start_ts,
|
||||||
|
NOW() AS end_ts
|
||||||
|
),
|
||||||
|
main_path AS (
|
||||||
|
SELECT
|
||||||
|
ul.id,
|
||||||
|
ul.request_id,
|
||||||
|
ul.api_key_id,
|
||||||
|
ul.created_at,
|
||||||
|
rm.router_engine
|
||||||
|
FROM usage_logs ul
|
||||||
|
LEFT JOIN gateway_route_marks rm
|
||||||
|
ON rm.request_id = ul.request_id
|
||||||
|
AND rm.api_key_id = ul.api_key_id
|
||||||
|
CROSS JOIN params p
|
||||||
|
WHERE ul.created_at >= p.start_ts
|
||||||
|
AND ul.created_at < p.end_ts
|
||||||
|
AND (
|
||||||
|
ul.inbound_endpoint IN (
|
||||||
|
'/v1/chat/completions',
|
||||||
|
'/v1/messages',
|
||||||
|
'/v1/responses'
|
||||||
|
)
|
||||||
|
OR ul.inbound_endpoint LIKE '/v1beta/%'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS all_main_path_requests,
|
||||||
|
COUNT(*) FILTER (WHERE router_engine = 2) AS self_built_main_path_requests,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (WHERE router_engine = 2)
|
||||||
|
/ NULLIF(COUNT(*), 0),
|
||||||
|
2
|
||||||
|
) AS overall_takeover_pct,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (WHERE router_engine IN (1, 2))
|
||||||
|
/ NULLIF(COUNT(*), 0),
|
||||||
|
2
|
||||||
|
) AS route_mark_coverage_pct
|
||||||
|
FROM main_path;
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:下文 CN 口径 SQL 中的 `cn_platforms` 必须由配置表维护(例如 `gateway_cn_platforms`),禁止在验收 SQL 硬编码固定值。
|
||||||
|
|
||||||
|
### 2.3 国内供应商接管率(cn)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH params AS (
|
||||||
|
SELECT
|
||||||
|
NOW() - INTERVAL '24 hours' AS start_ts,
|
||||||
|
NOW() AS end_ts,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT ARRAY_AGG(platform ORDER BY platform)
|
||||||
|
FROM gateway_cn_platforms
|
||||||
|
WHERE enabled = TRUE
|
||||||
|
),
|
||||||
|
ARRAY[]::TEXT[]
|
||||||
|
) AS cn_platforms
|
||||||
|
),
|
||||||
|
main_path AS (
|
||||||
|
SELECT
|
||||||
|
ul.id,
|
||||||
|
ul.request_id,
|
||||||
|
ul.api_key_id,
|
||||||
|
ul.created_at,
|
||||||
|
a.platform,
|
||||||
|
rm.router_engine
|
||||||
|
FROM usage_logs ul
|
||||||
|
JOIN accounts a
|
||||||
|
ON a.id = ul.account_id
|
||||||
|
LEFT JOIN gateway_route_marks rm
|
||||||
|
ON rm.request_id = ul.request_id
|
||||||
|
AND rm.api_key_id = ul.api_key_id
|
||||||
|
CROSS JOIN params p
|
||||||
|
WHERE ul.created_at >= p.start_ts
|
||||||
|
AND ul.created_at < p.end_ts
|
||||||
|
AND a.platform = ANY(p.cn_platforms)
|
||||||
|
AND (
|
||||||
|
ul.inbound_endpoint IN (
|
||||||
|
'/v1/chat/completions',
|
||||||
|
'/v1/messages',
|
||||||
|
'/v1/responses'
|
||||||
|
)
|
||||||
|
OR ul.inbound_endpoint LIKE '/v1beta/%'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS all_cn_provider_requests,
|
||||||
|
COUNT(*) FILTER (WHERE router_engine = 2) AS self_built_cn_provider_requests,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (WHERE router_engine = 2)
|
||||||
|
/ NULLIF(COUNT(*), 0),
|
||||||
|
2
|
||||||
|
) AS cn_takeover_pct,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (WHERE router_engine IN (1, 2))
|
||||||
|
/ NULLIF(COUNT(*), 0),
|
||||||
|
2
|
||||||
|
) AS route_mark_coverage_pct
|
||||||
|
FROM main_path;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.4 趋势 SQL(按小时 / 按天)
|
||||||
|
|
||||||
|
#### 2.4.1 按小时趋势(overall + cn 同图)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH params AS (
|
||||||
|
SELECT
|
||||||
|
NOW() - INTERVAL '72 hours' AS start_ts,
|
||||||
|
NOW() AS end_ts,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT ARRAY_AGG(platform ORDER BY platform)
|
||||||
|
FROM gateway_cn_platforms
|
||||||
|
WHERE enabled = TRUE
|
||||||
|
),
|
||||||
|
ARRAY[]::TEXT[]
|
||||||
|
) AS cn_platforms
|
||||||
|
),
|
||||||
|
base AS (
|
||||||
|
SELECT
|
||||||
|
DATE_TRUNC('hour', ul.created_at) AS bucket,
|
||||||
|
a.platform,
|
||||||
|
rm.router_engine
|
||||||
|
FROM usage_logs ul
|
||||||
|
JOIN accounts a
|
||||||
|
ON a.id = ul.account_id
|
||||||
|
LEFT JOIN gateway_route_marks rm
|
||||||
|
ON rm.request_id = ul.request_id
|
||||||
|
AND rm.api_key_id = ul.api_key_id
|
||||||
|
CROSS JOIN params p
|
||||||
|
WHERE ul.created_at >= p.start_ts
|
||||||
|
AND ul.created_at < p.end_ts
|
||||||
|
AND (
|
||||||
|
ul.inbound_endpoint IN (
|
||||||
|
'/v1/chat/completions',
|
||||||
|
'/v1/messages',
|
||||||
|
'/v1/responses'
|
||||||
|
)
|
||||||
|
OR ul.inbound_endpoint LIKE '/v1beta/%'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
bucket,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (WHERE router_engine = 2)
|
||||||
|
/ NULLIF(COUNT(*), 0),
|
||||||
|
2
|
||||||
|
) AS overall_takeover_pct,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (
|
||||||
|
WHERE platform = ANY((SELECT cn_platforms FROM params))
|
||||||
|
AND router_engine = 2
|
||||||
|
)
|
||||||
|
/ NULLIF(
|
||||||
|
COUNT(*) FILTER (WHERE platform = ANY((SELECT cn_platforms FROM params))),
|
||||||
|
0
|
||||||
|
),
|
||||||
|
2
|
||||||
|
) AS cn_takeover_pct,
|
||||||
|
COUNT(*) AS total_requests,
|
||||||
|
COUNT(*) FILTER (WHERE platform = ANY((SELECT cn_platforms FROM params))) AS cn_requests
|
||||||
|
FROM base
|
||||||
|
GROUP BY bucket
|
||||||
|
ORDER BY bucket;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.4.2 按天趋势(验收期)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH params AS (
|
||||||
|
SELECT
|
||||||
|
NOW() - INTERVAL '30 days' AS start_ts,
|
||||||
|
NOW() AS end_ts,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT ARRAY_AGG(platform ORDER BY platform)
|
||||||
|
FROM gateway_cn_platforms
|
||||||
|
WHERE enabled = TRUE
|
||||||
|
),
|
||||||
|
ARRAY[]::TEXT[]
|
||||||
|
) AS cn_platforms
|
||||||
|
),
|
||||||
|
base AS (
|
||||||
|
SELECT
|
||||||
|
DATE_TRUNC('day', ul.created_at) AS bucket,
|
||||||
|
a.platform,
|
||||||
|
rm.router_engine
|
||||||
|
FROM usage_logs ul
|
||||||
|
JOIN accounts a
|
||||||
|
ON a.id = ul.account_id
|
||||||
|
LEFT JOIN gateway_route_marks rm
|
||||||
|
ON rm.request_id = ul.request_id
|
||||||
|
AND rm.api_key_id = ul.api_key_id
|
||||||
|
CROSS JOIN params p
|
||||||
|
WHERE ul.created_at >= p.start_ts
|
||||||
|
AND ul.created_at < p.end_ts
|
||||||
|
AND (
|
||||||
|
ul.inbound_endpoint IN (
|
||||||
|
'/v1/chat/completions',
|
||||||
|
'/v1/messages',
|
||||||
|
'/v1/responses'
|
||||||
|
)
|
||||||
|
OR ul.inbound_endpoint LIKE '/v1beta/%'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
bucket,
|
||||||
|
ROUND(100.0 * COUNT(*) FILTER (WHERE router_engine = 2) / NULLIF(COUNT(*), 0), 2) AS overall_takeover_pct,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (
|
||||||
|
WHERE platform = ANY((SELECT cn_platforms FROM params))
|
||||||
|
AND router_engine = 2
|
||||||
|
)
|
||||||
|
/ NULLIF(
|
||||||
|
COUNT(*) FILTER (WHERE platform = ANY((SELECT cn_platforms FROM params))),
|
||||||
|
0
|
||||||
|
),
|
||||||
|
2
|
||||||
|
) AS cn_takeover_pct
|
||||||
|
FROM base
|
||||||
|
GROUP BY bucket
|
||||||
|
ORDER BY bucket;
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. T+7 验收口径(推荐)
|
||||||
|
|
||||||
|
### 3.1 对 `usage_logs` 做最小字段扩展
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER TABLE usage_logs
|
||||||
|
ADD COLUMN IF NOT EXISTS router_engine SMALLINT NOT NULL DEFAULT 0,
|
||||||
|
ADD COLUMN IF NOT EXISTS router_version VARCHAR(32);
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM pg_constraint
|
||||||
|
WHERE conname = 'usage_logs_router_engine_check'
|
||||||
|
) THEN
|
||||||
|
ALTER TABLE usage_logs
|
||||||
|
ADD CONSTRAINT usage_logs_router_engine_check
|
||||||
|
CHECK (router_engine IN (0, 1, 2));
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_usage_logs_router_engine_created_at
|
||||||
|
ON usage_logs (router_engine, created_at DESC);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_usage_logs_created_at_account_id
|
||||||
|
ON usage_logs (created_at DESC, account_id);
|
||||||
|
```
|
||||||
|
|
||||||
|
字段语义:
|
||||||
|
|
||||||
|
1. `0`:未知(历史数据或未标记)。
|
||||||
|
2. `1`:subapi 路径。
|
||||||
|
3. `2`:自研 Router Core 路径(验收口径中的“接管请求”)。
|
||||||
|
|
||||||
|
### 3.2 用标记表回填(可选,建议分批)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH cte AS (
|
||||||
|
SELECT ul.id, rm.router_engine
|
||||||
|
FROM usage_logs ul
|
||||||
|
JOIN gateway_route_marks rm
|
||||||
|
ON rm.request_id = ul.request_id
|
||||||
|
AND rm.api_key_id = ul.api_key_id
|
||||||
|
WHERE ul.router_engine = 0
|
||||||
|
ORDER BY ul.id
|
||||||
|
LIMIT 5000
|
||||||
|
)
|
||||||
|
UPDATE usage_logs ul
|
||||||
|
SET router_engine = cte.router_engine
|
||||||
|
FROM cte
|
||||||
|
WHERE ul.id = cte.id;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 验收 SQL(不依赖标记表)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH params AS (
|
||||||
|
SELECT
|
||||||
|
NOW() - INTERVAL '24 hours' AS start_ts,
|
||||||
|
NOW() AS end_ts,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT ARRAY_AGG(platform ORDER BY platform)
|
||||||
|
FROM gateway_cn_platforms
|
||||||
|
WHERE enabled = TRUE
|
||||||
|
),
|
||||||
|
ARRAY[]::TEXT[]
|
||||||
|
) AS cn_platforms
|
||||||
|
),
|
||||||
|
base AS (
|
||||||
|
SELECT
|
||||||
|
ul.created_at,
|
||||||
|
ul.router_engine,
|
||||||
|
a.platform
|
||||||
|
FROM usage_logs ul
|
||||||
|
JOIN accounts a
|
||||||
|
ON a.id = ul.account_id
|
||||||
|
CROSS JOIN params p
|
||||||
|
WHERE ul.created_at >= p.start_ts
|
||||||
|
AND ul.created_at < p.end_ts
|
||||||
|
AND (
|
||||||
|
ul.inbound_endpoint IN (
|
||||||
|
'/v1/chat/completions',
|
||||||
|
'/v1/messages',
|
||||||
|
'/v1/responses'
|
||||||
|
)
|
||||||
|
OR ul.inbound_endpoint LIKE '/v1beta/%'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
ROUND(100.0 * COUNT(*) FILTER (WHERE router_engine = 2) / NULLIF(COUNT(*), 0), 2) AS overall_takeover_pct,
|
||||||
|
ROUND(
|
||||||
|
100.0 * COUNT(*) FILTER (
|
||||||
|
WHERE platform = ANY((SELECT cn_platforms FROM params))
|
||||||
|
AND router_engine = 2
|
||||||
|
)
|
||||||
|
/ NULLIF(
|
||||||
|
COUNT(*) FILTER (WHERE platform = ANY((SELECT cn_platforms FROM params))),
|
||||||
|
0
|
||||||
|
),
|
||||||
|
2
|
||||||
|
) AS cn_takeover_pct,
|
||||||
|
ROUND(100.0 * COUNT(*) FILTER (WHERE router_engine IN (1, 2)) / NULLIF(COUNT(*), 0), 2) AS route_mark_coverage_pct
|
||||||
|
FROM base;
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:验收 SQL 的 `cn_platforms` 以配置表 `gateway_cn_platforms` 为唯一来源,避免平台清单变更导致口径漂移。
|
||||||
|
|
||||||
|
## 4. 看板字段定义(S2)
|
||||||
|
|
||||||
|
### 4.1 KPI 卡片
|
||||||
|
|
||||||
|
| 字段键 | 展示名 | 公式 | 阈值/目标 | 数据来源 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| `overall_takeover_pct` | 全供应商接管率 | `self_built_main_path_requests / all_main_path_requests * 100` | `>= 60%` | 上述 SQL |
|
||||||
|
| `cn_takeover_pct` | 国内供应商接管率 | `self_built_cn_provider_requests / all_cn_provider_requests * 100` | `= 100%` | 上述 SQL |
|
||||||
|
| `route_mark_coverage_pct` | 路由标记覆盖率 | `marked_requests / all_main_path_requests * 100` | `>= 99.9%` | 上述 SQL |
|
||||||
|
| `billing_error_rate_pct` | 账务差错率 | `billing_error_requests / billed_requests * 100` | `<= 0.1%` | 账务核对任务/报表 |
|
||||||
|
| `billing_conflict_rate_pct` | 幂等冲突率 | `billing_dedup_conflicts / billed_requests * 100` | `<= 0.01%` | 扣费幂等审计计数器 |
|
||||||
|
| `gateway_added_latency_p95_ms` | 网关附加时延P95 | 网关处理时延分位数 | `<= 60ms` | `ops_system_metrics` 或 APM |
|
||||||
|
| `gateway_5xx_delta_pct` | 5xx 相对基线增量 | `current_5xx - baseline_5xx` | `<= +0.1%` | 统一错误指标 |
|
||||||
|
|
||||||
|
### 4.2 维度拆分
|
||||||
|
|
||||||
|
看板必须支持以下维度切片:
|
||||||
|
|
||||||
|
1. `platform`(`anthropic/openai/gemini/antigravity/sora`)
|
||||||
|
2. `group_id`
|
||||||
|
3. `inbound_endpoint`
|
||||||
|
4. `upstream_endpoint`
|
||||||
|
5. `request_type`(sync/stream/openai_ws)
|
||||||
|
6. `api_key_id`(用于租户级排障)
|
||||||
|
|
||||||
|
### 4.3 图表建议
|
||||||
|
|
||||||
|
1. 折线:`overall_takeover_pct` 按小时(72h)+ 按天(30d)。
|
||||||
|
2. 折线:`cn_takeover_pct` 按小时(72h)+ 按天(30d)。
|
||||||
|
3. 堆叠柱:主路径请求量按 `router_engine` 拆分(自研/subapi/未知)。
|
||||||
|
4. 热力图:`platform x inbound_endpoint` 的接管率。
|
||||||
|
5. 散点:`takeover_pct` vs `gateway_added_latency_p95_ms`,用于识别“接管提升但延迟恶化”的点位。
|
||||||
|
|
||||||
|
## 5. 告警规则(与 S2 门槛一致)
|
||||||
|
|
||||||
|
1. `cn_takeover_pct < 100` 持续 5 分钟:`P0`。
|
||||||
|
2. `overall_takeover_pct < 60` 且当前处于 Wave-Global-3:`P1`。
|
||||||
|
3. `route_mark_coverage_pct < 99.9`:`P1`(口径不可信,阻断升级)。
|
||||||
|
4. `billing_conflict_rate_pct > 0.01`:`P0`(立即停止继续灰度)。
|
||||||
|
5. `billing_error_rate_pct > 0.1`:`P0`。
|
||||||
|
6. `gateway_added_latency_p95_ms > 60` 持续 10 分钟:`P1`。
|
||||||
|
7. `gateway_5xx_delta_pct > 0.1` 持续 5 分钟:`P0`。
|
||||||
|
|
||||||
|
## 6. 落地顺序(建议)
|
||||||
|
|
||||||
|
1. 先落 `gateway_route_marks` + 临时 SQL,保证本周就有可观测接管率。
|
||||||
|
2. 再加 `usage_logs.router_engine`,切换到验收口径。
|
||||||
|
3. 验收和周报统一只读“验收口径”看板,避免双口径冲突。
|
||||||
501
docs/s0_wbs_detailed_v1_2026-03-18.md
Normal file
501
docs/s0_wbs_detailed_v1_2026-03-18.md
Normal file
@@ -0,0 +1,501 @@
|
|||||||
|
# S0 阶段详细工作分解结构(WBS)
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 阶段:S0(2026-03-18 至 2026-06-08)
|
||||||
|
> 周期:12周
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. S0 阶段总览
|
||||||
|
|
||||||
|
### 1.1 阶段目标
|
||||||
|
|
||||||
|
| 目标 | 指标 | 验收标准 |
|
||||||
|
|------|------|----------|
|
||||||
|
| Subapi 集成能力(Track B) | 可售卖MVP能力就绪 | 灰度7天可用性>=99.9% |
|
||||||
|
| 用户供应系统 | 首批10家 | 套餐验证成功率>=90% |
|
||||||
|
| API Key 安全体系 | 自建完成 | Key必须包含平台标识+数据库验证 |
|
||||||
|
|
||||||
|
### 1.2 双线并行架构
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ S0 阶段(12周) │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────────────────┐ ┌─────────────────────────────┐ │
|
||||||
|
│  │ Track A: 用户供应系统            │  │ Track B: Subapi集成          │   │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ │ W1-W2 账号挂载模块 │ │ W1-W2 环境搭建 │ │
|
||||||
|
│ │ W3-W4 套餐发布模块 │ │ W3-W4 Connector开发 │ │
|
||||||
|
│ │ W5-W6 调度计费模块 │ │ W5-W6 契约测试 │ │
|
||||||
|
│ │ W7-W8 风控模块 │ │ W7-W8 集成验证 │ │
|
||||||
|
│ │ W9-W10 内部测试 │ │ W9-W10 灰度测试 │ │
|
||||||
|
│ │ W11-W12 首批引入 │ │ W11-W12 上线准备 │ │
|
||||||
|
│ └─────────────────────────────────┘ └─────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ 并行任务:C0 公共基础设施(贯穿始终) │ │
|
||||||
|
│ │ C0-1: API Key安全体系 C0-2: 监控系统 C0-3: 基础架构 │ │
|
||||||
|
│ └─────────────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Track A: 用户供应系统开发
|
||||||
|
|
||||||
|
### A0: 准备阶段(W1)
|
||||||
|
|
||||||
|
#### A0.1 项目初始化
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A0.1.1 | 项目环境搭建 | 后端Lead | 16h | - | Git仓库创建,CI/CD配置完成 |
|
||||||
|
| A0.1.2 | 技术方案评审 | 架构师 | 8h | A0.1.1 | 技术方案文档通过 |
|
||||||
|
| A0.1.3 | 数据库设计评审 | 后端/DBA | 8h | A0.1.2 | ER图评审通过 |
|
||||||
|
|
||||||
|
#### A0.2 API Key 安全体系
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A0.2.1 | Key生成算法设计 | 安全/后端 | 16h | A0.1.2 | 算法文档完成 |
|
||||||
|
| A0.2.2 | Key生成服务开发 | 后端 | 24h | A0.2.1 | 单元测试通过 |
|
||||||
|
| A0.2.3 | Key验证服务开发 | 后端 | 24h | A0.2.2 | 验证逻辑完成 |
|
||||||
|
| A0.2.4 | API Key数据库设计 | 后端/DBA | 8h | A0.1.3 | 表结构设计完成 |
|
||||||
|
| A0.2.5 | API Key管理API开发 | 后端 | 32h | A0.2.4 | CRUD接口完成 |
|
||||||
|
| A0.2.6 | Key轮换服务开发 | 后端 | 16h | A0.2.5 | 轮换功能完成 |
|
||||||
|
| A0.2.7 | 激活码生成服务开发 | 后端 | 16h | A0.2.1 | 激活码功能完成 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A1: 账号挂载模块(W1-W2)
|
||||||
|
|
||||||
|
#### A1.1 基础能力
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A1.1.1 | 供应方账号表设计 | 后端/DBA | 8h | A0.1.3 | 表结构完成 |
|
||||||
|
| A1.1.2 | 账号挂载API开发 | 后端 | 24h | A1.1.1 | 挂载接口完成 |
|
||||||
|
| A1.1.3 | 账号格式校验开发 | 后端 | 8h | A1.1.2 | 校验通过 |
|
||||||
|
| A1.1.4 | 账号有效性验证开发 | 后端 | 24h | A1.1.3 | 验证通过 |
|
||||||
|
|
||||||
|
#### A1.2 验证服务
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A1.2.1 | 供应商API验证适配器 | 后端 | 32h | A1.1.4 | 5家供应商适配完成 |
|
||||||
|
| A1.2.2 | 额度查询服务开发 | 后端 | 24h | A1.2.1 | 额度查询完成 |
|
||||||
|
| A1.2.3 | ToS合规检查开发 | 安全/合规 | 24h | A1.2.1 | 合规检查完成 |
|
||||||
|
| A1.2.4 | 风险评估服务开发 | 后端/安全 | 24h | A1.2.3 | 风险评分完成 |
|
||||||
|
|
||||||
|
#### A1.3 存储与安全
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A1.3.1 | KMS加密集成 | 安全/后端 | 16h | A1.1.1 | 加密完成 |
|
||||||
|
| A1.3.2 | 账号存储服务开发 | 后端 | 16h | A1.3.1 | 存储完成 |
|
||||||
|
| A1.3.3 | 账号状态管理开发 | 后端 | 16h | A1.3.2 | 状态管理完成 |
|
||||||
|
|
||||||
|
#### A1.4 前端开发
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A1.4.1 | 账号挂载页面开发 | 前端 | 24h | A1.1.2 | 页面完成 |
|
||||||
|
| A1.4.2 | 账号列表页面开发 | 前端 | 16h | A1.3.3 | 列表完成 |
|
||||||
|
| A1.4.3 | 账号详情页面开发 | 前端 | 16h | A1.4.2 | 详情完成 |
|
||||||
|
|
||||||
|
#### A1.5 测试与集成
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A1.5.1 | 账号挂载单元测试 | 后端 | 16h | A1.3.3 | 测试通过 |
|
||||||
|
| A1.5.2 | 账号挂载集成测试 | 测试 | 16h | A1.4.3 | 测试通过 |
|
||||||
|
| A1.5.3 | 账号模块验收测试 | 测试 | 8h | A1.5.2 | 验收通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A2: 套餐发布模块(W3-W4)
|
||||||
|
|
||||||
|
#### A2.1 套餐管理
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A2.1.1 | 供应套餐表设计 | 后端/DBA | 8h | A1.1.1 | 表结构完成 |
|
||||||
|
| A2.1.2 | 套餐创建API开发 | 后端 | 24h | A2.1.1 | 创建接口完成 |
|
||||||
|
| A2.1.3 | 套餐上下架开发 | 后端 | 16h | A2.1.2 | 上下架完成 |
|
||||||
|
| A2.1.4 | 套餐定价服务开发 | 后端 | 24h | A2.1.3 | 定价完成 |
|
||||||
|
|
||||||
|
#### A2.2 套餐展示
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A2.2.1 | 套餐列表API开发 | 后端 | 16h | A2.1.4 | 列表接口完成 |
|
||||||
|
| A2.2.2 | 套餐筛选功能开发 | 后端 | 16h | A2.2.1 | 筛选完成 |
|
||||||
|
| A2.2.3 | 套餐详情API开发 | 后端 | 8h | A2.2.1 | 详情完成 |
|
||||||
|
|
||||||
|
#### A2.3 前端开发
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A2.3.1 | 套餐发布页面开发 | 前端 | 24h | A2.1.4 | 发布页面完成 |
|
||||||
|
| A2.3.2 | 套餐列表页面开发 | 前端 | 16h | A2.2.2 | 列表页面完成 |
|
||||||
|
| A2.3.3 | 套餐详情页面开发 | 前端 | 16h | A2.2.3 | 详情页面完成 |
|
||||||
|
|
||||||
|
#### A2.4 测试
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A2.4.1 | 套餐模块单元测试 | 后端 | 16h | A2.1.4 | 测试通过 |
|
||||||
|
| A2.4.2 | 套餐模块集成测试 | 测试 | 16h | A2.3.3 | 测试通过 |
|
||||||
|
| A2.4.3 | 套餐模块验收测试 | 测试 | 8h | A2.4.2 | 验收通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A3: 调度计费模块(W5-W6)
|
||||||
|
|
||||||
|
#### A3.1 调度引擎
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A3.1.1 | 套餐调度策略设计 | 架构/后端 | 16h | A2.4.3 | 策略文档完成 |
|
||||||
|
| A3.1.2 | 调度核心服务开发 | 后端 | 40h | A3.1.1 | 调度完成 |
|
||||||
|
| A3.1.3 | 负载均衡策略开发 | 后端 | 24h | A3.1.2 | 均衡完成 |
|
||||||
|
| A3.1.4 | 失败转移服务开发 | 后端 | 24h | A3.1.2 | 转移完成 |
|
||||||
|
|
||||||
|
#### A3.2 计费服务
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A3.2.1 | 使用记录表设计 | 后端/DBA | 8h | A3.1.2 | 表结构完成 |
|
||||||
|
| A3.2.2 | 实时计费服务开发 | 后端 | 32h | A3.2.1 | 计费完成 |
|
||||||
|
| A3.2.3 | 预扣服务开发 | 后端 | 24h | A3.2.2 | 预扣完成 |
|
||||||
|
| A3.2.4 | 结算服务开发 | 后端 | 24h | A3.2.3 | 结算完成 |
|
||||||
|
|
||||||
|
#### A3.3 订单管理
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A3.3.1 | 订单表设计 | 后端/DBA | 8h | A3.2.2 | 表结构完成 |
|
||||||
|
| A3.3.2 | 购买流程API开发 | 后端 | 32h | A3.3.1 | 购买完成 |
|
||||||
|
| A3.3.3 | 支付集成开发 | 后端 | 24h | A3.3.2 | 支付完成 |
|
||||||
|
| A3.3.4 | 账单查询API开发 | 后端 | 16h | A3.3.3 | 账单完成 |
|
||||||
|
|
||||||
|
#### A3.4 测试
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A3.4.1 | 调度模块单元测试 | 后端 | 24h | A3.1.4 | 测试通过 |
|
||||||
|
| A3.4.2 | 计费模块单元测试 | 后端 | 24h | A3.2.4 | 测试通过 |
|
||||||
|
| A3.4.3 | 计费集成测试 | 测试 | 24h | A3.3.4 | 测试通过 |
|
||||||
|
| A3.4.4 | 调度计费验收测试 | 测试 | 16h | A3.4.3 | 验收通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A4: 风控模块(W7-W8)
|
||||||
|
|
||||||
|
#### A4.1 风险识别
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A4.1.1 | 风控规则引擎设计 | 安全/后端 | 24h | A3.4.4 | 规则文档完成 |
|
||||||
|
| A4.1.2 | 欺诈检测服务开发 | 后端 | 40h | A4.1.1 | 检测完成 |
|
||||||
|
| A4.1.3 | 异常检测服务开发 | 后端 | 32h | A4.1.2 | 异常检测完成 |
|
||||||
|
| A4.1.4 | 风险评估服务开发 | 后端 | 24h | A4.1.3 | 评估完成 |
|
||||||
|
|
||||||
|
#### A4.2 防护措施
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A4.2.1 | 限流服务开发 | 后端 | 24h | A4.1.4 | 限流完成 |
|
||||||
|
| A4.2.2 | IP限制服务开发 | 后端 | 16h | A4.2.1 | IP限制完成 |
|
||||||
|
| A4.2.3 | 冻结服务开发 | 后端 | 24h | A4.2.2 | 冻结完成 |
|
||||||
|
|
||||||
|
#### A4.3 保证金
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A4.3.1 | 保证金服务开发 | 后端 | 24h | A4.2.3 | 保证金完成 |
|
||||||
|
| A4.3.2 | 保证金退还服务开发 | 后端 | 16h | A4.3.1 | 退还完成 |
|
||||||
|
|
||||||
|
#### A4.4 测试
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A4.4.1 | 风控模块单元测试 | 后端 | 24h | A4.3.2 | 测试通过 |
|
||||||
|
| A4.4.2 | 风控模块集成测试 | 测试 | 24h | A4.4.1 | 测试通过 |
|
||||||
|
| A4.4.3 | 风控模块验收测试 | 测试 | 16h | A4.4.2 | 验收通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A5: 内部测试(W9-W10)
|
||||||
|
|
||||||
|
#### A5.1 系统测试
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A5.1.1 | 功能回归测试 | 测试 | 32h | A4.4.3 | 回归通过 |
|
||||||
|
| A5.1.2 | 性能测试 | 测试 | 24h | A5.1.1 | 性能达标 |
|
||||||
|
| A5.1.3 | 安全测试 | 安全 | 24h | A5.1.2 | 安全通过 |
|
||||||
|
| A5.1.4 | 压力测试 | 测试 | 16h | A5.1.3 | 压力达标 |
|
||||||
|
|
||||||
|
#### A5.2 缺陷修复
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A5.2.1 | 测试缺陷修复 | 后端/前端 | 40h | A5.1.4 | 缺陷修复完成 |
|
||||||
|
| A5.2.2 | 回归缺陷验证 | 测试 | 16h | A5.2.1 | 验证通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A6: 首批引入(W11-W12)
|
||||||
|
|
||||||
|
#### A6.1 运营准备
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A6.1.1 | 运营文档编写 | 运营 | 16h | A5.2.2 | 文档完成 |
|
||||||
|
| A6.1.2 | 客服培训 | 运营 | 8h | A6.1.1 | 培训完成 |
|
||||||
|
| A6.1.3 | 引入流程设计 | 产品 | 16h | A6.1.2 | 流程完成 |
|
||||||
|
|
||||||
|
#### A6.2 试点引入
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A6.2.1 | 首批供应方邀请 | 运营 | 16h | A6.1.3 | 10家完成 |
|
||||||
|
| A6.2.2 | 账号挂载指导 | 运营 | 24h | A6.2.1 | 挂载完成 |
|
||||||
|
| A6.2.3 | 套餐发布指导 | 运营 | 16h | A6.2.2 | 发布完成 |
|
||||||
|
|
||||||
|
#### A6.3 验收
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| A6.3.1 | 试点效果评估 | 产品/运营 | 16h | A6.2.3 | 评估完成 |
|
||||||
|
| A6.3.2 | 问题收集与优化 | 后端/前端 | 24h | A6.3.1 | 优化完成 |
|
||||||
|
| A6.3.3 | S0阶段验收 | PM | 8h | A6.3.2 | 验收通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Track B: Subapi 集成
|
||||||
|
|
||||||
|
### B0: 准备阶段(W1)
|
||||||
|
|
||||||
|
#### B0.1 环境搭建
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B0.1.1 | Subapi环境部署 | 运维 | 24h | - | 部署完成 |
|
||||||
|
| B0.1.2 | 网络策略配置 | 运维 | 8h | B0.1.1 | 网络配置完成 |
|
||||||
|
| B0.1.3 | 监控接入 | 运维 | 16h | B0.1.2 | 监控就绪 |
|
||||||
|
|
||||||
|
#### B0.2 技术准备
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B0.2.1 | Subapi代码分析 | 后端 | 24h | - | 分析完成 |
|
||||||
|
| B0.2.2 | Connector架构设计 | 架构 | 16h | B0.2.1 | 架构文档完成 |
|
||||||
|
| B0.2.3 | 接口契约定义 | 后端/架构 | 16h | B0.2.2 | 契约完成 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### B1: Connector 开发(W3-W4)
|
||||||
|
|
||||||
|
#### B1.1 核心开发
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B1.1.1 | 请求转发服务开发 | 后端 | 40h | B0.2.3 | 转发完成 |
|
||||||
|
| B1.1.2 | 响应归一服务开发 | 后端 | 32h | B1.1.1 | 归一完成 |
|
||||||
|
| B1.1.3 | 错误处理服务开发 | 后端 | 24h | B1.1.2 | 错误处理完成 |
|
||||||
|
| B1.1.4 | 流式处理服务开发 | 后端 | 32h | B1.1.3 | 流式完成 |
|
||||||
|
|
||||||
|
#### B1.2 适配器开发
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B1.2.1 | OpenAI适配器开发 | 后端 | 24h | B1.1.1 | OpenAI完成 |
|
||||||
|
| B1.2.2 | Anthropic适配器开发 | 后端 | 24h | B1.2.1 | Anthropic完成 |
|
||||||
|
| B1.2.3 | Gemini适配器开发 | 后端 | 24h | B1.2.2 | Gemini完成 |
|
||||||
|
| B1.2.4 | 国内供应商适配器开发 | 后端 | 40h | B1.2.3 | 适配完成 |
|
||||||
|
|
||||||
|
#### B1.3 测试
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B1.3.1 | Connector单元测试 | 后端 | 24h | B1.2.4 | 测试通过 |
|
||||||
|
| B1.3.2 | Connector集成测试 | 测试 | 24h | B1.3.1 | 测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### B2: 契约测试(W5-W6)
|
||||||
|
|
||||||
|
#### B2.1 契约测试开发
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B2.1.1 | 契约测试框架搭建 | 测试 | 16h | B1.3.2 | 框架完成 |
|
||||||
|
| B2.1.2 | OpenAI契约测试开发 | 测试 | 24h | B2.1.1 | 测试完成 |
|
||||||
|
| B2.1.3 | Anthropic契约测试开发 | 测试 | 24h | B2.1.2 | 测试完成 |
|
||||||
|
| B2.1.4 | Gemini契约测试开发 | 测试 | 24h | B2.1.3 | 测试完成 |
|
||||||
|
| B2.1.5 | 错误码映射测试开发 | 测试 | 16h | B2.1.4 | 测试完成 |
|
||||||
|
|
||||||
|
#### B2.2 测试执行
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B2.2.1 | 契约测试执行 | 测试 | 24h | B2.1.5 | 测试通过 |
|
||||||
|
| B2.2.2 | 契约回归测试 | 测试 | 16h | B2.2.1 | 回归通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### B3: 集成验证(W7-W8)
|
||||||
|
|
||||||
|
#### B3.1 集成开发
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B3.1.1 | 与现有系统集成 | 后端 | 32h | B2.2.2 | 集成完成 |
|
||||||
|
| B3.1.2 | 认证集成开发 | 后端 | 24h | B3.1.1 | 认证完成 |
|
||||||
|
| B3.1.3 | 计费集成开发 | 后端 | 24h | B3.1.2 | 计费完成 |
|
||||||
|
| B3.1.4 | 日志审计集成开发 | 后端 | 16h | B3.1.3 | 日志完成 |
|
||||||
|
|
||||||
|
#### B3.2 验证测试
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B3.2.1 | 端到端测试 | 测试 | 32h | B3.1.4 | E2E通过 |
|
||||||
|
| B3.2.2 | 灰度验证测试 | 测试 | 24h | B3.2.1 | 灰度通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### B4: 灰度测试(W9-W10)
|
||||||
|
|
||||||
|
#### B4.1 灰度发布
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B4.1.1 | 灰度策略配置 | 运维 | 16h | B3.2.2 | 配置完成 |
|
||||||
|
| B4.1.2 | 5%灰度发布 | 运维 | 8h | B4.1.1 | 5%完成 |
|
||||||
|
| B4.1.3 | 20%灰度发布 | 运维 | 8h | B4.1.2 | 20%完成 |
|
||||||
|
| B4.1.4 | 50%灰度发布 | 运维 | 8h | B4.1.3 | 50%完成 |
|
||||||
|
| B4.1.5 | 100%灰度发布 | 运维 | 8h | B4.1.4 | 100%完成 |
|
||||||
|
|
||||||
|
#### B4.2 监控与调优
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B4.2.1 | 灰度监控分析 | 运维 | 24h | B4.1.5 | 监控就绪 |
|
||||||
|
| B4.2.2 | 性能调优 | 后端 | 16h | B4.2.1 | 调优完成 |
|
||||||
|
| B4.2.3 | 稳定性验证 | 测试/运维 | 24h | B4.2.2 | 稳定验证通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### B5: 上线准备(W11-W12)
|
||||||
|
|
||||||
|
#### B5.1 上线检查
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B5.1.1 | 上线检查清单 | 运维 | 8h | B4.2.3 | 检查完成 |
|
||||||
|
| B5.1.2 | 回滚方案验证 | 运维 | 16h | B5.1.1 | 回滚验证通过 |
|
||||||
|
| B5.1.3 | 值班安排 | 运维 | 8h | B5.1.2 | 值班表完成 |
|
||||||
|
|
||||||
|
#### B5.2 上线
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| B5.2.1 | 正式环境发布 | 运维 | 8h | B5.1.3 | 发布完成 |
|
||||||
|
| B5.2.2 | 上线验证 | 测试/运维 | 16h | B5.2.1 | 验证通过 |
|
||||||
|
| B5.2.3 | Track B阶段验收 | PM | 8h | B5.2.2 | 验收通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. C0: 公共基础设施(贯穿始终)
|
||||||
|
|
||||||
|
### C0.1 API Key 安全体系
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| C0.1.1 | API Key安全基线巡检(跨Track) | 安全 | 16h | A0.2.7 | 完成 |
|
||||||
|
| C0.1.2 | Key泄露应急预案与演练(跨Track) | 安全/运维 | 16h | C0.1.1 | 完成 |
|
||||||
|
| C0.1.3 | Key生命周期监控告警接入(跨Track) | 运维 | 16h | C0.1.2 | 完成 |
|
||||||
|
| C0.1.4 | API Key专项安全验收(跨Track) | 安全/测试 | 24h | C0.1.3 | 通过 |
|
||||||
|
|
||||||
|
### C0.2 监控系统
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| C0.2.1 | 监控系统搭建 | 运维 | 24h | - | 完成 |
|
||||||
|
| C0.2.2 | 业务指标接入 | 运维 | 32h | C0.2.1 | 接入完成 |
|
||||||
|
| C0.2.3 | 告警配置 | 运维 | 16h | C0.2.2 | 配置完成 |
|
||||||
|
|
||||||
|
### C0.3 基础架构
|
||||||
|
|
||||||
|
| 任务ID | 任务名称 | 负责人 | 工时 | 依赖 | 验收标准 |
|
||||||
|
|--------|----------|--------|------|------|----------|
|
||||||
|
| C0.3.1 | CI/CD流水线 | 运维 | 40h | - | 完成 |
|
||||||
|
| C0.3.2 | 代码质量门禁 | 运维 | 24h | C0.3.1 | 完成 |
|
||||||
|
| C0.3.3 | 容器化部署 | 运维 | 32h | C0.3.2 | 完成 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 关键里程碑
|
||||||
|
|
||||||
|
| 里程碑 | 时间 | 完成条件 |
|
||||||
|
|--------|------|----------|
|
||||||
|
| M1: 技术方案评审通过 | W1末 | A0.1.2 + B0.2.2 |
|
||||||
|
| M2: API Key体系完成 | W2末 | C0.1.4 |
|
||||||
|
| M3: 账号挂载模块完成 | W2末 | A1.5.3 |
|
||||||
|
| M4: 套餐发布模块完成 | W4末 | A2.4.3 |
|
||||||
|
| M5: 调度计费模块完成 | W6末 | A3.4.4 + B2.2.2 |
|
||||||
|
| M6: 风控模块完成 | W8末 | A4.4.3 + B3.2.2 |
|
||||||
|
| M7: 内部测试完成 | W10末 | A5.2.2 + B4.2.3 |
|
||||||
|
| M8: S0阶段验收 | W12末 | A6.3.3 + B5.2.3 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 资源分配建议
|
||||||
|
|
||||||
|
### 6.1 人力需求
|
||||||
|
|
||||||
|
| 角色 | 数量 | 工作内容 |
|
||||||
|
|------|------|----------|
|
||||||
|
| 后端开发 | 4人 | 核心业务开发 |
|
||||||
|
| 前端开发 | 2人 | 前端页面开发 |
|
||||||
|
| 测试 | 2人 | 测试执行 |
|
||||||
|
| 运维 | 1人 | 基础设施/部署 |
|
||||||
|
| 安全 | 1人(兼职) | 安全审查 |
|
||||||
|
| 架构 | 1人(兼职) | 技术方案 |
|
||||||
|
| 产品 | 1人 | 产品设计 |
|
||||||
|
| 运营 | 1人 | 运营支持 |
|
||||||
|
|
||||||
|
### 6.2 并行冲突提示
|
||||||
|
|
||||||
|
| 时间段 | Track A | Track B | 冲突风险 |
|
||||||
|
|--------|---------|---------|----------|
|
||||||
|
| W1-W2 | A0+A1 | B0 | 低 |
|
||||||
|
| W3-W4 | A2 | B1 | 中 |
|
||||||
|
| W5-W6 | A3 | B2 | 中 |
|
||||||
|
| W7-W8 | A4 | B3 | 中 |
|
||||||
|
| W9-W10 | A5 | B4 | 低 |
|
||||||
|
| W11-W12 | A6 | B5 | 低 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 验收标准汇总
|
||||||
|
|
||||||
|
### S0 阶段验收标准
|
||||||
|
|
||||||
|
| 指标 | 目标 | 验收条件 |
|
||||||
|
|------|------|----------|
|
||||||
|
| 可用性 | >=99.9% | 灰度7天 |
|
||||||
|
| 账务差错率 | <=0.1% | 测试验证 |
|
||||||
|
| 供应方引入 | >=10家 | 实际引入 |
|
||||||
|
| 套餐验证成功率 | >=90% | 验证测试 |
|
||||||
|
| API Key安全 | 平台标识+DB验证 | 安全测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档版本**:v1.0
|
||||||
|
**下次更新**:根据实际执行情况调整
|
||||||
404
docs/s2_staged_verification_mechanism_v1_2026-03-18.md
Normal file
404
docs/s2_staged_verification_mechanism_v1_2026-03-18.md
Normal file
@@ -0,0 +1,404 @@
|
|||||||
|
# S2 阶段分阶段验证机制设计(补充章节)
|
||||||
|
|
||||||
|
> 本章节对 `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md` 中的 S2 阶段进行细化,补充分阶段验证机制和中间检查点设计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 分阶段验证机制总览
|
||||||
|
|
||||||
|
### 1.1 三阶段推进策略
|
||||||
|
|
||||||
|
```
|
||||||
|
S2 阶段时间线:2026-05-16 至 2026-08-15(共 13 周)
|
||||||
|
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ │
|
||||||
|
│ 阶段 A 阶段 B 阶段 C │
|
||||||
|
│ ────── ────── ────── │
|
||||||
|
│ W1-W4 W5-W8 W9-W13 │
|
||||||
|
│ (5月-6月) (6月-7月) (7月-8月) │
|
||||||
|
│ │
|
||||||
|
│ 10% ─────▶ 30% ─────▶ 40% ──────▶ 60% │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ 灰度验证 扩大验证 中间检查 目标达成 │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ ▼ ▼ ▼ ▼ │
|
||||||
|
│ 可回滚 可回滚 需决策 不可逆 │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.2 各阶段核心目标
|
||||||
|
|
||||||
|
| 阶段 | 时间 | 接管率目标 | 核心验证点 |
|
||||||
|
|------|------|-----------|-----------|
|
||||||
|
| **S2-A** | W1-W4 | 10% | 核心链路可用性、基础指标达标 |
|
||||||
|
| **S2-B** | W5-W8 | 30% | 规模化稳定性、运维流程验证 |
|
||||||
|
| **S2-C1** | W9-W10 | 40% | **中间检查点** - 决定是否继续 |
|
||||||
|
| **S2-C2** | W11-W13 | 60% | 全面接管、达成S2目标 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. S2-A 阶段:灰度验证(第1-4周)
|
||||||
|
|
||||||
|
### 2.1 目标
|
||||||
|
- 接管率达到 10%
|
||||||
|
- 验证核心链路可用性
|
||||||
|
|
||||||
|
### 2.2 实施策略
|
||||||
|
|
||||||
|
| 批次 | 灰度比例 | 租户范围 | 供应商范围 |
|
||||||
|
|------|----------|----------|-----------|
|
||||||
|
| A1 | 1% | 1-2个试点租户 | 国内供应商(2家) |
|
||||||
|
| A2 | 3% | 5个租户 | 国内供应商(3家) |
|
||||||
|
| A3 | 5% | 10个租户 | 国内供应商全量 |
|
||||||
|
| A4 | 10% | 20个租户 | 国内+海外重点供应商 |
|
||||||
|
|
||||||
|
### 2.3 验收标准
|
||||||
|
|
||||||
|
| 指标类别 | 指标名称 | 目标值 | 严重阈值 |
|
||||||
|
|----------|----------|--------|----------|
|
||||||
|
| **可用性** | 网关可用率 | >= 99.9% | < 99.5% |
|
||||||
|
| **时延** | P95 额外时延 | <= 60ms | > 100ms |
|
||||||
|
| **正确性** | 路由准确率 | >= 99% | < 98% |
|
||||||
|
| **账务** | 账务差错率 | <= 0.1% | > 0.5% |
|
||||||
|
| **回退** | fallback成功率 | >= 95% | < 90% |
|
||||||
|
|
||||||
|
### 2.4 通过条件(Gate A)
|
||||||
|
|
||||||
|
- [ ] 连续 2 周可用率 >= 99.9%
|
||||||
|
- [ ] P95 时延 <= 60ms
|
||||||
|
- [ ] 路由准确率 >= 99%
|
||||||
|
- [ ] 无 P0/P1 级事故
|
||||||
|
- [ ] 运维团队熟练掌握回滚操作
|
||||||
|
|
||||||
|
### 2.5 如果未通过
|
||||||
|
|
||||||
|
**回滚策略**:
|
||||||
|
1. 立即停止新租户接入
|
||||||
|
2. 现有租户切回 subapi 路径
|
||||||
|
3. 分析根因,修复后进入下一批次
|
||||||
|
|
||||||
|
**补救措施**:
|
||||||
|
- 延长 A 阶段 2-4 周
|
||||||
|
- 增加资源投入
|
||||||
|
- 引入专家支持
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. S2-B 阶段:扩大验证(第5-8周)
|
||||||
|
|
||||||
|
### 3.1 目标
|
||||||
|
- 接管率达到 30%
|
||||||
|
- 验证规模化稳定性
|
||||||
|
|
||||||
|
### 3.2 实施策略
|
||||||
|
|
||||||
|
| 批次 | 灰度比例 | 租户范围 | 供应商范围 |
|
||||||
|
|------|----------|----------|-----------|
|
||||||
|
| B1 | 15% | 50个租户 | 全供应商(国内100%) |
|
||||||
|
| B2 | 20% | 80个租户 | 全供应商 |
|
||||||
|
| B3 | 25% | 120个租户 | 全供应商 |
|
||||||
|
| B4 | 30% | 150个租户 | 全供应商 |
|
||||||
|
|
||||||
|
### 3.3 验收标准
|
||||||
|
|
||||||
|
| 指标类别 | 指标名称 | 目标值 | 严重阈值 |
|
||||||
|
|----------|----------|--------|----------|
|
||||||
|
| **可用性** | 网关可用率 | >= 99.95% | < 99.8% |
|
||||||
|
| **时延** | P99 额外时延 | <= 100ms | > 150ms |
|
||||||
|
| **扩展性** | 扩容响应时间 | <= 5min | > 10min |
|
||||||
|
| **可观测** | 告警响应时间 | <= 1min | > 3min |
|
||||||
|
| **回退** | fallback成功率 | >= 97% | < 93% |
|
||||||
|
|
||||||
|
### 3.4 通过条件(Gate B)
|
||||||
|
|
||||||
|
- [ ] 连续 2 周可用率 >= 99.95%
|
||||||
|
- [ ] P99 时延 <= 100ms
|
||||||
|
- [ ] 成功完成 1 次故障演练
|
||||||
|
- [ ] 运维团队完成故障响应手册
|
||||||
|
- [ ] 无 P0 级事故,P1 级 <= 2 次
|
||||||
|
|
||||||
|
### 3.5 如果未通过
|
||||||
|
|
||||||
|
**决策矩阵**:
|
||||||
|
|
||||||
|
| 问题类型 | 影响程度 | 处理方式 |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| 可用性未达标 | 中 | 延长 B 阶段,增加资源 |
|
||||||
|
| 时延未达标 | 中 | 优化路由算法,增加缓存 |
|
||||||
|
| 规模化瓶颈 | 高 | 架构调整,可能回滚 |
|
||||||
|
| 账务错误 | 严重 | 立即回滚,修复后重来 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. S2-C1 阶段:40% 中间检查点(第9-10周)
|
||||||
|
|
||||||
|
### 4.1 重要性说明
|
||||||
|
|
||||||
|
40% 检查点是 S2 阶段的关键决策点:
|
||||||
|
|
||||||
|
1. **验证规模化是否稳定**:30% → 40% 是从"小规模"到"中等规模"的跨越
|
||||||
|
2. **为 60% 目标打基础**:40% 验证通过,60% 才有信心推进
|
||||||
|
3. **留出纠错空间**:即使 40% 未达标,还有时间调整
|
||||||
|
|
||||||
|
### 4.2 目标
|
||||||
|
- 接管率达到 40%
|
||||||
|
- 全供应商口径验证
|
||||||
|
|
||||||
|
### 4.3 实施策略
|
||||||
|
|
||||||
|
| 批次 | 灰度比例 | 租户范围 | 供应商范围 |
|
||||||
|
|------|----------|----------|-----------|
|
||||||
|
| C1-1 | 35% | 200个租户 | 全供应商 |
|
||||||
|
| C1-2 | 40% | 250个租户 | 全供应商 |
|
||||||
|
|
||||||
|
### 4.4 验收标准(中间检查点)
|
||||||
|
|
||||||
|
| 指标类别 | 指标名称 | 目标值 | 严重阈值 | 红灯阈值 |
|
||||||
|
|----------|----------|--------|----------|----------|
|
||||||
|
| **可用性** | 网关可用率 | >= 99.95% | < 99.8% | < 99.5% |
|
||||||
|
| **时延** | P95 额外时延 | <= 60ms | > 80ms | > 100ms |
|
||||||
|
| **时延** | P99 额外时延 | <= 100ms | > 120ms | > 150ms |
|
||||||
|
| **正确性** | 路由准确率 | >= 99.5% | < 99% | < 98% |
|
||||||
|
| **账务** | 账务差错率 | <= 0.1% | > 0.2% | > 0.5% |
|
||||||
|
| **回退** | fallback成功率 | >= 97% | < 95% | < 90% |
|
||||||
|
| **容量** | 峰值承载能力 | >= 目标1.2倍 | - | < 目标 |
|
||||||
|
|
||||||
|
### 4.5 通过条件(Gate C1)
|
||||||
|
|
||||||
|
- [ ] 连续 2 周可用率 >= 99.95%
|
||||||
|
- [ ] P95 时延 <= 60ms,P99 <= 100ms
|
||||||
|
- [ ] 路由准确率 >= 99.5%
|
||||||
|
- [ ] 成功完成 2 次故障演练
|
||||||
|
- [ ] 无 P0 级事故,P1 级 <= 1 次
|
||||||
|
- [ ] 运维团队确认可继续推进
|
||||||
|
|
||||||
|
### 4.6 决策结果
|
||||||
|
|
||||||
|
```
|
||||||
|
┌────────────────────────┐
|
||||||
|
│ 40% 中间检查点 │
|
||||||
|
│ 决策会议 │
|
||||||
|
└──────────┬───────────┘
|
||||||
|
│
|
||||||
|
┌────────────────┼────────────────┐
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌─────────┐ ┌───────────┐ ┌─────────┐
|
||||||
|
│ 继续推进 │ │ 有条件通过 │ │ 暂停/ │
|
||||||
|
│ (GO) │ │(CONDITIONAL)│ │ 回滚 │
|
||||||
|
│ │ │ │ │(NO-GO) │
|
||||||
|
└─────────┘ └───────────┘ └─────────┘
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
60%目标 附加整改项 分析根因
|
||||||
|
继续推进 2周内完成 重新评估
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.7 红灯阈值(立即触发决策复核)
|
||||||
|
|
||||||
|
以下情况立即触发决策复核,可能导致暂停或回滚:
|
||||||
|
|
||||||
|
1. **连续 3 天可用率 < 99.5%**
|
||||||
|
2. **P95 时延连续 3 天 > 100ms**
|
||||||
|
3. **账务差错率 > 0.5%**
|
||||||
|
4. **发生 P0 级事故**
|
||||||
|
5. **任何 2 项严重阈值同时触发**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. S2-C2 阶段:60% 目标达成(第11-13周)
|
||||||
|
|
||||||
|
### 5.1 目标
|
||||||
|
- 接管率达到 60%(全供应商口径)
|
||||||
|
- 国内供应商接管率 = 100%
|
||||||
|
|
||||||
|
### 5.2 实施策略
|
||||||
|
|
||||||
|
| 批次 | 灰度比例 | 租户范围 | 供应商范围 |
|
||||||
|
|------|----------|----------|-----------|
|
||||||
|
| C2-1 | 45% | 300个租户 | 全供应商 |
|
||||||
|
| C2-2 | 50% | 350个租户 | 全供应商 |
|
||||||
|
| C2-3 | 55% | 380个租户 | 全供应商 |
|
||||||
|
| C2-4 | 60% | 400个租户 | 全供应商 |
|
||||||
|
|
||||||
|
### 5.3 验收标准
|
||||||
|
|
||||||
|
| 指标类别 | 指标名称 | 目标值 | 严重阈值 |
|
||||||
|
|----------|----------|--------|----------|
|
||||||
|
| **可用性** | 网关可用率 | >= 99.95% | < 99.9% |
|
||||||
|
| **时延** | P95 额外时延 | <= 60ms | > 80ms |
|
||||||
|
| **正确性** | 路由准确率 | >= 99.5% | < 99% |
|
||||||
|
| **账务** | 账务差错率 | <= 0.1% | > 0.15% |
|
||||||
|
| **接管率** | 全供应商接管率 | >= 60% | < 55% |
|
||||||
|
| **接管率** | 国内供应商接管率 | = 100% | < 95% |
|
||||||
|
|
||||||
|
### 5.4 通过条件(Gate C2)
|
||||||
|
|
||||||
|
- [ ] 连续 2 周可用率 >= 99.95%
|
||||||
|
- [ ] P95 时延 <= 60ms
|
||||||
|
- [ ] 全供应商接管率 >= 60%
|
||||||
|
- [ ] 国内供应商接管率 = 100%
|
||||||
|
- [ ] 账务差错率 <= 0.1%
|
||||||
|
- [ ] 无 P0 级事故,P1 级 <= 1 次
|
||||||
|
|
||||||
|
### 5.5 S2 阶段完成标志
|
||||||
|
|
||||||
|
- [ ] Router Core 接管率 >= 60%(全供应商)
|
||||||
|
- [ ] 国内供应商接管率 = 100%
|
||||||
|
- [ ] 所有验收标准持续 2 周达标
|
||||||
|
- [ ] 运维团队确认 subapi 可作为备用
|
||||||
|
- [ ] 文档齐全:运维手册、故障响应、交接清单
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 回滚机制设计
|
||||||
|
|
||||||
|
### 6.1 回滚触发条件
|
||||||
|
|
||||||
|
| 触发条件 | 触发方式 | 回滚级别 |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| 可用率 < 99.5% 连续 3 天 | 自动告警 + 人工确认 | 租户级/全量 |
|
||||||
|
| P95 时延 > 100ms 连续 3 天 | 自动告警 + 人工确认 | 租户级/全量 |
|
||||||
|
| 账务差错率 > 0.5% | 自动触发 | 全量 |
|
||||||
|
| P0 级事故 | 自动触发 | 全量 |
|
||||||
|
| 任何红灯阈值触发 | 决策会议 | 视情况 |
|
||||||
|
|
||||||
|
### 6.2 回滚执行流程
|
||||||
|
|
||||||
|
```
|
||||||
|
回滚触发
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 成立故障响应组 │ 5分钟内
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 评估影响范围 │ 10分钟内
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
┌────┴────┐
|
||||||
|
▼ ▼
|
||||||
|
┌───────┐ ┌───────┐
|
||||||
|
│租户级 │ │ 全量 │
|
||||||
|
│回滚 │ │ 回滚 │
|
||||||
|
└───┬───┘ └───┬───┘
|
||||||
|
│ │
|
||||||
|
▼ ▼
|
||||||
|
切换到 切换到
|
||||||
|
subapi subapi
|
||||||
|
│ │
|
||||||
|
▼ ▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 验证回滚效果 │ 30分钟内
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 通知受影响方 │ 1小时内
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 根因分析与修复 │ 24小时内
|
||||||
|
└─────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 回滚后决策
|
||||||
|
|
||||||
|
| 回滚原因 | 回滚后决策 |
|
||||||
|
|----------|------------|
|
||||||
|
| 规模化瓶颈 | 延长当前阶段,增加资源 |
|
||||||
|
| 路由算法问题 | 修复后重新验证 |
|
||||||
|
| 账务错误 | 修复后从上一稳定点重新开始 |
|
||||||
|
| 架构性问题 | 重新评估 S2 目标,可能降级 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 风险缓解措施
|
||||||
|
|
||||||
|
### 7.1 技术风险缓解
|
||||||
|
|
||||||
|
| 风险 | 可能性 | 影响 | 缓解措施 |
|
||||||
|
|------|--------|------|----------|
|
||||||
|
| Router Core 性能不足 | 中 | 高 | 提前压测,预留容量 |
|
||||||
|
| 路由算法效果差 | 中 | 高 | 多算法并行验证 |
|
||||||
|
| 账务偏差 | 低 | 严重 | 实时监控 + T+1 对账 |
|
||||||
|
| 上游 API 变更 | 高 | 中 | 契约测试 + 版本兼容层 |
|
||||||
|
|
||||||
|
### 7.2 运营风险缓解
|
||||||
|
|
||||||
|
| 风险 | 可能性 | 影响 | 缓解措施 |
|
||||||
|
|------|--------|------|----------|
|
||||||
|
| 运维团队经验不足 | 中 | 高 | 提前培训 + 专家驻场 |
|
||||||
|
| 故障响应不及时 | 中 | 中 | 自动化告警 + OnCall 轮值 |
|
||||||
|
| 文档不完善 | 高 | 中 | 文档建设纳入验收 |
|
||||||
|
|
||||||
|
### 7.3 外部风险缓解
|
||||||
|
|
||||||
|
| 风险 | 可能性 | 影响 | 缓解措施 |
|
||||||
|
|------|--------|------|----------|
|
||||||
|
| subapi 版本不兼容 | 中 | 高 | 版本锁定 + 契约测试 |
|
||||||
|
| 供应商 API 变更 | 高 | 中 | 监控 + 快速响应机制 |
|
||||||
|
| 客户投诉增加 | 中 | 中 | 客户成功团队介入 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 监控与告警配置
|
||||||
|
|
||||||
|
### 8.1 核心监控指标
|
||||||
|
|
||||||
|
| 指标 | 监控面板 | 告警阈值 | 告警方式 |
|
||||||
|
|------|----------|----------|----------|
|
||||||
|
| 接管率 | Router Core Dashboard | < 目标 -10% | 严重 |
|
||||||
|
| 可用率 | Gateway Dashboard | < 99.9% | 警告 |
|
||||||
|
| P95 时延 | Latency Dashboard | > 80ms | 警告 |
|
||||||
|
| 账务差错率 | Billing Dashboard | > 0.2% | 严重 |
|
||||||
|
| fallback 成功率 | Routing Dashboard | < 95% | 警告 |
|
||||||
|
|
||||||
|
### 8.2 每日检查清单
|
||||||
|
|
||||||
|
- [ ] 接管率是否达到当前阶段目标
|
||||||
|
- [ ] 各项指标是否在严重阈值以内
|
||||||
|
- [ ] 是否有未处理的 P1/P0 告警
|
||||||
|
- [ ] 回滚演练是否定期执行
|
||||||
|
|
||||||
|
### 8.3 周度评审
|
||||||
|
|
||||||
|
- [ ] 指标趋势分析
|
||||||
|
- [ ] 故障演练总结
|
||||||
|
- [ ] 下周灰度计划确认
|
||||||
|
- [ ] 风险更新
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 里程碑汇总
|
||||||
|
|
||||||
|
| 里程碑 | 时间 | 目标 | 验收方式 |
|
||||||
|
|--------|------|------|----------|
|
||||||
|
| S2-A 结束 | W4 末 | 10% 接管率 | Gate A 通过 |
|
||||||
|
| S2-B 结束 | W8 末 | 30% 接管率 | Gate B 通过 |
|
||||||
|
| **C1 检查点** | **W10 末** | **40% 接管率** | **Gate C1 通过** |
|
||||||
|
| S2 完成 | W13 末 | 60% 接管率 | Gate C2 通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 责任矩阵
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 协作方 | 汇报频率 |
|
||||||
|
|------|--------|--------|----------|
|
||||||
|
| 灰度执行 | 平台工程负责人 | SRE | 每日 |
|
||||||
|
| 指标监控 | SRE | 平台工程 | 实时 |
|
||||||
|
| 故障响应 | SRE OnCall | 平台工程 | 随时 |
|
||||||
|
| 决策评审 | 技术负责人 | 架构/SRE/产品 | 每周+检查点 |
|
||||||
|
| 客户沟通 | 产品经理 | 运营 | 按需 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**章节状态**:初稿
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md`
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
169
docs/s2_takeover_buffer_strategy_v1_2026-03-18.md
Normal file
169
docs/s2_takeover_buffer_strategy_v1_2026-03-18.md
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
# S2 接管率目标预留 Buffer 策略
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:为60%接管率目标预留弹性空间,确保目标可达成
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 目标回顾
|
||||||
|
|
||||||
|
| 指标 | 目标值 | 验收时间 | 难度 |
|
||||||
|
|------|--------|----------|------|
|
||||||
|
| 全供应商接管率 | >= 60% | S2结束 | 高 |
|
||||||
|
| 国内供应商接管率 | = 100% | S2结束 | 中 |
|
||||||
|
|
||||||
|
**评审意见**:60%目标激进,需要预留buffer
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Buffer 设计
|
||||||
|
|
||||||
|
### 2.1 多层 Buffer 机制
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────┐
|
||||||
|
│ S2 接管率目标 │
|
||||||
|
├─────────────────────────────────────────────────────────┤
|
||||||
|
│ 理想目标: 60% │
|
||||||
|
│ │
|
||||||
|
│ ├── 乐观场景 (+10%): 70% → 奖励目标 │
|
||||||
|
│ ├── 正常场景 (0%): 60% → 正常达标 │
|
||||||
|
│ ├── 保守场景 (-10%): 50% → 可接受底线 │
|
||||||
|
│ └── 止损场景 (-20%): 40% → 启动应急预案 │
|
||||||
|
└─────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 分阶段 Buffer 分配
|
||||||
|
|
||||||
|
| 阶段 | 时间 | 目标接管率 | 最低可接受 | 缓冲 |
|
||||||
|
|------|------|------------|------------|------|
|
||||||
|
| S2-A | W1-W4 | 10% | 5% | 5% |
|
||||||
|
| S2-B | W5-W8 | 30% | 20% | 10% |
|
||||||
|
| S2-C1 | W9-W10 | **40%** | 30% | 10% |
|
||||||
|
| S2-C2 | W11-W13 | **60%** | 40% | 20% |
|
||||||
|
|
||||||
|
> ⚠️ **关键**:40%作为中间检查点,是最重要的决策门
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 弹性策略
|
||||||
|
|
||||||
|
### 3.1 供应商分类接管优先级
|
||||||
|
|
||||||
|
| 优先级 | 供应商 | 目标接管率 | 难度 | 备注 |
|
||||||
|
|--------|--------|------------|------|------|
|
||||||
|
| P0 | 国内供应商 | 100% | 中 | 必须达成 |
|
||||||
|
| P1 | Azure OpenAI | 80% | 低 | 协议友好 |
|
||||||
|
| P2 | OpenAI | 60% | 高 | 需深度适配 |
|
||||||
|
| P3 | Anthropic | 50% | 高 | 需深度适配 |
|
||||||
|
| P4 | Gemini | 40% | 中 | 需适配 |
|
||||||
|
|
||||||
|
**策略**:优先保障国内供应商100%接管,确保核心差异化
|
||||||
|
|
||||||
|
### 3.2 降级策略
|
||||||
|
|
||||||
|
| 场景 | 触发条件 | 降级动作 |
|
||||||
|
|------|----------|----------|
|
||||||
|
| 轻度风险 | S2-C1未达30% | 增加资源投入 |
|
||||||
|
| 中度风险 | S2-C2未达40% | 延长S2周期 |
|
||||||
|
| 重度风险 | S2-C2未达30% | 调整目标为50% |
|
||||||
|
| 严重风险 | 关键技术问题 | 回退到subapi |
|
||||||
|
|
||||||
|
### 3.3 验收标准细化
|
||||||
|
|
||||||
|
| 检查点 | 验收指标 | 通过条件 | 失败处理 |
|
||||||
|
|--------|----------|----------|----------|
|
||||||
|
| Gate A | 10%接管 | 稳定性>99% | 回滚检查 |
|
||||||
|
| Gate B | 30%接管 | 稳定性>99.5% | 增加资源 |
|
||||||
|
| **Gate C1** | **40%接管** | 稳定性>99.9% | **决策点** |
|
||||||
|
| Gate C2 | 60%接管 | 稳定性>99.9% | 达标 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 风险预留
|
||||||
|
|
||||||
|
### 4.1 技术风险 Buffer
|
||||||
|
|
||||||
|
| 风险项 | 可能性 | 影响 | Buffer措施 |
|
||||||
|
|--------|--------|------|------------|
|
||||||
|
| Router Core 自研超期 | 中 | 高 | 预留3周buffer |
|
||||||
|
| subapi兼容性问题 | 中 | 中 | 保留双轨 |
|
||||||
|
| 供应商API变更 | 高 | 中 | 快速响应机制 |
|
||||||
|
|
||||||
|
### 4.2 时间 Buffer
|
||||||
|
|
||||||
|
| 阶段 | 原计划 | 预留Buffer | 实际可用 |
|
||||||
|
|------|--------|------------|----------|
|
||||||
|
| S2总周期 | 13周 | +3周 | 16周 |
|
||||||
|
| 关键里程碑 | 4个 | +1个缓冲 | 5个 |
|
||||||
|
|
||||||
|
### 4.3 资源 Buffer
|
||||||
|
|
||||||
|
| 资源项 | 需求 | 预留 | 总量 |
|
||||||
|
|--------|------|------|------|
|
||||||
|
| 开发人员 | 8人 | +2人 | 10人 |
|
||||||
|
| 测试人员 | 1人 | +1人 | 2人 |
|
||||||
|
| 架构师 | 1人 | +0.5人 | 1.5人 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 决策矩阵
|
||||||
|
|
||||||
|
### 5.1 S2-C1 检查点决策(40%)
|
||||||
|
|
||||||
|
| 实际接管率 | 稳定性 | 决策 |
|
||||||
|
|------------|--------|------|
|
||||||
|
| >= 40% | >= 99.9% | 继续按计划推进 |
|
||||||
|
| 35-40% | >= 99.9% | 正常推进,增加监控 |
|
||||||
|
| 30-35% | >= 99.9% | 增加资源,评估延后 |
|
||||||
|
| < 30% | < 99.9% | **暂停,启动应急预案** |
|
||||||
|
|
||||||
|
### 5.2 S2-C2 检查点决策(60%)
|
||||||
|
|
||||||
|
| 实际接管率 | 稳定性 | 决策 |
|
||||||
|
|------------|--------|------|
|
||||||
|
| >= 60% | >= 99.9% | S2完成,目标达成 |
|
||||||
|
| 50-60% | >= 99.9% | 触发补救措施并延长S2,不调整终验目标 |
|
||||||
|
| 40-50% | >= 99.9% | 延长S2 4周并冻结升波 |
|
||||||
|
| < 40% | < 99.9% | 回退到40%阶段,重建 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 应急预案
|
||||||
|
|
||||||
|
### 6.1 预案级别
|
||||||
|
|
||||||
|
| 级别 | 触发条件 | 动作 |
|
||||||
|
|------|----------|------|
|
||||||
|
| 黄色预警 | 任一Gate未达标 | 增加资源/延长周期 |
|
||||||
|
| 橙色预警 | 连续两个Gate未达标 | 调整目标 |
|
||||||
|
| 红色预警 | 关键技术问题 | 回退到subapi |
|
||||||
|
|
||||||
|
### 6.2 回退机制
|
||||||
|
|
||||||
|
```
|
||||||
|
60%终验目标 ──未达标──▶ 延长S2并补救 ──仍未达标──▶ 回退40%阶段重建
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
不改目标 保持双轨运行 保持双轨运行
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 过程预警提案(不改变终验口径)
|
||||||
|
|
||||||
|
基于Buffer策略,建议:
|
||||||
|
|
||||||
|
| 指标 | 终验目标 | 过程预警区间 | 理由 |
|
||||||
|
|------|----------|--------------|------|
|
||||||
|
| 全供应商接管率 | **>= 60%** | 40%-60%(用于过程控制) | 预留调整空间但不改变终验 |
|
||||||
|
| 国内供应商接管率 | **= 100%** | =100%(全程硬约束) | 核心差异化目标 |
|
||||||
|
| 止损阈值 | - | <40% | 触发回退与重建 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:S2 Buffer策略设计
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md`
|
||||||
|
- `s2_staged_verification_mechanism_v1_2026-03-18.md`
|
||||||
399
docs/security_api_key_vulnerability_analysis_v1_2026-03-18.md
Normal file
399
docs/security_api_key_vulnerability_analysis_v1_2026-03-18.md
Normal file
@@ -0,0 +1,399 @@
|
|||||||
|
# 安全漏洞:Subapi API Key 跨部署验证问题
|
||||||
|
|
||||||
|
> 发现时间:2026-03-18
|
||||||
|
> 漏洞等级:**严重(P0)**
|
||||||
|
> 状态:历史漏洞分析文档(用于复盘),不作为当前实现基线。
|
||||||
|
> 实施基线:`security_solution_v1_2026-03-18.md`(HMAC-SHA256 方案)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 漏洞描述
|
||||||
|
|
||||||
|
### 1.1 问题现象
|
||||||
|
|
||||||
|
Subapi 分发给用户的 API Key 和激活码只验证算法正确性,未验证 Key 是否由当前系统生成。
|
||||||
|
|
||||||
|
这意味着:
|
||||||
|
- 部署在 A 服务器的 Subapi 生成的 API Key,可以在部署在 B 服务器的 Subapi 中通过验证
|
||||||
|
- 不同独立部署之间的 API Key 可以互相串用
|
||||||
|
|
||||||
|
### 1.2 漏洞原理
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Subapi's current verification logic (reconstructed -- the original analysis
# marks this as speculation; confirm against the actual Subapi source).
def verify_api_key(key):
    # Only format and checksum are checked; both are computable by anyone
    # who knows the key layout, so this proves nothing about origin.
    if validate_format(key) and validate_checksum(key):
        return True  # accepted
    return False

# Problem: the key's origin (which deployment generated it) is never verified,
# so a key minted on deployment A passes verification on deployment B.
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.3 影响范围
|
||||||
|
|
||||||
|
| 场景 | 影响 |
|
||||||
|
|------|------|
|
||||||
|
| 平台间串用 | 用户的 Key 可能在其他平台也能用 |
|
||||||
|
| 账号盗用 | 窃取的 Key 可以在任意部署使用 |
|
||||||
|
| 收益损失 | 供应方的配额可能被其他平台盗用 |
|
||||||
|
| 账务错误 | 调用记录和计费可能记到错误平台 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 漏洞影响我们的规划
|
||||||
|
|
||||||
|
### 2.1 如果集成 Subapi
|
||||||
|
|
||||||
|
- 我们的用户可能使用其他 Subapi 部署生成的 Key
|
||||||
|
- 我们的计费可能被绕过
|
||||||
|
- 供应方的收益可能被截取
|
||||||
|
|
||||||
|
### 2.2 解决方案
|
||||||
|
|
||||||
|
**方案 A:自建 API Key 体系(推荐)**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 我们的 API Key 设计
|
||||||
|
def generate_api_key(user_id, platform_id):
    # Illustrative sketch only: `version`, `user_hash` and `checksum` are
    # placeholders, not defined in this snippet.
    # Key structure: {platform_prefix}{version}{user_hash}{checksum}
    # platform_prefix: our platform identifier (e.g. "LGW")
    # user_hash: hash of the user ID
    # checksum: CRC32/MD5 check value

    key = f"lgw_{version}_{user_hash}_{checksum}"
    return key
|
||||||
|
|
||||||
|
def verify_api_key(key):
    # Verification outline (steps 2-4 are elided in this sketch):
    # 1. Validate format
    # 2. Validate platform identifier (must be ours)
    # 3. Validate checksum
    # 4. Confirm the key exists in OUR database

    if not key.startswith("lgw_"):
        return False  # not one of our keys

    # Remaining verification steps...
    return True
|
||||||
|
```
|
||||||
|
|
||||||
|
**方案 B:使用 Token 代替 API Key**
|
||||||
|
|
||||||
|
- 不直接传递 API Key
|
||||||
|
- 使用 OAuth 2.0 风格的 Access Token
|
||||||
|
- Token 绑定到具体部署,无法跨部署使用
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 我们的 API Key 设计规范
|
||||||
|
|
||||||
|
### 3.1 Key 结构
|
||||||
|
|
||||||
|
```
|
||||||
|
{LGW}-{版本}-{用户哈希}-{时间戳}-{随机数}-{校验和}
|
||||||
|
|
||||||
|
示例:
|
||||||
|
lgw-v1-u7f3a2b1-t1700000000-r8f3a2-e9d4c1b2
|
||||||
|
```
|
||||||
|
|
||||||
|
| 字段 | 说明 | 长度 |
|
||||||
|
|------|------|------|
|
||||||
|
| LGW | 平台标识 | 3 |
|
||||||
|
| v1 | 版本号 | 2 |
|
||||||
|
| u7f3a2b1 | 用户哈希 | 8 |
|
||||||
|
| t1700000000 | 时间戳 | 10 |
|
||||||
|
| r8f3a2 | 随机数 | 6 |
|
||||||
|
| e9d4c1b2 | 校验和 | 8 |
|
||||||
|
|
||||||
|
### 3.2 验证流程
|
||||||
|
|
||||||
|
```
|
||||||
|
收到 API Key 请求
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 1. 格式验证 │ ──▶ 格式错误 → 400
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 2. 平台标识 │ ──▶ 不是 "lgw-" → 401
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 3. 校验和验证 │ ──▶ 校验失败 → 401
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 4. 数据库验证 │ ──▶ Key 不存在/已禁用 → 401
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 5. 权限验证 │ ──▶ 无权限 → 403
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
验证通过
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 激活码设计
|
||||||
|
|
||||||
|
```
|
||||||
|
{LGW}-{类型}-{用户ID}-{过期时间}-{随机数}-{校验和}
|
||||||
|
|
||||||
|
示例:
|
||||||
|
lgw-act-1000-20260331-r8f3a2-e9d4c1b2
|
||||||
|
```
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| lgw | 平台标识 |
|
||||||
|
| act | 激活码类型 |
|
||||||
|
| 1000 | 用户ID |
|
||||||
|
| 20260331 | 过期日期 |
|
||||||
|
| r8f3a2 | 随机数 |
|
||||||
|
| e9d4c1b2 | 校验和 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 技术实现
|
||||||
|
|
||||||
|
### 4.1 Key 生成服务
|
||||||
|
|
||||||
|
```python
|
||||||
|
import hashlib
|
||||||
|
import secrets
|
||||||
|
import time
|
||||||
|
|
||||||
|
class APIKeyGenerator:
    """Mint and structurally validate platform-scoped API keys.

    Key layout: ``lgw-v1-<user_hash>-<timestamp>-<nonce>-<checksum>``.

    NOTE(review): the trailing checksum is an *unkeyed* MD5 over the key
    body, so it only proves the format is intact -- anyone who knows the
    layout can forge a passing key.  The database lookup (step 4, done in
    the controller) is the real gate; the HMAC-SHA256 scheme in
    ``security_solution_v1`` supersedes this design.
    """

    PLATFORM_PREFIX = "lgw"
    VERSION = "v1"

    @classmethod
    def generate(cls, user_id: int) -> str:
        """Return a freshly minted key string for *user_id*."""
        fields = [
            cls.PLATFORM_PREFIX,
            cls.VERSION,
            # 8-hex-char user hash
            hashlib.md5(str(user_id).encode()).hexdigest()[:8],
            # 10-digit unix timestamp
            str(int(time.time())),
            # 6-hex-char random nonce
            secrets.token_hex(3)[:6],
        ]
        body = "-".join(fields)
        # 8-hex-char unkeyed checksum over the body (see class note).
        digest = hashlib.md5(body.encode()).hexdigest()[:8]
        return f"{body}-{digest}"

    @classmethod
    def verify(cls, key: str) -> bool:
        """Structurally validate *key*: field count, platform prefix, checksum.

        Existence/permission checks against the database are performed in
        the controller, not here.
        """
        segments = key.split("-")
        if len(segments) != 6:
            return False
        if segments[0] != cls.PLATFORM_PREFIX:
            return False
        body = "-".join(segments[:5])
        return segments[5] == hashlib.md5(body.encode()).hexdigest()[:8]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 激活码生成服务
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ActivationCodeGenerator:
    """Mint activation codes: ``lgw-act-<user_id>-<YYYYMMDD>-<nonce>-<checksum>``.

    NOTE(review): like ``APIKeyGenerator``, the trailing MD5 checksum is
    unkeyed and therefore forgeable; the HMAC-SHA256 variant in
    ``security_solution_v1`` is the implementation baseline.
    """

    PLATFORM_PREFIX = "lgw"
    CODE_TYPE = "act"

    @classmethod
    def generate(cls, user_id: int, expiry_days: int) -> str:
        """Return an activation code for *user_id* valid for *expiry_days* days.

        Fix: the original snippet used ``datetime``/``timedelta`` without
        importing them anywhere, which raises NameError at call time; import
        them locally here.  Also renamed the local ``random`` to ``nonce`` so
        it no longer shadows the stdlib module name.
        """
        from datetime import datetime, timedelta

        # Expiry date encoded as YYYYMMDD.
        expiry = datetime.now() + timedelta(days=expiry_days)
        expiry_str = expiry.strftime("%Y%m%d")

        # 6-hex-char random nonce.
        nonce = secrets.token_hex(3)[:6]

        raw = f"{cls.PLATFORM_PREFIX}-{cls.CODE_TYPE}-{user_id}-{expiry_str}-{nonce}"

        # 8-hex-char unkeyed MD5 checksum over the code body (see class note).
        checksum = hashlib.md5(raw.encode()).hexdigest()[:8]

        return f"{raw}-{checksum}"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 数据库设计
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- API Keys 表
|
||||||
|
CREATE TABLE api_keys (
|
||||||
|
id BIGINT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
user_id BIGINT NOT NULL,
|
||||||
|
key_hash VARCHAR(64) NOT NULL UNIQUE COMMENT 'Key 的哈希(用于查询)',
|
||||||
|
key_prefix VARCHAR(20) NOT NULL COMMENT 'Key 前缀(用于展示)',
|
||||||
|
|
||||||
|
-- 绑定信息
|
||||||
|
team_id BIGINT,
|
||||||
|
organization_id BIGINT,
|
||||||
|
|
||||||
|
-- 权限
|
||||||
|
permissions JSON COMMENT '权限列表',
|
||||||
|
allowed_models JSON COMMENT '允许的模型列表',
|
||||||
|
allowed_ips JSON COMMENT 'IP 白名单',
|
||||||
|
|
||||||
|
-- 限制
|
||||||
|
rate_limit_rpm INT DEFAULT 60,
|
||||||
|
rate_limit_tpm INT DEFAULT 100000,
|
||||||
|
max_concurrent INT DEFAULT 10,
|
||||||
|
|
||||||
|
-- 状态
|
||||||
|
status VARCHAR(20) DEFAULT 'active' COMMENT 'active/disabled/expired',
|
||||||
|
|
||||||
|
-- 时间
|
||||||
|
expires_at TIMESTAMP,
|
||||||
|
last_used_at TIMESTAMP,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
-- 审计
|
||||||
|
created_by BIGINT,
|
||||||
|
ip_address VARCHAR(45),
|
||||||
|
description VARCHAR(200),
|
||||||
|
|
||||||
|
INDEX idx_user_id (user_id),
|
||||||
|
INDEX idx_key_hash (key_hash),
|
||||||
|
INDEX idx_status (status),
|
||||||
|
INDEX idx_expires_at (expires_at)
|
||||||
|
) COMMENT 'API Keys 表';
|
||||||
|
|
||||||
|
-- 激活码表
|
||||||
|
CREATE TABLE activation_codes (
|
||||||
|
id BIGINT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
code_hash VARCHAR(64) NOT NULL UNIQUE COMMENT '激活码哈希',
|
||||||
|
code_prefix VARCHAR(20) NOT NULL COMMENT '激活码前缀',
|
||||||
|
|
||||||
|
-- 绑定信息
|
||||||
|
user_id BIGINT NOT NULL,
|
||||||
|
target_type VARCHAR(20) COMMENT '激活目标类型: subscription/package',
|
||||||
|
target_id BIGINT COMMENT '激活目标ID',
|
||||||
|
|
||||||
|
-- 状态
|
||||||
|
status VARCHAR(20) DEFAULT 'unused' COMMENT 'unused/used/expired',
|
||||||
|
used_at TIMESTAMP,
|
||||||
|
used_by BIGINT COMMENT '使用者ID',
|
||||||
|
|
||||||
|
-- 时间
|
||||||
|
expires_at TIMESTAMP NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
INDEX idx_code_hash (code_hash),
|
||||||
|
INDEX idx_user_id (user_id),
|
||||||
|
INDEX idx_status (status),
|
||||||
|
INDEX idx_expires_at (expires_at)
|
||||||
|
) COMMENT '激活码表';
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 与 Subapi 集成时的处理
|
||||||
|
|
||||||
|
### 6.1 方案:我们的 Gateway 作为唯一入口
|
||||||
|
|
||||||
|
```
|
||||||
|
用户请求
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
我们的 Gateway(验证 Key 来源)
|
||||||
|
│
|
||||||
|
├── 我们的 Key → 处理
|
||||||
|
│
|
||||||
|
└── Subapi 格式的 Key → 拒绝或转发到 Subapi
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 API Key 识别逻辑
|
||||||
|
|
||||||
|
```python
|
||||||
|
def identify_key_type(key: str) -> str:
    """Classify an incoming API key by its prefix.

    Returns ``"own"`` for our platform keys (``lgw-``), ``"openai"`` for raw
    OpenAI keys (``sk-``), and ``"unknown"`` for anything else.
    """
    prefix_table = (
        ("lgw-", "own"),
        ("sk-", "openai"),
    )
    for prefix, kind in prefix_table:
        if key.startswith(prefix):
            return kind
    return "unknown"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 流量分离
|
||||||
|
|
||||||
|
| Key 类型 | 处理方式 |
|
||||||
|
|----------|----------|
|
||||||
|
| `lgw-` 开头 | 我们的 Gateway 处理 |
|
||||||
|
| `sk-` 开头 | 直接转发到对应供应商 |
|
||||||
|
| 其他 Subapi 格式 | 转发到 Subapi(如果有集成) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 风险评估与缓解
|
||||||
|
|
||||||
|
### 7.1 风险评估
|
||||||
|
|
||||||
|
| 风险 | 影响 | 可能性 | 严重性 |
|
||||||
|
|------|------|--------|--------|
|
||||||
|
| Subapi Key 串用 | 计费损失/账号盗用 | 高 | 严重 |
|
||||||
|
| 激活码伪造 | 权益被盗用 | 中 | 高 |
|
||||||
|
| Key 泄露 | 未授权使用 | 高 | 高 |
|
||||||
|
|
||||||
|
### 7.2 缓解措施
|
||||||
|
|
||||||
|
1. **强制 Key 来源验证**
|
||||||
|
- 所有 Key 必须包含平台标识
|
||||||
|
- 验证时必须查询数据库
|
||||||
|
|
||||||
|
2. **Key 轮换**
|
||||||
|
- 定期轮换 Key
|
||||||
|
- 用户可手动轮换
|
||||||
|
|
||||||
|
3. **使用监控**
|
||||||
|
- 记录 Key 使用情况
|
||||||
|
- 异常使用告警
|
||||||
|
|
||||||
|
4. **IP 限制**
|
||||||
|
- 支持 IP 白名单
|
||||||
|
- 异常 IP 告警
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 结论
|
||||||
|
|
||||||
|
1. **Subapi 存在严重安全漏洞**:API Key 不验证来源,可在任意部署使用
|
||||||
|
|
||||||
|
2. **我们的系统必须自建 Key 体系**:
|
||||||
|
- Key 必须包含平台标识
|
||||||
|
- 必须数据库验证
|
||||||
|
- 必须防伪造
|
||||||
|
|
||||||
|
3. **集成时流量分离**:
|
||||||
|
- 我们的 Key 由我们处理
|
||||||
|
- Subapi Key 转发到 Subapi
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:安全漏洞分析
|
||||||
|
**关联文档**:
|
||||||
|
- `supply_detailed_design_v1_2026-03-18.md`
|
||||||
666
docs/security_solution_v1_2026-03-18.md
Normal file
666
docs/security_solution_v1_2026-03-18.md
Normal file
@@ -0,0 +1,666 @@
|
|||||||
|
# 安全解决方案(P0问题修复)
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:系统性解决评审发现的安全P0问题
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 计费数据防篡改机制
|
||||||
|
|
||||||
|
### 1.1 当前问题
|
||||||
|
|
||||||
|
- 只有 usage_records 表,缺乏完整性校验
|
||||||
|
- 无防篡改审计日志
|
||||||
|
- 无法追溯数据变更
|
||||||
|
|
||||||
|
### 1.2 解决方案
|
||||||
|
|
||||||
|
#### 1.2.1 双重记账设计
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 双重记账:借方和贷方必须平衡
|
||||||
|
class DoubleEntryBilling:
    """Double-entry billing: every charge posts a balanced debit/credit pair."""

    def record_billing(self, transaction: Transaction):
        """Post *transaction* as a debit to the user and a credit to revenue.

        NOTE(review): the balance assertion uses account types 'user' and
        'revenue' while the postings use 'user_balance' / 'revenue' -- confirm
        get_balance's first argument is a different namespace than
        debit/credit's account_type, otherwise the check never matches.
        Also, ``assert`` is stripped under ``python -O``; production code
        should raise an explicit exception instead.
        """
        # 1. Debit: user account balance
        self.debit(
            account_type='user_balance',
            account_id=transaction.user_id,
            amount=transaction.amount,
            currency=transaction.currency
        )

        # 2. Credit: revenue account
        self.credit(
            account_type='revenue',
            account_id='platform_revenue',
            amount=transaction.amount,
            currency=transaction.currency
        )

        # 3. Verify the ledger balances (debits + credits sum to zero)
        assert self.get_balance('user', transaction.user_id) + \
               self.get_balance('revenue', 'platform_revenue') == 0
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 1.2.2 审计日志表
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- PostgreSQL version: billing audit log table.
-- Append-only change journal for billing rows; record_hash/previous_hash
-- form a per-record hash chain so after-the-fact tampering is detectable.
CREATE TABLE IF NOT EXISTS billing_audit_log (
    id BIGSERIAL PRIMARY KEY,
    record_id BIGINT NOT NULL,          -- id of the audited row
    table_name VARCHAR(50) NOT NULL,    -- source table of the change
    operation VARCHAR(20) NOT NULL,     -- e.g. UPDATE
    old_value JSONB,                    -- row snapshot before the change
    new_value JSONB,                    -- row snapshot after the change
    operator_id BIGINT NOT NULL,
    operator_ip INET,
    operator_role VARCHAR(50),
    request_id VARCHAR(64),             -- correlates with request tracing
    record_hash CHAR(64) NOT NULL,      -- sha256 of new_value
    previous_hash CHAR(64),             -- hash of the prior entry for this record
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_billing_audit_log_record_id
    ON billing_audit_log (record_id);
CREATE INDEX IF NOT EXISTS idx_billing_audit_log_operator_id
    ON billing_audit_log (operator_id);
CREATE INDEX IF NOT EXISTS idx_billing_audit_log_created_at
    ON billing_audit_log (created_at DESC);
CREATE INDEX IF NOT EXISTS idx_billing_audit_log_request_id
    ON billing_audit_log (request_id);
|
||||||
|
|
||||||
|
-- PostgreSQL trigger: automatically journal changes (example).
-- Operator context is read from session settings (app.operator_id etc.),
-- which the application must SET per request.
-- NOTE(review): digest() comes from the pgcrypto extension -- confirm
-- CREATE EXTENSION pgcrypto has been run on the target database.
CREATE OR REPLACE FUNCTION fn_audit_supply_usage_update()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    v_prev_hash CHAR(64);
BEGIN
    -- Fetch the latest audit hash for this record to extend the hash chain.
    SELECT record_hash
    INTO v_prev_hash
    FROM billing_audit_log
    WHERE record_id = OLD.id
    ORDER BY id DESC
    LIMIT 1;

    INSERT INTO billing_audit_log (
        record_id,
        table_name,
        operation,
        old_value,
        new_value,
        operator_id,
        operator_ip,
        operator_role,
        request_id,
        record_hash,
        previous_hash
    )
    VALUES (
        OLD.id,
        'supply_usage_records',
        'UPDATE',
        to_jsonb(OLD),
        to_jsonb(NEW),
        -- Missing/empty session settings degrade to operator 0 / NULLs.
        COALESCE(NULLIF(current_setting('app.operator_id', true), ''), '0')::BIGINT,
        NULLIF(current_setting('app.operator_ip', true), '')::INET,
        NULLIF(current_setting('app.operator_role', true), ''),
        NULLIF(current_setting('app.request_id', true), ''),
        -- Hash of the post-change row; chained via previous_hash above.
        encode(digest(to_jsonb(NEW)::text, 'sha256'), 'hex'),
        v_prev_hash
    );

    RETURN NEW;
END;
$$;

DROP TRIGGER IF EXISTS trg_usage_before_update ON supply_usage_records;
CREATE TRIGGER trg_usage_before_update
BEFORE UPDATE ON supply_usage_records
FOR EACH ROW
EXECUTE FUNCTION fn_audit_supply_usage_update();
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 1.2.3 实时对账机制
|
||||||
|
|
||||||
|
```python
|
||||||
|
class BillingReconciliation:
    """Reconcile billing records against usage records (hourly + per-request)."""

    def hourly_reconciliation(self):
        """Hourly reconciliation pass.

        NOTE(review): pairing the two record streams with positional ``zip``
        assumes both queries return the same rows in the same order -- a
        single missing record misaligns every subsequent pair.  Matching by
        a shared request id would be robust; confirm ordering guarantees.
        Also ``datetime.now()`` is evaluated twice, so the two queries use
        slightly different windows.
        """
        # 1. Fetch billing records for the last hour
        billing_records = self.get_billing_records(
            start_time=self.hour_ago,
            end_time=datetime.now()
        )

        # 2. Fetch user usage records for the same window
        usage_records = self.get_usage_records(
            start_time=self.hour_ago,
            end_time=datetime.now()
        )

        # 3. Compare pairwise (see ordering caveat above)
        discrepancies = []
        for billing, usage in zip(billing_records, usage_records):
            if not self.is_match(billing, usage):
                discrepancies.append({
                    'billing_id': billing.id,
                    'usage_id': usage.id,
                    'difference': billing.amount - usage.amount
                })

        # 4. Alert if anything diverged
        if discrepancies:
            self.send_alert('billing_discrepancy', discrepancies)

    def real_time_verification(self):
        """Per-request verification, run immediately after each request completes.

        Raises BillingAccuracyError when the billed amount deviates from the
        recomputed cost by more than 0.1%.
        """
        # Verify right after the request finishes
        request = self.get_current_request()
        expected_cost = self.calculate_cost(request.usage)
        actual_cost = self.get_billing_record(request.id).amount

        # Allow 0.1% relative tolerance
        if abs(expected_cost - actual_cost) > expected_cost * 0.001:
            raise BillingAccuracyError(f"计费差异: {expected_cost} vs {actual_cost}")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 跨租户隔离强化
|
||||||
|
|
||||||
|
### 2.1 当前问题
|
||||||
|
|
||||||
|
- team_id 和 organization_id 字段存在
|
||||||
|
- 但缺乏强制验证和行级安全
|
||||||
|
|
||||||
|
### 2.2 解决方案
|
||||||
|
|
||||||
|
#### 2.2.1 强制租户上下文验证
|
||||||
|
|
||||||
|
```python
|
||||||
|
class TenantContextMiddleware:
    """Request middleware that derives and enforces the tenant context.

    The token-derived tenant id is authoritative; any tenant id supplied in
    the URL/header must match it or the request is rejected.
    """

    def process_request(self, request):
        """Validate and pin the tenant id for *request*.

        Raises TenantMismatchError if the request claims a different tenant
        than its token encodes.
        """
        # 1. Extract tenant ID from the token (authoritative source)
        tenant_id = self.extract_tenant_id(request.token)

        # 2. Cross-check any tenant id claimed via URL/header
        if request.tenant_id and request.tenant_id != tenant_id:
            raise TenantMismatchError()

        # 3. Force the tenant context onto the request
        request.tenant_id = tenant_id

        # 4. Store in request-scoped context for downstream code (and RLS)
        self.set_context('tenant_id', tenant_id)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.2 数据库行级安全(RLS)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Enable row-level security
ALTER TABLE api_keys ENABLE ROW LEVEL SECURITY;

-- Policy: a session only sees rows of its own tenant.
-- Relies on the application setting app.tenant_id per session/request.
CREATE POLICY api_keys_tenant_isolation
    ON api_keys
    FOR ALL
    USING (tenant_id = current_setting('app.tenant_id')::BIGINT);

-- Enable RLS on all sensitive tables
-- NOTE(review): these tables get RLS enabled but no policy is defined here,
-- which in PostgreSQL denies all access for non-owner roles -- confirm
-- matching policies exist elsewhere.
ALTER TABLE billing_records ENABLE ROW LEVEL SECURITY;
ALTER TABLE usage_records ENABLE ROW LEVEL SECURITY;
ALTER TABLE team_members ENABLE ROW LEVEL SECURITY;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.3 敏感操作二次验证
|
||||||
|
|
||||||
|
```python
|
||||||
|
class SensitiveOperationGuard:
    """Gate sensitive operations behind role checks and optional MFA.

    NOTE(review): the early ``return True`` for admins bypasses both MFA
    *and* the audit log, and the audit entry (step 3) is only written on the
    non-MFA success path -- confirm this is intended; auditing denied/MFA
    paths is usually required for sensitive actions.
    """

    # Operations requiring secondary verification
    SENSITIVE_ACTIONS = [
        'billing.write',  # write billing records
        'admin.tenant_write',  # tenant administration
        'provider.withdraw',  # supplier withdrawal
    ]

    def verify(self, user_id, action, context):
        """Return True if *user_id* may perform *action*; False if MFA is pending."""
        if action not in self.SENSITIVE_ACTIONS:
            return True

        # 1. Check the user's privilege level (admins short-circuit; see note)
        user = self.get_user(user_id)
        if user.role == 'admin':
            return True

        # 2. Check whether secondary verification is needed
        if self.requires_mfa(action, context):
            # Send a verification code; caller must retry after MFA completes
            self.send_verification_code(user)
            return False

        # 3. Write the audit log entry
        self.audit_log(user_id, action, context)

        return True
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 密钥轮换机制
|
||||||
|
|
||||||
|
### 3.1 当前问题
|
||||||
|
|
||||||
|
- API Key 无失效机制
|
||||||
|
- 无法强制轮换
|
||||||
|
- 无生命周期管理
|
||||||
|
|
||||||
|
### 3.2 解决方案
|
||||||
|
|
||||||
|
#### 3.2.1 密钥生命周期管理
|
||||||
|
|
||||||
|
```python
|
||||||
|
class APIKeyLifecycle:
    """API key lifecycle management: issuance, expiry, grace period, warnings."""

    # Configuration
    KEY_EXPIRY_DAYS = 90  # keys valid for 90 days
    WARNING_DAYS = 14  # warn 14 days before expiry
    GRACE_PERIOD_DAYS = 7  # 7-day grace period after expiry
    MAX_KEYS_PER_USER = 10  # at most 10 keys per user

    def generate_key(self, user_id, description) -> APIKey:
        """Create, persist and return a new APIKey for *user_id*.

        Raises MaxKeysExceededError when the per-user key quota is reached.
        """
        # 1. Enforce the per-user key count limit
        current_keys = self.count_user_keys(user_id)
        if current_keys >= self.MAX_KEYS_PER_USER:
            raise MaxKeysExceededError()

        # 2. Generate the key string
        key = self._generate_key_string()

        # 3. Build the key record (only the hash is stored, plus a display prefix)
        api_key = APIKey(
            key_hash=self.hash(key),
            key_prefix=key[:12],  # display prefix
            user_id=user_id,
            description=description,
            expires_at=datetime.now() + timedelta(days=self.KEY_EXPIRY_DAYS),
            created_at=datetime.now(),
            status='active',
            version=1
        )

        # 4. Persist to the database
        self.save(api_key)

        return api_key

    def is_key_valid(self, key: APIKey) -> ValidationResult:
        """Validate *key*'s status, expiry (with grace period) and issue warnings.

        NOTE(review): step 3 dereferences ``key.expires_at`` without the
        ``None`` guard used in step 2 -- a key with no expiry would raise a
        TypeError here; confirm expires_at is always set.
        """
        # 1. Check status flags
        if key.status == 'disabled':
            return ValidationResult(False, 'Key is disabled')

        if key.status == 'expired':
            return ValidationResult(False, 'Key is expired')

        # 2. Check expiry
        if key.expires_at and key.expires_at < datetime.now():
            # Within the grace period?
            if key.expires_at > datetime.now() - timedelta(days=self.GRACE_PERIOD_DAYS):
                # In grace period: accept but flag a warning
                return ValidationResult(True, 'Key expiring soon', warning=True)
            return ValidationResult(False, 'Key expired')

        # 3. Check whether a rotation reminder is due
        days_until_expiry = (key.expires_at - datetime.now()).days
        if days_until_expiry <= self.WARNING_DAYS:
            # Notify the user asynchronously
            self.notify_key_expiring(key, days_until_expiry)

        return ValidationResult(True, 'Valid')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.2 密钥泄露应急处理
|
||||||
|
|
||||||
|
```python
|
||||||
|
class KeyCompromiseHandler:
    """Emergency handling for leaked keys and routine key rotation."""

    def report_compromised(self, key_id, reporter_id):
        """Handle a leaked-key report: disable, notify, audit, replace.

        Returns the auto-generated replacement key.
        """
        # 1. Disable the key immediately
        key = self.get_key(key_id)
        key.status = 'compromised'
        key.disabled_at = datetime.now()
        key.disabled_by = reporter_id
        self.save(key)

        # 2. Notify the key's owner
        user = self.get_user(key.user_id)
        self.send_notification(user, 'key_compromised', {
            'key_id': key_id,
            'reported_at': datetime.now()
        })

        # 3. Write an audit log entry
        self.audit_log('key_compromised', {
            'key_id': key_id,
            'reported_by': reporter_id,
            'action': 'disabled'
        })

        # 4. Auto-create a replacement key (optional per design)
        new_key = self.generate_key(key.user_id, 'Auto-generated replacement')
        return new_key

    def rotate_key(self, key_id):
        """Proactively rotate *key_id*: issue a replacement, mark the old key.

        Returns the new key.  The old key is linked to its replacement via
        ``replaced_by`` for traceability.
        """
        old_key = self.get_key(key_id)

        # 1. Create the replacement key
        new_key = self.generate_key(
            old_key.user_id,
            f"Rotation of {old_key.description}"
        )

        # 2. Mark the old key as rotated
        old_key.status = 'rotated'
        old_key.rotated_at = datetime.now()
        old_key.replaced_by = new_key.id
        self.save(old_key)

        return new_key
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 激活码安全强化
|
||||||
|
|
||||||
|
### 4.1 当前问题
|
||||||
|
|
||||||
|
- 6位随机数熵(entropy)不足,易被暴力枚举
|
||||||
|
- MD5校验和可碰撞
|
||||||
|
|
||||||
|
### 4.2 解决方案
|
||||||
|
|
||||||
|
```python
|
||||||
|
import secrets
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
|
||||||
|
class SecureActivationCode:
|
||||||
|
def generate(self, user_id: int, expiry_days: int) -> str:
|
||||||
|
# 1. 使用 crypto.random 替代 random
|
||||||
|
# 16字节 = 128位 entropy
|
||||||
|
random_bytes = secrets.token_bytes(16)
|
||||||
|
random_hex = random_bytes.hex()
|
||||||
|
|
||||||
|
# 2. 使用 HMAC-SHA256 替代 MD5
|
||||||
|
expiry = datetime.now() + timedelta(days=expiry_days)
|
||||||
|
expiry_str = expiry.strftime("%Y%m%d")
|
||||||
|
|
||||||
|
# 3. 构建原始字符串
|
||||||
|
raw = f"lgw-act-{user_id}-{expiry_str}-{random_hex}"
|
||||||
|
|
||||||
|
# 4. HMAC 签名(使用应用密钥)
|
||||||
|
signature = hmac.new(
|
||||||
|
self.secret_key.encode(),
|
||||||
|
raw.encode(),
|
||||||
|
hashlib.sha256
|
||||||
|
).hexdigest()[:16]
|
||||||
|
|
||||||
|
return f"{raw}-{signature}"
|
||||||
|
|
||||||
|
def verify(self, code: str) -> VerificationResult:
|
||||||
|
parts = code.split('-')
|
||||||
|
if len(parts) != 6:
|
||||||
|
return VerificationResult(False, 'Invalid format')
|
||||||
|
|
||||||
|
# 1. 解析各部分
|
||||||
|
_, _, user_id, expiry_str, random_hex, signature = parts
|
||||||
|
|
||||||
|
# 2. 验证签名
|
||||||
|
raw = f"lgw-act-{user_id}-{expiry_str}-{random_hex}"
|
||||||
|
expected_signature = hmac.new(
|
||||||
|
self.secret_key.encode(),
|
||||||
|
raw.encode(),
|
||||||
|
hashlib.sha256
|
||||||
|
).hexdigest()[:16]
|
||||||
|
|
||||||
|
if not hmac.compare_digest(signature, expected_signature):
|
||||||
|
return VerificationResult(False, 'Invalid signature')
|
||||||
|
|
||||||
|
# 3. 验证过期
|
||||||
|
expiry = datetime.strptime(expiry_str, "%Y%m%d")
|
||||||
|
if expiry < datetime.now():
|
||||||
|
return VerificationResult(False, 'Expired')
|
||||||
|
|
||||||
|
return VerificationResult(True, 'Valid', user_id=int(user_id))
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. DDoS防护机制
|
||||||
|
|
||||||
|
### 4.1 防护层级
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DDoSProtection:
    """DDoS protection - fixes S-D-01."""

    # Three protection tiers: transport, HTTP, and application layer
    TIERS = [
        {'name': 'L4', 'layer': 'tcp', 'method': 'syn_cookie'},
        {'name': 'L7', 'layer': 'http', 'method': 'rate_limit'},
        {'name': 'APP', 'layer': 'application', 'method': 'challenge'}
    ]

    # Rate-limit configuration (requests per window, window in seconds)
    RATE_LIMITS = {
        'global': {'requests': 100000, 'window': 60},
        'per_ip': {'requests': 1000, 'window': 60},
        'per_token': {'requests': 100, 'window': 60},
        'burst': {'requests': 50, 'window': 1}
    }

    # IP blacklist (backed by a Redis set)
    def check_ip_blacklist(self, ip: str) -> bool:
        """Return True if *ip* is currently blacklisted."""
        return self.redis.sismember('ddos:blacklist', ip)

    def add_to_blacklist(self, ip: str, reason: str, duration: int = 3600):
        """Blacklist *ip* for *duration* seconds and record the reason.

        NOTE(review): EXPIRE is applied to the *whole* 'ddos:blacklist' set
        key, so every new addition resets the TTL for all blacklisted IPs,
        and no IP expires individually -- per-IP keys (or a sorted set with
        expiry timestamps) would give true per-IP durations; confirm intent.
        """
        self.redis.sadd('ddos:blacklist', ip)
        self.redis.expire('ddos:blacklist', duration)
        # Record the reason for auditing
        self.redis.hset('ddos:blacklist:reasons', ip, json.dumps({
            'reason': reason,
            'added_at': datetime.now().isoformat()
        }))
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 攻击检测
|
||||||
|
|
||||||
|
```python
|
||||||
|
class AttackDetector:
    """Attack detection against a fixed rule table."""

    # Detection rules: threshold count within window (seconds) -> action
    RULES = {
        'syn_flood': {'threshold': 1000, 'window': 10, 'action': 'block'},
        'http_flood': {'threshold': 500, 'window': 60, 'action': 'rate_limit'},
        'slowloris': {'threshold': 50, 'window': 60, 'action': 'block'},
        'credential_stuffing': {'threshold': 100, 'window': 60, 'action': 'challenge'}
    }

    async def detect(self, metrics: AttackMetrics) -> DetectionResult:
        """Evaluate *metrics* against each rule; return the first match.

        Rules are checked in dict insertion order and only the first
        exceeded rule is reported.  'block' actions are rated HIGH severity,
        everything else MEDIUM.
        """
        for rule_name, rule in self.RULES.items():
            if metrics.exceeds_threshold(rule):
                return DetectionResult(
                    attack=True,
                    rule=rule_name,
                    action=rule['action'],
                    severity='HIGH' if rule['action'] == 'block' else 'MEDIUM'
                )
        return DetectionResult(attack=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 日志脱敏规则
|
||||||
|
|
||||||
|
### 5.1 脱敏字段定义
|
||||||
|
|
||||||
|
```python
|
||||||
|
class LogDesensitization:
|
||||||
|
"""日志脱敏 - 修复S-D-02"""
|
||||||
|
|
||||||
|
# 脱敏规则
|
||||||
|
RULES = {
|
||||||
|
'api_key': {
|
||||||
|
'pattern': r'(sk-[a-zA-Z0-9]{20,})',
|
||||||
|
'replacement': r'sk-***',
|
||||||
|
'level': 'SENSITIVE'
|
||||||
|
},
|
||||||
|
'password': {
|
||||||
|
'pattern': r'(password["\']?\s*[:=]\s*["\']?)([^"\']+)',
|
||||||
|
'replacement': r'\1***',
|
||||||
|
'level': 'SENSITIVE'
|
||||||
|
},
|
||||||
|
'email': {
|
||||||
|
'pattern': r'([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
|
||||||
|
'replacement': r'\1***@\2',
|
||||||
|
'level': 'PII'
|
||||||
|
},
|
||||||
|
'phone': {
|
||||||
|
'pattern': r'(1[3-9]\d)(\d{4})(\d{4})',
|
||||||
|
'replacement': r'\1****\3',
|
||||||
|
'level': 'PII'
|
||||||
|
},
|
||||||
|
'ip_address': {
|
||||||
|
'pattern': r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',
|
||||||
|
'replacement': '***.***.***.***',
|
||||||
|
'level': 'NETWORK'
|
||||||
|
},
|
||||||
|
'credit_card': {
|
||||||
|
'pattern': r'(\d{4})[\s-]?(\d{4})[\s-]?(\d{4})[\s-]?(\d{4})',
|
||||||
|
'replacement': r'****-****-****-\4',
|
||||||
|
'level': 'SENSITIVE'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def desensitize(self, log: dict) -> dict:
|
||||||
|
"""脱敏处理"""
|
||||||
|
import re
|
||||||
|
result = {}
|
||||||
|
for key, value in log.items():
|
||||||
|
if isinstance(value, str):
|
||||||
|
result[key] = self._desensitize_value(value)
|
||||||
|
else:
|
||||||
|
result[key] = value
|
||||||
|
return result
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 日志级别
|
||||||
|
|
||||||
|
```python
|
||||||
|
class LogLevel:
|
||||||
|
"""日志级别"""
|
||||||
|
|
||||||
|
LEVELS = {
|
||||||
|
'DEBUG': {'mask': False, 'retention_days': 7},
|
||||||
|
'INFO': {'mask': False, 'retention_days': 30},
|
||||||
|
'WARNING': {'mask': False, 'retention_days': 90},
|
||||||
|
'ERROR': {'mask': False, 'retention_days': 365},
|
||||||
|
'SENSITIVE': {'mask': True, 'retention_days': 365} # 敏感日志必须脱敏
|
||||||
|
}
|
||||||
|
|
||||||
|
def should_mask(self, level: str) -> bool:
|
||||||
|
"""是否需要脱敏"""
|
||||||
|
return self.LEVELS.get(level, {}).get('mask', False)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 密钥定期轮换
|
||||||
|
|
||||||
|
### 6.1 定期轮换策略
|
||||||
|
|
||||||
|
```python
|
||||||
|
class KeyRotationScheduler:
|
||||||
|
"""密钥定期轮换 - 修复S-D-03"""
|
||||||
|
|
||||||
|
# 轮换配置
|
||||||
|
ROTATION_CONFIG = {
|
||||||
|
'api_key': {'days': 90, 'warning_days': 14},
|
||||||
|
'internal_key': {'days': 30, 'warning_days': 7},
|
||||||
|
'provider_key': {'days': 60, 'warning_days': 10}
|
||||||
|
}
|
||||||
|
|
||||||
|
async def schedule_rotation(self):
|
||||||
|
"""调度轮换"""
|
||||||
|
while True:
|
||||||
|
# 1. 查找需要轮换的Key
|
||||||
|
keys_due = await self.find_keys_due_for_rotation()
|
||||||
|
|
||||||
|
# 2. 发送提醒
|
||||||
|
for key in keys_due:
|
||||||
|
await self.send_rotation_warning(key)
|
||||||
|
|
||||||
|
# 3. 自动轮换(超过宽限期)
|
||||||
|
keys_expired = await self.find_expired_keys()
|
||||||
|
for key in keys_expired:
|
||||||
|
await self.auto_rotate(key)
|
||||||
|
|
||||||
|
await asyncio.sleep(3600) # 每小时检查
|
||||||
|
|
||||||
|
async def auto_rotate(self, key: APIKey):
|
||||||
|
"""自动轮换"""
|
||||||
|
# 1. 创建新Key
|
||||||
|
new_key = await self.generate_key(key.user_id, key.description)
|
||||||
|
|
||||||
|
# 2. 标记旧Key
|
||||||
|
key.status = 'rotating'
|
||||||
|
key.rotated_at = datetime.now()
|
||||||
|
key.replaced_by = new_key.id
|
||||||
|
|
||||||
|
# 3. 通知用户
|
||||||
|
await self.notify_user(key.user_id, {
|
||||||
|
'type': 'key_rotated',
|
||||||
|
'old_key_id': key.id,
|
||||||
|
'new_key': new_key.key_prefix + '***'
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 实施计划
|
||||||
|
|
||||||
|
### 7.1 优先级
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 截止 | 依赖 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| 计费防篡改机制 | 后端 | S1前 | - |
|
||||||
|
| 跨租户隔离强化 | 架构 | S1前 | - |
|
||||||
|
| 密钥轮换机制 | 后端 | S0-M1 | - |
|
||||||
|
| 激活码安全强化 | 后端 | S0-M1 | - |
|
||||||
|
| DDoS防护机制 | 安全 | S0-M2 | - |
|
||||||
|
| 日志脱敏规则 | 后端 | S0-M1 | - |
|
||||||
|
| 密钥定期轮换 | 后端 | S0-M2 | - |
|
||||||
|
|
||||||
|
### 7.2 验证标准
|
||||||
|
|
||||||
|
- 所有计费操作都有审计日志
|
||||||
|
- 跨租户访问被强制拦截
|
||||||
|
- Key可以正常轮换和失效
|
||||||
|
- 激活码无法伪造
|
||||||
|
- DDoS攻击可被检测和阻断
|
||||||
|
- 敏感日志自动脱敏
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:安全解决方案(修复版)
|
||||||
|
**关联文档**:
|
||||||
|
- `security_api_key_vulnerability_analysis_v1_2026-03-18.md`
|
||||||
|
- `supply_detailed_design_v1_2026-03-18.md`
|
||||||
139
docs/sub2api_integration_readiness_checklist_2026-03-16.md
Normal file
139
docs/sub2api_integration_readiness_checklist_2026-03-16.md
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
# Sub2API(subapi)集成准入清单(商用版)
|
||||||
|
|
||||||
|
- 文档日期:2026-03-16
|
||||||
|
- 评估对象:`sub2api`(用户口语中的 `subapi`)
|
||||||
|
- 评估目标:判断是否可作为企业级 LLM 通用转发网关中的可集成模块
|
||||||
|
- 结论类型:产品与架构准入评估(非代码实现设计)
|
||||||
|
|
||||||
|
## 1. 执行结论(先看)
|
||||||
|
|
||||||
|
1. 是否可集成:**可以**,但建议按“独立服务模块(HTTP/API)”方式集成,而不是 SDK 内嵌。
|
||||||
|
2. 开源是否完整:**整体完整度高**(含后端、前端、部署、CI、安全扫描、测试)。
|
||||||
|
3. 是否可直接商用:**有条件可商用**,但必须先完成法务与合规闸门,尤其是上游 ToS 风险审查。
|
||||||
|
4. 当前建议:按“受控灰度 + 强安全基线 + 明确回滚”推进 PoC,不建议直接全量生产接入。
|
||||||
|
|
||||||
|
## 2. 关键证据(本地仓库)
|
||||||
|
|
||||||
|
1. 许可证为 MIT(许可层面可商用):
|
||||||
|
[LICENSE](/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/LICENSE)
|
||||||
|
2. README 存在 ToS/研究用途提示(法务红线):
|
||||||
|
[README.md](/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/README.md:535)
|
||||||
|
3. 架构入口是服务化应用装配,不是轻量 SDK:
|
||||||
|
[main.go](/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/cmd/server/main.go:131)
|
||||||
|
4. 路由与中间件注册耦合较强,偏完整网关服务:
|
||||||
|
[gateway.go](/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/server/routes/gateway.go:15)
|
||||||
|
5. CI、测试、安全扫描链路完整:
|
||||||
|
[backend-ci.yml](/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/.github/workflows/backend-ci.yml)
|
||||||
|
[security-scan.yml](/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/.github/workflows/security-scan.yml)
|
||||||
|
|
||||||
|
## 3. 集成模式优先级(从稳到险)
|
||||||
|
|
||||||
|
1. **模式 A:服务化集成(推荐)**
|
||||||
|
- 做法:Sub2API 作为独立网关服务部署,你的主平台通过内部 API 调用。
|
||||||
|
- 优点:隔离风险、回滚简单、运维可控、便于灰度。
|
||||||
|
- 风险:多一层调用与运维成本。
|
||||||
|
|
||||||
|
2. **模式 B:反向代理链路集成(次选)**
|
||||||
|
- 做法:放在现有网关后作为特定模型/渠道的转发层。
|
||||||
|
- 优点:改动小、接入快。
|
||||||
|
- 风险:故障定位复杂,链路调试成本较高。
|
||||||
|
|
||||||
|
3. **模式 C:Fork 后深度改造(谨慎)**
|
||||||
|
- 做法:长期维护私有分支并做平台级改造。
|
||||||
|
- 优点:可控性最高。
|
||||||
|
- 风险:版本跟进成本大,研发负担重。
|
||||||
|
|
||||||
|
## 4. Go/No-Go 判定表(红黄绿)
|
||||||
|
|
||||||
|
1. **红线(任一命中即 No-Go)**
|
||||||
|
- 法务未完成上游 ToS 审查与书面结论。
|
||||||
|
- 生产仍启用 `simple mode`(跳过关键账单流程风险)。
|
||||||
|
- 允许 `allow_insecure_http=true`。
|
||||||
|
- 未启用预算/账单熔断。
|
||||||
|
|
||||||
|
2. **黄线(可受限 Go,但必须限流灰度)**
|
||||||
|
- 仅单可用区部署,无容灾。
|
||||||
|
- 无审计日志对接(请求、成本、调用人、模型)。
|
||||||
|
- 监控告警覆盖不足(延迟、错误率、成本突增)。
|
||||||
|
|
||||||
|
3. **绿线(可推进扩大流量)**
|
||||||
|
- 法务通过并形成可追溯结论。
|
||||||
|
- 生产基线配置全部达标。
|
||||||
|
- 7 天灰度稳定达标且完成回滚演练。
|
||||||
|
|
||||||
|
## 5. 生产硬化配置基线(必须项)
|
||||||
|
|
||||||
|
1. 运行模式与账单
|
||||||
|
- `run_mode=standard`(禁止 `simple`)
|
||||||
|
- `billing.circuit_breaker.enabled=true`
|
||||||
|
|
||||||
|
2. URL 安全策略(默认值需反转)
|
||||||
|
- `security.url_allowlist.enabled=true`
|
||||||
|
- `security.url_allowlist.allow_insecure_http=false`
|
||||||
|
- `security.url_allowlist.allow_private_hosts=false`(如必须访问内网,需最小范围白名单)
|
||||||
|
|
||||||
|
3. 代理与入口安全
|
||||||
|
- `server.trusted_proxies` 显式设置(禁止宽泛信任)
|
||||||
|
- 对外接口启用统一鉴权、限流、审计字段
|
||||||
|
|
||||||
|
4. 功能范围控制
|
||||||
|
- 首发禁用不稳定能力(如 README 标注不建议生产依赖的模块)
|
||||||
|
- `gateway.sora_*` 不纳入首发 SLA
|
||||||
|
|
||||||
|
## 6. 主要技术风险(按优先级)
|
||||||
|
|
||||||
|
1. **合规风险(最高)**
|
||||||
|
- 代码 MIT 不等于业务合规,ToS 约束来自上游服务条款而非开源许可本身。
|
||||||
|
|
||||||
|
2. **架构耦合风险**
|
||||||
|
- Sub2API 更像完整网关服务,不是可插拔 SDK;深嵌会提升主平台耦合度。
|
||||||
|
|
||||||
|
3. **配置误用风险**
|
||||||
|
- 默认安全开关存在“偏宽松”项,若未硬化会放大 SSRF/内网访问风险面。
|
||||||
|
|
||||||
|
4. **可用性与成本风险**
|
||||||
|
- 若熔断、限流、成本审计不完整,易出现成本失控或突发级联故障。
|
||||||
|
|
||||||
|
## 7. 最小 PoC 验收标准(进入下一阶段前)
|
||||||
|
|
||||||
|
1. 功能
|
||||||
|
- 覆盖你规划中的主流模型路由路径(至少 3 家提供商、核心模型调用成功率稳定)。
|
||||||
|
|
||||||
|
2. 稳定性
|
||||||
|
- 在目标并发下,错误率与 p95 延迟满足内部 SLO。
|
||||||
|
|
||||||
|
3. 成本
|
||||||
|
- 能按租户/应用/模型维度统计成本,异常成本有告警和自动熔断。
|
||||||
|
|
||||||
|
4. 安全
|
||||||
|
- 完成白名单策略、鉴权、审计字段落库、关键接口限流。
|
||||||
|
|
||||||
|
5. 可运维
|
||||||
|
- 一键回滚路径清晰,可在约定时限内完成演练回退。
|
||||||
|
|
||||||
|
## 8. 灰度与回滚策略(建议模板)
|
||||||
|
|
||||||
|
1. 灰度阶段
|
||||||
|
- 阶段 1:5% 内部流量(1-2 天)
|
||||||
|
- 阶段 2:20% 非关键业务流量(2-3 天)
|
||||||
|
- 阶段 3:50% 混合流量(2 天)
|
||||||
|
- 阶段 4:100%(仅当全部指标连续达标)
|
||||||
|
|
||||||
|
2. 观察指标
|
||||||
|
- 可用性:5xx 比例、超时率
|
||||||
|
- 性能:p95/p99 延迟
|
||||||
|
- 成本:每千 token 成本、异常突增
|
||||||
|
- 业务:成功调用率、用户投诉率
|
||||||
|
|
||||||
|
3. 回滚触发阈值(任一满足即回滚)
|
||||||
|
- 连续 10 分钟错误率超过阈值
|
||||||
|
- p95 延迟连续 15 分钟超出阈值
|
||||||
|
- 单小时成本增幅异常(超过预算策略)
|
||||||
|
- 出现高危安全事件或合规告警
|
||||||
|
|
||||||
|
## 9. 对“是否能作为模块”的最终判定
|
||||||
|
|
||||||
|
1. **可以作为模块集成**:是,但应定义为“外部服务模块”,非“代码库模块”。
|
||||||
|
2. **开源是否完整**:是,工程资产完整度高。
|
||||||
|
3. **当前最大阻碍**:不是代码完整性,而是 ToS 合规与生产配置硬化。
|
||||||
|
4. **建议决策**:先完成法务闸门与 7 天灰度,再决定是否扩大到主路径。
|
||||||
333
docs/sub2api_scheduler_billing_flow_deep_dive_v2_2026-03-17.md
Normal file
333
docs/sub2api_scheduler_billing_flow_deep_dive_v2_2026-03-17.md
Normal file
@@ -0,0 +1,333 @@
|
|||||||
|
# sub2api 调度-并发-Failover-用量计费链路深挖(v2)
|
||||||
|
|
||||||
|
- 版本:v2.0
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 代码基线:`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend`
|
||||||
|
- 目标:沉淀 S2 阶段可直接迁移到自研 Router Core 的主路径能力(特别是国内供应商 100% 接管场景)
|
||||||
|
|
||||||
|
## 1. 迁移状态确认(回答你的问题)
|
||||||
|
|
||||||
|
已确认开源项目库整体已迁入统一项目根:
|
||||||
|
|
||||||
|
- `立交桥/llm-gateway-competitors/sub2api-tar`
|
||||||
|
- `立交桥/llm-gateway-competitors/sub2api-src`
|
||||||
|
- `立交桥/llm-gateway-competitors/sub2api-full`
|
||||||
|
- `立交桥/llm-gateway-competitors/sub2api-code`
|
||||||
|
- `立交桥/llm-gateway-competitors/litellm`
|
||||||
|
- `立交桥/llm-gateway-competitors/one-api`
|
||||||
|
- `立交桥/llm-gateway-competitors/new-api`
|
||||||
|
|
||||||
|
结论:`subapi/sub2api`及其他对比仓库都已在`立交桥`目录下,可作为后续持续深读与集成源。
|
||||||
|
|
||||||
|
## 2. 本次深挖覆盖范围
|
||||||
|
|
||||||
|
核心入口与主链路文件:
|
||||||
|
|
||||||
|
1. 调度
|
||||||
|
- `internal/service/openai_account_scheduler.go`
|
||||||
|
- `internal/service/openai_gateway_service.go`
|
||||||
|
|
||||||
|
2. Handler 主流程(Responses / Chat Completions / Anthropic 兼容)
|
||||||
|
- `internal/handler/openai_gateway_handler.go`
|
||||||
|
- `internal/handler/openai_chat_completions.go`
|
||||||
|
|
||||||
|
3. 并发控制
|
||||||
|
- `internal/handler/gateway_helper.go`
|
||||||
|
- `internal/service/concurrency_service.go`
|
||||||
|
|
||||||
|
4. 异步用量任务池
|
||||||
|
- `internal/service/usage_record_worker_pool.go`
|
||||||
|
|
||||||
|
5. 计费幂等
|
||||||
|
- `internal/service/gateway_service.go`
|
||||||
|
- `internal/service/usage_billing.go`
|
||||||
|
- `internal/repository/usage_billing_repo.go`
|
||||||
|
|
||||||
|
6. 流式 failover 边界(对照)
|
||||||
|
- `internal/handler/gateway_handler.go`
|
||||||
|
- `internal/handler/failover_loop.go`
|
||||||
|
- `internal/handler/gateway_handler_stream_failover_test.go`
|
||||||
|
|
||||||
|
## 3. 端到端主链路(OpenAI Responses / Chat Completions)
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
sequenceDiagram
|
||||||
|
autonumber
|
||||||
|
participant C as Client
|
||||||
|
participant H as OpenAIGatewayHandler
|
||||||
|
participant S as OpenAIGatewayService
|
||||||
|
participant SCH as OpenAIAccountScheduler
|
||||||
|
participant CON as ConcurrencyService
|
||||||
|
participant U as Upstream(OpenAI)
|
||||||
|
participant P as UsageRecordWorkerPool
|
||||||
|
participant B as UsageBillingRepo
|
||||||
|
|
||||||
|
C->>H: 请求(/v1/responses 或 /v1/chat/completions)
|
||||||
|
H->>CON: TryAcquireUserSlot
|
||||||
|
alt 未拿到用户槽位
|
||||||
|
H->>CON: IncrementWaitCount + AcquireUserSlotWithWait
|
||||||
|
end
|
||||||
|
|
||||||
|
H->>SCH: SelectAccountWithScheduler(previous_response_id/session_hash/model)
|
||||||
|
SCH-->>H: AccountSelectionResult(已拿槽 or WaitPlan)
|
||||||
|
|
||||||
|
alt selection.Acquired = false
|
||||||
|
H->>CON: TryAcquireAccountSlot
|
||||||
|
alt 快速未拿到
|
||||||
|
H->>CON: IncrementAccountWaitCount + AcquireAccountSlotWithWaitTimeout
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
H->>S: Forward(...)
|
||||||
|
S->>U: 上游请求
|
||||||
|
|
||||||
|
alt 上游可 failover 错误(如 429/5xx)
|
||||||
|
U-->>S: error(status>=400)
|
||||||
|
S-->>H: UpstreamFailoverError
|
||||||
|
H->>H: 同账号重试/换号重试(受 max switches 限制)
|
||||||
|
else 正常返回
|
||||||
|
U-->>S: success(stream 或 non-stream)
|
||||||
|
S-->>H: OpenAIForwardResult(usage/request_id/ttft)
|
||||||
|
H->>P: submitUsageRecordTask
|
||||||
|
P->>S: RecordUsage
|
||||||
|
S->>B: applyUsageBilling(幂等)
|
||||||
|
end
|
||||||
|
|
||||||
|
H-->>C: 响应(JSON 或 SSE)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. 调度机制细节(SelectAccountWithScheduler)
|
||||||
|
|
||||||
|
`OpenAIGatewayService.SelectAccountWithScheduler`(`openai_account_scheduler.go`)是 3 层选择逻辑:
|
||||||
|
|
||||||
|
1. `previous_response_id` 粘性层(最高优先级)
|
||||||
|
- 入口:`Select()` 内先尝试 `SelectAccountByPreviousResponseID`
|
||||||
|
- 命中后 `decision.Layer=previous_response_id`
|
||||||
|
- 若有 `sessionHash`,会同步绑定 sticky session
|
||||||
|
|
||||||
|
2. `session_hash` 粘性层
|
||||||
|
- 读取 sticky account id
|
||||||
|
- 校验账号是否仍可调度、模型是否匹配、传输协议是否兼容
|
||||||
|
- 优先尝试直接抢账号槽位;失败则返回 WaitPlan(sticky 专用 timeout/max waiting)
|
||||||
|
|
||||||
|
3. 负载均衡层(load_balance)
|
||||||
|
- 过滤维度:排除集、可调度状态、模型支持、传输协议兼容
|
||||||
|
- 从并发服务批量拉取 `loadRate/waitingCount`
|
||||||
|
- 融合运行时统计(error_rate EWMA + TTFT EWMA)
|
||||||
|
- 评分项:`priority/load/queue/error_rate/ttft`
|
||||||
|
- 策略:先取 Top-K,再按权重随机顺序尝试抢槽,避免单账号长期垄断
|
||||||
|
- 若都抢不到,返回 fallback WaitPlan
|
||||||
|
|
||||||
|
评分来源(可调参数)在 `openAIWSSchedulerWeights()`,默认:
|
||||||
|
|
||||||
|
- Priority: 1.0
|
||||||
|
- Load: 1.0
|
||||||
|
- Queue: 0.7
|
||||||
|
- ErrorRate: 0.8
|
||||||
|
- TTFT: 0.5
|
||||||
|
|
||||||
|
## 5. 并发槽位模型(用户槽 + 账号槽)
|
||||||
|
|
||||||
|
### 5.1 获取顺序
|
||||||
|
|
||||||
|
在 `openai_gateway_handler.go` 中是明确的双层门控:
|
||||||
|
|
||||||
|
1. 先用户并发槽(防止单用户打爆)
|
||||||
|
2. 再账号并发槽(防止单账号过载)
|
||||||
|
|
||||||
|
### 5.2 快慢路径
|
||||||
|
|
||||||
|
`gateway_helper.go` 实现了统一并发辅助:
|
||||||
|
|
||||||
|
- 快路径:`TryAcquireUserSlot/TryAcquireAccountSlot`
|
||||||
|
- 慢路径:`Acquire*WithWait` + 指数退避 + 抖动
|
||||||
|
- 流式请求等待中会发 SSE ping(避免客户端超时)
|
||||||
|
|
||||||
|
### 5.3 等待队列上限控制
|
||||||
|
|
||||||
|
- 用户等待队列:`IncrementWaitCount`,上限=`CalculateMaxWait(userConcurrency)`
|
||||||
|
- 账号等待队列:`IncrementAccountWaitCount`,上限来自调度返回的 `WaitPlan.MaxWaiting`
|
||||||
|
- 队列满直接返回 429
|
||||||
|
|
||||||
|
### 5.4 槽位释放安全性
|
||||||
|
|
||||||
|
`wrapReleaseOnDone` 用 `context.AfterFunc + sync.Once` 保证:
|
||||||
|
|
||||||
|
- 正常完成会释放
|
||||||
|
- context 取消(客户端断开/超时)也会释放
|
||||||
|
- 多次释放只执行一次
|
||||||
|
|
||||||
|
这是并发槽不泄漏的关键机制。
|
||||||
|
|
||||||
|
## 6. Failover 机制与边界
|
||||||
|
|
||||||
|
### 6.1 触发条件
|
||||||
|
|
||||||
|
OpenAI 侧 failover 判定在 service 层:
|
||||||
|
|
||||||
|
- 显式状态码:`401/402/403/429/529` 与 `>=500`
|
||||||
|
- 以及 OpenAI 瞬态处理错误(内容解析)
|
||||||
|
|
||||||
|
满足时返回 `UpstreamFailoverError` 给 handler 进行重试/换号。
|
||||||
|
|
||||||
|
### 6.2 Handler 层重试策略
|
||||||
|
|
||||||
|
在 `openai_gateway_handler.go` / `openai_chat_completions.go`:
|
||||||
|
|
||||||
|
1. 若 `RetryableOnSameAccount=true` 且账号是 pool mode:
|
||||||
|
- 同账号短延迟重试(受 `pool retry limit` 限制)
|
||||||
|
|
||||||
|
2. 同账号重试耗尽:
|
||||||
|
- 计入失败账号集合,切换账号
|
||||||
|
- `switchCount` 超过 `maxAccountSwitches` 后 failover 结束
|
||||||
|
|
||||||
|
### 6.3 流式 no-replay 边界
|
||||||
|
|
||||||
|
对“已向客户端写出流式内容后是否还能 failover”的处理,代码体现为两种模式:
|
||||||
|
|
||||||
|
1. 通用 Gateway 路径(Anthropic/Gemini)有显式保护
|
||||||
|
- `gateway_handler.go` 记录 `writerSizeBeforeForward`
|
||||||
|
- 若 `UpstreamFailoverError` 返回时 `Writer.Size()` 已变化,判定“流已写出”,禁止继续 failover
|
||||||
|
- 测试 `gateway_handler_stream_failover_test.go` 明确验证“防止双 message_start 流拼接腐化”
|
||||||
|
|
||||||
|
2. OpenAI Responses/ChatCompat 路径
|
||||||
|
- failover 只在上游 `status>=400` 的响应阶段触发(此时尚未进入流转换写出)
|
||||||
|
- 进入 `handleStreamingResponse/handleChatStreamingResponse/handleAnthropicStreamingResponse` 后,发生的是流读写错误或超时,不再转成 `UpstreamFailoverError` 进行换号
|
||||||
|
|
||||||
|
推断(基于代码行为):OpenAI 这条链路等价于“流开始后不做 replay failover”。
|
||||||
|
|
||||||
|
## 7. 异步用量记录:submitUsageRecordTask -> WorkerPool
|
||||||
|
|
||||||
|
`openai_gateway_handler.go` 在 Forward 成功后统一走:
|
||||||
|
|
||||||
|
- `requestPayloadHash := HashUsageRequestPayload(body)`
|
||||||
|
- `submitUsageRecordTask(func(ctx){ RecordUsage(...) })`
|
||||||
|
|
||||||
|
`usage_record_worker_pool.go` 关键点:
|
||||||
|
|
||||||
|
1. 有界队列 + worker 池
|
||||||
|
- 默认 `worker=128`,`queue=16384`
|
||||||
|
|
||||||
|
2. 队列满降级策略
|
||||||
|
- `drop`:直接丢
|
||||||
|
- `sync`:同步执行
|
||||||
|
- `sample`:按比例同步执行(默认 10%)其余丢弃
|
||||||
|
|
||||||
|
3. 自动扩缩容
|
||||||
|
- 扩容触发:队列占比超过上阈值
|
||||||
|
- 缩容触发:队列空且运行利用率低
|
||||||
|
|
||||||
|
4. 任务保护
|
||||||
|
- 每个任务有超时上下文
|
||||||
|
- panic recover
|
||||||
|
|
||||||
|
5. 兜底路径
|
||||||
|
- 如果 handler 未注入 worker 池,改为同步执行(避免无界 goroutine)
|
||||||
|
|
||||||
|
## 8. 计费与幂等:request_id + fingerprint
|
||||||
|
|
||||||
|
### 8.1 request_id 生成优先级
|
||||||
|
|
||||||
|
`resolveUsageBillingRequestID()` 的优先级:
|
||||||
|
|
||||||
|
1. `ctxkey.ClientRequestID` -> `client:<id>`
|
||||||
|
2. `ctxkey.RequestID` -> `local:<id>`
|
||||||
|
3. 上游 `result.RequestID`
|
||||||
|
4. `generated:<uuid>`
|
||||||
|
|
||||||
|
中间件来源:
|
||||||
|
|
||||||
|
- `ClientRequestID()`:注入 `ctx_client_request_id`
|
||||||
|
- `RequestLogger()`:读取/生成 `X-Request-ID` 并注入 `ctx_request_id`
|
||||||
|
|
||||||
|
### 8.2 payload fingerprint
|
||||||
|
|
||||||
|
`resolveUsageBillingPayloadFingerprint()`:
|
||||||
|
|
||||||
|
- 优先使用 `requestPayloadHash`(即 `HashUsageRequestPayload(payload)`)
|
||||||
|
- 其次回退到 client/local request id
|
||||||
|
|
||||||
|
目的:降低“同一个 request_id 被误复用”导致的静默误去重风险。
|
||||||
|
|
||||||
|
### 8.3 UsageBillingCommand 字段映射
|
||||||
|
|
||||||
|
| 来源 | 字段 | 说明 |
|
||||||
|
|---|---|---|
|
||||||
|
| `requestID` | `RequestID` | 幂等主键组成部分 |
|
||||||
|
| `APIKey.ID` | `APIKeyID` | 幂等主键组成部分 |
|
||||||
|
| `RequestPayloadHash` | `RequestPayloadHash` | 幂等冲突鉴别增强 |
|
||||||
|
| `UsageLog.Model` | `Model` | 计费维度 |
|
||||||
|
| `UsageLog.InputTokens` | `InputTokens` | 输入 token |
|
||||||
|
| `UsageLog.OutputTokens` | `OutputTokens` | 输出 token |
|
||||||
|
| `UsageLog.CacheCreationTokens` | `CacheCreationTokens` | cache create token |
|
||||||
|
| `UsageLog.CacheReadTokens` | `CacheReadTokens` | cache read token |
|
||||||
|
| `UsageLog.ImageCount` | `ImageCount` | 图像请求计费 |
|
||||||
|
| `Cost.ActualCost` | `BalanceCost` | 余额扣费 |
|
||||||
|
| `Cost.TotalCost` + 订阅 | `SubscriptionCost` | 订阅计费 |
|
||||||
|
| `Cost.ActualCost` + API Key quota | `APIKeyQuotaCost` | key 配额计费 |
|
||||||
|
| `Cost.ActualCost` + key ratelimit | `APIKeyRateLimitCost` | key 限速窗口用量 |
|
||||||
|
| `Cost.TotalCost*AccountRateMultiplier` | `AccountQuotaCost` | 账号配额计费 |
|
||||||
|
|
||||||
|
### 8.4 仓储层幂等执行(强一致)
|
||||||
|
|
||||||
|
`usage_billing_repo.go` 关键行为:
|
||||||
|
|
||||||
|
1. 事务内先 `claimUsageBillingKey`
|
||||||
|
- `INSERT usage_billing_dedup(request_id, api_key_id, request_fingerprint)`
|
||||||
|
- 冲突则读取已有 fingerprint 对比
|
||||||
|
|
||||||
|
2. 对比规则
|
||||||
|
- fingerprint 相同:视为重复请求,`Applied=false`(幂等成功,不重复扣费)
|
||||||
|
- fingerprint 不同:返回 `ErrUsageBillingRequestConflict`
|
||||||
|
|
||||||
|
3. claim 成功后执行扣费副作用
|
||||||
|
- 订阅累计
|
||||||
|
- 用户余额扣减
|
||||||
|
- API Key 配额/限速窗口
|
||||||
|
- 账号 quota(含 daily/weekly)
|
||||||
|
|
||||||
|
结论:这是“请求级 at-most-once 记账”语义,而非“至少一次”。
|
||||||
|
|
||||||
|
## 9. 失败模式与重试边界
|
||||||
|
|
||||||
|
| 阶段 | 错误类型 | 默认行为 | 是否自动重试 | 备注 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| 用户槽获取 | 并发满/超时 | 429 | 否 | 可等待到超时 |
|
||||||
|
| 账号槽获取 | 并发满/超时 | 429 | 否 | 受 WaitPlan 限制 |
|
||||||
|
| 上游请求前 | 网络错误 | 502 | 否 | 非 failover 错误路径 |
|
||||||
|
| 上游 HTTP 错误 | failover 状态码 | 换号/同号重试 | 是 | 受 max switches/同号重试上限 |
|
||||||
|
| 流写出后错误(通用网关) | UpstreamFailoverError | 终止 failover | 否 | 防止流拼接腐化 |
|
||||||
|
| 流处理中断(OpenAI) | scan/read/timeout | 返回已采集 usage + 错误 | 否 | 不 replay failover |
|
||||||
|
| 用量任务池满 | 队列溢出 | drop/sample/sync | 视策略 | 可观测 dropped 指标 |
|
||||||
|
| 计费重复请求 | dedup 命中 | Applied=false | N/A | 不重复扣费 |
|
||||||
|
| 计费冲突 | 同 request_id 不同指纹 | 错误 | 否 | 需上层治理 request_id 生成 |
|
||||||
|
|
||||||
|
## 10. 对自研 Router Core 的直接启发(S2 主路径)
|
||||||
|
|
||||||
|
结合你的目标(S2 结束 >=60% 主路径接管,国内供应商 100%):
|
||||||
|
|
||||||
|
1. 必须先自研的“不可外包”能力
|
||||||
|
- 并发双槽模型(user/account)+ wait queue 上限
|
||||||
|
- 幂等计费仓储(request_id + fingerprint)
|
||||||
|
- 流式 no-replay 边界控制(防流拼接)
|
||||||
|
|
||||||
|
2. 可先复用 subapi、后续替换的能力
|
||||||
|
- 账号调度具体打分权重
|
||||||
|
- 多协议转换细节(Responses/Chat/Messages)
|
||||||
|
|
||||||
|
3. 国内供应商 100% 接管建议
|
||||||
|
- 在 Router Core 里先实现国内供应商专属 scheduler + billing pipeline
|
||||||
|
- subapi connector 仅保留海外通道与过渡流量
|
||||||
|
|
||||||
|
4. 核心指标(建议纳入 S2 验收)
|
||||||
|
- 调度层命中率:`previous_response/session/load_balance`
|
||||||
|
- failover 成功率与平均切换次数
|
||||||
|
- 并发等待队列溢出率
|
||||||
|
- usage task dropped/sync fallback 比例
|
||||||
|
- billing conflict rate(request_id 冲突)
|
||||||
|
|
||||||
|
## 11. 后续 v3 建议
|
||||||
|
|
||||||
|
1. 深挖 `gateway_service.go` 的 Anthropic/Gemini 混合调度与模型路由规则优先级
|
||||||
|
2. 逐项抽取可复用测试用例为 Router Core 契约测试
|
||||||
|
3. 建立“流式写出后 failover 禁止”跨协议统一中间件(避免行为分叉)
|
||||||
224
docs/subapi_connector_contract_v1_2026-03-17.md
Normal file
224
docs/subapi_connector_contract_v1_2026-03-17.md
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
# Subapi Connector 契约清单 v1
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 适用阶段:S1-S2(`subapi` 外部服务模块化接入 + 客户迁移)
|
||||||
|
- 契约目标:为我方 Router Core 与 `subapi` 之间的调用定义“稳定接口 + 稳定语义 + 可回归验证”。
|
||||||
|
|
||||||
|
## 1. 设计目标与边界
|
||||||
|
|
||||||
|
## 1.1 目标
|
||||||
|
|
||||||
|
1. 将 `subapi` 视为“可替换外部能力模块”,而不是核心业务真相源。
|
||||||
|
2. 固定北向协议与字段语义,避免上游快速迭代导致我方主路径抖动。
|
||||||
|
3. 对请求、响应、错误、流式事件建立统一归一模型,支撑审计、计费、告警。
|
||||||
|
|
||||||
|
## 1.2 非目标
|
||||||
|
|
||||||
|
1. 不在本契约覆盖 `subapi` 后台管理 API(`/admin/*`)。
|
||||||
|
2. 不在本契约定义我方控制面业务模型(租户、账务、RBAC)的内部存储结构。
|
||||||
|
3. 不将 `subapi` 私有实现细节(调度算法内部参数)暴露到我方公共 API。
|
||||||
|
|
||||||
|
## 2. 接入范围(协议与端点)
|
||||||
|
|
||||||
|
## 2.1 Canonical 端点(Connector 只使用这一组)
|
||||||
|
|
||||||
|
| 协议域 | Method | Path | 说明 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Anthropic 兼容 | `POST` | `/v1/messages` | 统一消息入口 |
|
||||||
|
| Anthropic 兼容 | `POST` | `/v1/messages/count_tokens` | 仅计数,不计费记录 |
|
||||||
|
| OpenAI 兼容 | `POST` | `/v1/chat/completions` | Chat Completions |
|
||||||
|
| OpenAI 兼容 | `POST` | `/v1/responses` | Responses |
|
||||||
|
| OpenAI 兼容 | `POST` | `/v1/responses/*subpath` | Responses 子资源 |
|
||||||
|
| OpenAI 兼容(WS) | `GET` | `/v1/responses` | WebSocket 升级入口 |
|
||||||
|
| 通用 | `GET` | `/v1/models` | 模型目录 |
|
||||||
|
| 通用 | `GET` | `/v1/usage` | 用量/额度信息 |
|
||||||
|
| Gemini 原生 | `GET` | `/v1beta/models` | 模型列表 |
|
||||||
|
| Gemini 原生 | `GET` | `/v1beta/models/:model` | 模型详情 |
|
||||||
|
| Gemini 原生 | `POST` | `/v1beta/models/*modelAction` | `generateContent`/`streamGenerateContent` |
|
||||||
|
|
||||||
|
## 2.2 Alias 端点处理
|
||||||
|
|
||||||
|
1. `subapi` 提供不带 `/v1` 的别名(如 `/responses`、`/chat/completions`)。
|
||||||
|
2. Connector 禁止调用 alias,统一走 canonical 端点,避免路由歧义。
|
||||||
|
|
||||||
|
## 3. 认证与请求头契约
|
||||||
|
|
||||||
|
## 3.1 认证头优先级
|
||||||
|
|
||||||
|
1. `/v1/*`:
|
||||||
|
- `Authorization: Bearer <key>` > `x-api-key` > `x-goog-api-key`
|
||||||
|
2. `/v1beta/*`:
|
||||||
|
- `x-goog-api-key` > `Authorization: Bearer <key>` > `x-api-key` > `key(query,仅兼容)`
|
||||||
|
|
||||||
|
## 3.2 禁止项
|
||||||
|
|
||||||
|
1. 禁止通过 query 传 `key`/`api_key`(仅保留兼容读取,不作为标准路径)。
|
||||||
|
2. Connector 默认不发送 query key。
|
||||||
|
|
||||||
|
## 3.3 会话亲和相关头
|
||||||
|
|
||||||
|
1. OpenAI 兼容:支持 `session_id`、`conversation_id`,请求体支持 `prompt_cache_key`。
|
||||||
|
2. Gemini CLI:支持 `x-gemini-api-privileged-user-id` + 请求体 tmp 路径哈希会话识别。
|
||||||
|
3. Anthropic 兼容:可使用 `metadata.user_id` 补充分组内会话亲和。
|
||||||
|
|
||||||
|
## 3.4 北向/南向边界(安全强约束,新增)
|
||||||
|
|
||||||
|
1. 北向(客户 -> 我方网关):
|
||||||
|
- 禁止接收任何 query key(`key`/`api_key`),统一要求 header 鉴权。
|
||||||
|
- 外部携带 query key 的请求必须被拒绝并记录审计事件。
|
||||||
|
2. 南向(我方网关 -> subapi connector):
|
||||||
|
- 仅允许 header 方式传递凭证(`Authorization`/`x-api-key`/`x-goog-api-key`)。
|
||||||
|
- Connector 默认不透传 query key。
|
||||||
|
3. 历史兼容策略:
|
||||||
|
- 若为极少数遗留客户端保留“内部改写”能力,必须在北向入口完成 query->header 改写,且该兼容规则需要白名单与审计日志。
|
||||||
|
- 该兼容规则默认关闭,并纳入版本化淘汰计划。
|
||||||
|
|
||||||
|
## 4. 请求体契约(最小强约束)
|
||||||
|
|
||||||
|
## 4.1 通用约束
|
||||||
|
|
||||||
|
1. Body 必须是非空 JSON。
|
||||||
|
2. `model` 必须为非空字符串。
|
||||||
|
3. `stream` 若出现,必须是布尔值。
|
||||||
|
|
||||||
|
## 4.2 OpenAI Responses 额外约束
|
||||||
|
|
||||||
|
1. `previous_response_id` 若存在,必须是 `resp_*` 风格,不可传 message id。
|
||||||
|
2. `input.type=function_call_output` 场景必须满足 call 上下文约束(否则应返回 `400 invalid_request_error`)。
|
||||||
|
|
||||||
|
## 4.3 Gemini Action 路径约束
|
||||||
|
|
||||||
|
1. `*modelAction` 支持两种格式:`{model}:{action}` 或 `{model}/{action}`。
|
||||||
|
2. `streamGenerateContent` 视为流式请求。
|
||||||
|
|
||||||
|
## 5. 响应归一契约(Connector 内部模型)
|
||||||
|
|
||||||
|
Connector 必须将不同协议响应归一为以下内部结构:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"protocol": "openai|anthropic|gemini",
|
||||||
|
"request_id": "string",
|
||||||
|
"upstream_request_id": "string",
|
||||||
|
"model_requested": "string",
|
||||||
|
"model_actual": "string",
|
||||||
|
"stream": true,
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": 0,
|
||||||
|
"output_tokens": 0,
|
||||||
|
"cache_creation_input_tokens": 0,
|
||||||
|
"cache_read_input_tokens": 0,
|
||||||
|
"total_tokens": 0
|
||||||
|
},
|
||||||
|
"first_token_ms": 0,
|
||||||
|
"duration_ms": 0,
|
||||||
|
"client_disconnect": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 5.1 字段规则
|
||||||
|
|
||||||
|
1. `upstream_request_id`:优先从响应头 `x-request-id` 提取。
|
||||||
|
2. `request_id`:若协议返回 body request id,则保留;否则由 Connector 生成稳定 ID。
|
||||||
|
3. `usage.total_tokens`:若上游未提供,按 `input + output` 计算;均缺失时置 0。
|
||||||
|
4. `usage` 缺失不视为失败,但必须打 `usage_extracted=false` 观测标签(用于后续补偿分析)。
|
||||||
|
|
||||||
|
## 6. 错误归一契约
|
||||||
|
|
||||||
|
## 6.1 上游原生错误格式(三类)
|
||||||
|
|
||||||
|
1. OpenAI 风格:`{"error":{"type":"...","message":"..."}}`
|
||||||
|
2. Anthropic 风格:`{"type":"error","error":{"type":"...","message":"..."}}`
|
||||||
|
3. Google 风格:`{"error":{"code":403,"message":"...","status":"PERMISSION_DENIED"}}`
|
||||||
|
|
||||||
|
## 6.2 Connector 统一错误结构
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"http_status": 429,
|
||||||
|
"category": "auth|billing|rate_limit|upstream|validation|internal",
|
||||||
|
"code": "RATE_LIMIT_EXCEEDED",
|
||||||
|
"message": "human readable",
|
||||||
|
"retryable": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 6.3 默认映射规则
|
||||||
|
|
||||||
|
1. `401/403` 上游鉴权类错误 -> `category=auth`,`retryable=false`。
|
||||||
|
2. `429` -> `category=rate_limit`,`retryable=true`。
|
||||||
|
3. `500/502/503/504/529` -> `category=upstream`,`retryable=true`。
|
||||||
|
4. 计费检查失败(余额/订阅/额度) -> `category=billing`,`retryable` 取决于具体码。
|
||||||
|
5. JSON/字段校验失败 -> `category=validation`,`retryable=false`。
|
||||||
|
|
||||||
|
## 7. 流式契约(SSE/WS)
|
||||||
|
|
||||||
|
## 7.1 SSE
|
||||||
|
|
||||||
|
1. 流开始后如发生错误,按事件帧返回,不再回写普通 JSON 错误。
|
||||||
|
2. 一旦已向客户端写出任何流内容,Connector 禁止触发“同请求 failover 重放”。
|
||||||
|
3. 必须记录首字时延 `first_token_ms` 与终止类型(正常结束/上游错误/客户端断开)。
|
||||||
|
|
||||||
|
## 7.2 WebSocket(OpenAI Responses)
|
||||||
|
|
||||||
|
1. 首帧必须包含合法 JSON 且含 `model`。
|
||||||
|
2. 若首帧不合法,立即关闭连接并返回协议错误。
|
||||||
|
3. 多轮 turn 必须重新获取并发槽位(避免长连接长期占槽)。
|
||||||
|
|
||||||
|
## 8. 重试与回退契约(Connector 侧)
|
||||||
|
|
||||||
|
1. 仅在“未输出任何字节”时允许请求级重试。
|
||||||
|
2. 流式一旦开始输出,禁止自动重试。
|
||||||
|
3. 推荐重试上限:
|
||||||
|
- 非流式:最多 2 次(指数退避)
|
||||||
|
- 流式:0 次(依赖上游内部 failover)
|
||||||
|
4. HTTP 429/503 可进入短退避重试;4xx 校验/鉴权错误直接失败。
|
||||||
|
|
||||||
|
## 9. 版本与兼容治理
|
||||||
|
|
||||||
|
## 9.1 版本锁定
|
||||||
|
|
||||||
|
1. 生产固定 `subapi` 精确版本(`vX.Y.Z`),不允许漂移到未验证版本。
|
||||||
|
2. 升级必须进入周级升级窗口,禁止临时直升生产。
|
||||||
|
|
||||||
|
## 9.2 契约测试门槛(每次升级必须通过)
|
||||||
|
|
||||||
|
1. OpenAI `/v1/chat/completions` 非流/流式基础场景。
|
||||||
|
2. OpenAI `/v1/responses` + `previous_response_id` 校验场景。
|
||||||
|
3. Anthropic `/v1/messages` + `count_tokens` 场景。
|
||||||
|
4. Gemini `/v1beta/models/*` 普通与流式场景。
|
||||||
|
5. 三类错误格式的归一验证(OpenAI/Anthropic/Google)。
|
||||||
|
6. 流式中断与客户端断开场景(保证不重放、不漏记)。
|
||||||
|
|
||||||
|
## 10. 与 S2 目标的对齐(执行约束)
|
||||||
|
|
||||||
|
1. `S2` 结束时:
|
||||||
|
- 全供应商主路径由自研 Router Core 接管率 `>= 60%`
|
||||||
|
- 国内 LLM 供应商主路径接管率 `= 100%`
|
||||||
|
2. `subapi` 在 S2 后只承担:
|
||||||
|
- 长尾协议兼容
|
||||||
|
- 备用回退通道
|
||||||
|
|
||||||
|
## 11. 证据来源(本地代码)
|
||||||
|
|
||||||
|
1. 路由范围与协议入口:
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/server/routes/gateway.go`
|
||||||
|
2. OpenAI/Anthropic 错误与流式处理:
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/handler/openai_gateway_handler.go`
|
||||||
|
3. Claude 兼容入口、`models`/`usage`/`count_tokens`:
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/handler/gateway_handler.go`
|
||||||
|
4. Gemini 原生入口与错误格式:
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/handler/gemini_v1beta_handler.go`
|
||||||
|
5. 认证优先级与禁用 query key:
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/server/middleware/api_key_auth.go`
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/server/middleware/api_key_auth_google.go`
|
||||||
|
6. 会话哈希与 header 语义:
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/service/openai_gateway_service.go`
|
||||||
|
`/home/long/project/立交桥/llm-gateway-competitors/sub2api-tar/backend/internal/service/gateway_service.go`
|
||||||
|
|
||||||
|
## 12. 下一步(v1 -> v1.1)
|
||||||
|
|
||||||
|
1. 把本契约转成机器可执行测试清单(YAML + golden cases)。
|
||||||
|
2. 为每个端点补充“最小请求样例 + 最小响应样例”文件。
|
||||||
|
3. 将错误映射表下沉为配置化规则,减少硬编码发布频率。
|
||||||
@@ -0,0 +1,81 @@
|
|||||||
|
# Subapi 集成与替换方案全面评审报告(Skills 专业评审版,v1)
|
||||||
|
|
||||||
|
- 评审日期:2026-03-17
|
||||||
|
- 评审方式:线上文档/代码证据审查(不进行线下评审)
|
||||||
|
- 评审目标:验证“可集成 subapi、可逐步替换、可达企业级商用 LLM 网关目标”的可执行性与风险闭环
|
||||||
|
- 评审技能视角:
|
||||||
|
- `security`:安全与合规红线
|
||||||
|
- `api-design`:协议契约与版本兼容
|
||||||
|
- `backend`:架构可运维性与可靠性
|
||||||
|
- `writing-clearly-and-concisely`:问题可执行表达
|
||||||
|
|
||||||
|
## 1. 评审结论
|
||||||
|
|
||||||
|
当前结论:`CONDITIONAL GO`(有条件推进)
|
||||||
|
|
||||||
|
必须先关闭以下门槛,才可进入外部专家 Round-1 正式博弈:
|
||||||
|
|
||||||
|
1. 关闭全部 P0(本报告共 2 项)。
|
||||||
|
2. 将 P1 中影响评审可信度的项(FND-P1-01/02/03/04)纳入强制整改排期与负责人。
|
||||||
|
3. 完成专家名册扩展(产品专家、重构项目专家、技术专家)并发出定向邀请。
|
||||||
|
|
||||||
|
## 2. Findings(按严重级别)
|
||||||
|
|
||||||
|
### 2.1 P0(阻断级)
|
||||||
|
|
||||||
|
| 编号 | 问题 | 证据 | 风险 | 建议整改 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| FND-P0-01 | 设计要求了“内网隔离 + mTLS”,但两周任务单未把这两项作为显式交付任务。 | 设计要求见 `subapi_integration_compat_security_reliability_design_v1_2026-03-17.md:90-93`;任务单仅覆盖 egress 与配置硬化,未见 mTLS/内网暴露关闭任务:`subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:58-76`。 | 若缺少东西向链路强认证与暴露面收敛,subapi 作为外部模块会成为横向移动入口,安全基线与审计责任不可闭环。 | 新增 `SEC-007`(subapi 内网隔离与公网不可达验证)+ `SEC-008`(网关<->subapi mTLS 双向证书与轮换演练),并设为 Week Gate 必过项。 |
|
||||||
|
| FND-P0-02 | query key 策略在“契约文档、设计文档、上游代码事实”三处存在边界歧义,存在误放行风险。 | 契约仍保留 `/v1beta/*` query key 兼容:`subapi_connector_contract_v1_2026-03-17.md:52,56`;我方设计要求北向入口禁止 query key、仅做内转:`subapi_integration_compat_security_reliability_design_v1_2026-03-17.md:105-107`;上游 Gemini 中间件确实允许特定路径 query key:`api_key_auth_google.go:146-157`。 | 如果网关边界策略不严,外部客户端可绕过 header-only 规范,导致密钥泄露面扩大、日志污染和审计判责困难。 | 在接口契约增加“北向/南向边界规则”专章;新增强制测试 `SEC-009`:外部 query key 全拒绝、内部改写链路可用且可审计。 |
|
||||||
|
|
||||||
|
### 2.2 P1(高优先级)
|
||||||
|
|
||||||
|
| 编号 | 问题 | 证据 | 风险 | 建议整改 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| FND-P1-01 | 接管率 SQL 的主路径过滤与 Connector 契约存在冲突,口径有漂移风险。 | 主路径 SQL 包含 `inbound_endpoint IS NULL` 及 alias `/responses`:`router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md:75-83`;而 Connector 契约明确禁止 alias:`subapi_connector_contract_v1_2026-03-17.md:42-44`。 | 验收口径可能混入非 canonical 流量或历史脏数据,影响 `>=60% / 100%` 决策可信度。 | 固化 canonical-only 主路径过滤;把 alias 流量单独做异常面板,不计入验收分母。 |
|
||||||
|
| FND-P1-02 | `cn_platforms` 在 SQL 中仍以示例硬编码,缺少配置中心强绑定。 | 多处示例硬编码 `ARRAY['antigravity']`:`router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md:110,166,225,333`;虽有文字提示“不应硬编码”:`...:101,374`。 | 国内供应商清单变更后会导致统计失真,直接影响 `cn_takeover=100%` 验收。 | 新增配置表 `gateway_provider_taxonomy`,由 SQL 读取,禁止脚本内硬编码。 |
|
||||||
|
| FND-P1-03 | Wave Gate 没有把“路由标记覆盖率”纳入 Go 条件,评审可能在低覆盖数据上推进。 | 标记覆盖率 KPI 目标 `>=99.9%`:`router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md:384`;但 Wave-CN/Wave-Global Go 条件未包含该项:`router_core_s2_acceptance_test_cases_v1_2026-03-17.md:99-107`。 | 数据覆盖不足时,接管率结论不可靠,容易误升波或误回切。 | 在 Wave Gate 增加硬条件:`route_mark_coverage_pct >= 99.9%`,否则一律 Stop。 |
|
||||||
|
| FND-P1-04 | 专家评审轮次文件已补充预审输入,但仍缺“owner/截止日期/证据链接”的实填项。 | 预审输入已加入:`round1_architecture_review.md:6-15`、`round2_compat_billing_review.md:6-15`、`round3_security_compliance_review.md:6-15`、`round4_reliability_wargame_review.md:6-15`;正式问题清单区仍待会后填实:`round1...:24-35` 等。 | 若会后不立即回填,问题闭环会断档,影响 `EXP-006` 决议可信度。 | 会后 4 小时内必须完成 owner/due date/证据链接回填,并纳入评审秘书检查清单。 |
|
||||||
|
| FND-P1-05 | 迁移方案有技术灰度,但缺少客户成功侧“迁移中断沟通与赔付/SLA”机制。 | 迁移目标强调留存与成功率:`llm_gateway_subapi_evolution_plan_v2_2026-03-17.md:107-119`;未定义客户沟通/SLA 补救机制。 | 迁移期若出现协议回归或账务争议,商业层面容易出现升级投诉和续费风险。 | 新增 `PROD-001` 客户迁移事件分级与告知模板、`PROD-002` 账务争议 SLA 与赔付边界。 |
|
||||||
|
| FND-P1-06 | 任务单职责仍是角色占位,未落到实名与备份人。 | 角色占位说明:`subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:20-31`。 | 发生 P0 时可能出现授权链路不清、恢复动作延迟。 | 启动会上完成实名 RACI + backup owner,并写入任务单 v1.1。 |
|
||||||
|
|
||||||
|
### 2.3 P2(建议优化)
|
||||||
|
|
||||||
|
| 编号 | 问题 | 证据 | 风险 | 建议整改 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| FND-P2-01 | 术语“subapi/sub2api”混用,跨文档追踪成本偏高。 | 多份文档并存两种命名:例如 `sub2api_integration_readiness_checklist_2026-03-16.md:1-4` 与 `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md:15-16`。 | 评审/实施时易产生“同名异义”沟通误差。 | 统一术语字典:对外叫 `subapi`,代码与仓库名保持 `sub2api`,在文档页首固定声明。 |
|
||||||
|
| FND-P2-02 | 专家执行台账字段粒度不足,难以快速看清“优先级+邀请状态+轮次覆盖”。 | 旧名册/台账字段偏基础:`review/experts_roster_2026-03-18.md:9-20`、`review/invitation_dispatch_tracker_2026-03-17.md:8-19`。 | 邀请执行过程不够透明,不利于秘书做催办与替补。 | 扩展为“专家类型/优先级/邀请状态/预计轮次/是否已签 NDA”。(本次已更新) |
|
||||||
|
|
||||||
|
## 3. 整改任务映射(新增与绑定)
|
||||||
|
|
||||||
|
| Finding | 任务ID | 类型 | 截止日期 | 验收标准 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| FND-P0-01 | SEC-007 | 新增 | 2026-03-20 | subapi 对公网不可达,且有网络策略与扫描证据 |
|
||||||
|
| FND-P0-01 | SEC-008 | 新增 | 2026-03-24 | mTLS 双向认证联通,证书轮换演练通过 |
|
||||||
|
| FND-P0-02 | SEC-009 | 新增 | 2026-03-21 | 外部 query key 全拒绝;内部改写链路可观测 |
|
||||||
|
| FND-P1-01 | COMP-007 | 新增 | 2026-03-22 | 主路径 SQL 与 canonical 契约一致,alias 单列监控 |
|
||||||
|
| FND-P1-02 | COMP-008 | 新增 | 2026-03-22 | `cn_platforms` 改为配置表驱动 |
|
||||||
|
| FND-P1-03 | COMP-009 | 新增 | 2026-03-23 | Wave Gate 加入覆盖率硬门槛并生效 |
|
||||||
|
| FND-P1-04 | EXP-002~005 | 绑定 | 2026-03-19 起 | 四轮评审均有会前问题清单与会后闭环 |
|
||||||
|
| FND-P1-05 | PROD-001/002 | 新增 | 2026-03-24 | 发布客户沟通与赔付/SLA 流程文档 |
|
||||||
|
| FND-P1-06 | PMO-001 | 新增 | 2026-03-18 | 任务单实名 owner + backup owner 完成 |
|
||||||
|
|
||||||
|
## 4. 专家博弈输入建议(会前统一口径)
|
||||||
|
|
||||||
|
1. 先审 P0,再谈路线优选:P0 不闭环时不进入扩流讨论。
|
||||||
|
2. Round-1 必问三题:锁定风险、止血路径、替换可逆性。
|
||||||
|
3. Round-2 必核数据口径:canonical 分母、覆盖率门槛、CN 平台清单来源。
|
||||||
|
4. Round-3 必做攻防演练:query key 绕过、SSRF、内网访问阻断、审计追踪。
|
||||||
|
5. Round-4 必做恢复演练:10 分钟触发回切、30 分钟恢复、账务一致性验收。
|
||||||
|
|
||||||
|
## 5. 当前执行状态(本次已完成)
|
||||||
|
|
||||||
|
1. 已完成 skills 内部全面评审并形成分级 findings。
|
||||||
|
2. 已更新专家名册与邀请跟踪字段,显式纳入产品专家、重构项目专家、技术专家。
|
||||||
|
3. 已生成逐人邀请文本,可直接发送。
|
||||||
|
|
||||||
|
## 6. 三角色联合复审回链(2026-03-18)
|
||||||
|
|
||||||
|
1. 已补充三角色联合评审文档:`subapi_role_based_review_wargame_optimization_v1_2026-03-18.md`。
|
||||||
|
2. 已将用户代表、测试专家、网关专家纳入专家名册与邀请跟踪(E13/E14/E15)。
|
||||||
|
3. 已将联合评审任务映射到执行任务单(`UXR-*`、`TST-*`、`GAT-*`、`EXP-007`)。
|
||||||
194
docs/subapi_expert_review_wargame_plan_v1_2026-03-17.md
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
# Subapi 集成专家审核与博弈机制(v1)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-17
|
||||||
|
- 适用范围:S1-S2(集成 subapi + 逐步替换 + 企业级商用达标)
|
||||||
|
- 关联文档:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md`
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
- `subapi_integration_compat_security_reliability_design_v1_2026-03-17.md`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md`
|
||||||
|
|
||||||
|
## 1. 机制目标
|
||||||
|
|
||||||
|
1. 在实施前对架构、兼容、安全、可靠性和商业可运营性做“全面独立审查”。
|
||||||
|
2. 通过“对抗式博弈(Red vs Blue)”提前暴露盲点,而不是上线后被事故教育。
|
||||||
|
3. 形成可执行的 GO / CONDITIONAL GO / NO-GO 决策与整改清单。
|
||||||
|
|
||||||
|
## 2. 专家组成(建议 9-11 人)
|
||||||
|
|
||||||
|
### 2.1 内部专家(必选)
|
||||||
|
|
||||||
|
1. 架构负责人(Router Core 负责人)。
|
||||||
|
2. 平台工程负责人(CI/CD、发布与配置)。
|
||||||
|
3. SRE 负责人(稳定性与故障恢复)。
|
||||||
|
4. 安全负责人(应用安全 + 云安全)。
|
||||||
|
5. 计费/财务数据负责人(对账与幂等)。
|
||||||
|
6. 合规/法务接口人(ToS、审计与数据合规)。
|
||||||
|
7. 产品负责人(商用目标与边界)。
|
||||||
|
8. 测试负责人(兼容回归与质量门禁)。
|
||||||
|
|
||||||
|
### 2.2 外部专家(建议)
|
||||||
|
|
||||||
|
1. LLM 网关实战专家(有多供应商网关生产经验)。
|
||||||
|
2. 攻防专家(API 安全、SSRF、密钥泄漏与供应链风险)。
|
||||||
|
3. 高并发系统专家(流式、重试、背压与故障隔离)。
|
||||||
|
4. 可选:企业采购/交付顾问(SLA、审计、交付可行性)。
|
||||||
|
5. 核心用户代表(迁移试点客户,验证真实可用性)。
|
||||||
|
6. 重构项目专家(大规模替换路径与技术债治理)。
|
||||||
|
|
||||||
|
### 2.3 回避与独立性规则
|
||||||
|
|
||||||
|
1. 同一模块负责人不能单独裁定自己模块通过。
|
||||||
|
2. 安全与法务拥有一票否决权(P0 风险未关闭时)。
|
||||||
|
3. 所有评审结论必须记录反对意见,不允许“口头通过”。
|
||||||
|
|
||||||
|
## 3. 博弈规则(防止形式主义评审)
|
||||||
|
|
||||||
|
### 3.1 Red Team vs Blue Team
|
||||||
|
|
||||||
|
1. Blue Team:提出当前方案“为何可行”。
|
||||||
|
2. Red Team:站在“攻击者 + 故障 + 合规审计 + 客户投诉”视角拆解方案。
|
||||||
|
3. 每轮必须回答三个问题:
|
||||||
|
- 这个设计最先会在哪个条件下失败?
|
||||||
|
- 失败后会造成什么业务损失?
|
||||||
|
- 30 分钟内如何止血并可审计复盘?
|
||||||
|
|
||||||
|
### 3.2 强制替代方案对比
|
||||||
|
|
||||||
|
每个关键决策至少对比 2 个方案(例如:
|
||||||
|
`subapi 外部模块化` vs `深度 fork`),并给出:
|
||||||
|
|
||||||
|
1. 复杂度成本。
|
||||||
|
2. 失败半径。
|
||||||
|
3. 回滚难度。
|
||||||
|
4. 团队运维负担。
|
||||||
|
|
||||||
|
### 3.3 预演失败(Pre-mortem)
|
||||||
|
|
||||||
|
假设“2026-06-30 项目失败”,倒推失败原因并映射到当前行动项:
|
||||||
|
|
||||||
|
1. 兼容回归导致客户 SDK 大面积失败。
|
||||||
|
2. 账务冲突引发客户争议与财务风险。
|
||||||
|
3. 安全配置疏漏导致 SSRF/密钥风险事件。
|
||||||
|
4. 运维流程复杂导致故障恢复超时。
|
||||||
|
|
||||||
|
## 4. 审核维度与评分模型
|
||||||
|
|
||||||
|
### 4.1 评分维度(100 分制)
|
||||||
|
|
||||||
|
| 维度 | 权重 | 核心问题 |
|
||||||
|
|---|---:|---|
|
||||||
|
| 兼容性 | 25 | 协议、错误语义、流式边界是否稳定 |
|
||||||
|
| 安全性 | 25 | SSRF、鉴权、密钥、供应链与审计是否可控 |
|
||||||
|
| 可靠性 | 20 | 故障隔离、熔断、回滚、恢复时间是否达标 |
|
||||||
|
| 运维简化 | 15 | 是否可标准化操作,是否减少人肉介入 |
|
||||||
|
| 账务正确性 | 10 | 幂等、对账、冲突告警是否闭环 |
|
||||||
|
| 合规可审计 | 5 | ToS、审计链、证据导出是否完整 |
|
||||||
|
|
||||||
|
### 4.2 通过门槛
|
||||||
|
|
||||||
|
1. 总分 >= 85。
|
||||||
|
2. 任一维度不得低于 70。
|
||||||
|
3. 安全/合规存在 P0 未闭环:直接 NO-GO。
|
||||||
|
|
||||||
|
## 5. 四轮审核流程(建议)
|
||||||
|
|
||||||
|
### Round-1:架构与替换路径审核(2026-03-19)
|
||||||
|
|
||||||
|
1. 输入:v2/v3 规划文档、替换路径图、接口边界。
|
||||||
|
2. 重点:是否能从“集成”平滑走到“替换”,且不锁死在 subapi。
|
||||||
|
3. 输出:架构问题清单(含优先级与 owner)。
|
||||||
|
4. 必邀角色:架构负责人、重构项目专家、网关专家、核心用户代表。
|
||||||
|
|
||||||
|
### Round-2:兼容与计费一致性审核(2026-03-22)
|
||||||
|
|
||||||
|
1. 输入:Connector 契约、接管率 SQL、验收用例。
|
||||||
|
2. 重点:协议兼容、错误归一、流式 no-replay、幂等扣费。
|
||||||
|
3. 输出:兼容差异矩阵 + 账务风险清单。
|
||||||
|
4. 必邀角色:测试专家、网关专家、计费负责人、核心用户代表。
|
||||||
|
|
||||||
|
### Round-3:安全与合规攻防审核(2026-03-25)
|
||||||
|
|
||||||
|
1. 输入:配置硬化基线、认证链路、ToS 风险台账。
|
||||||
|
2. 重点:query key、SSRF、出网策略、凭证安全、审计可追溯。
|
||||||
|
3. 输出:安全整改单(P0/P1/P2)+ 合规结论。
|
||||||
|
4. 必邀角色:安全负责人、测试专家、法务接口人、安全攻防专家。
|
||||||
|
|
||||||
|
### Round-4:可靠性与运维演练审核(2026-03-29)
|
||||||
|
|
||||||
|
1. 输入:告警看板、Runbook、回滚脚本。
|
||||||
|
2. 重点:升级失败自动回退、30 分钟恢复、值班可执行性。
|
||||||
|
3. 输出:演练报告 + GO/CONDITIONAL GO/NO-GO 决议。
|
||||||
|
4. 必邀角色:SRE、测试专家、产品负责人、核心用户代表。
|
||||||
|
|
||||||
|
## 6. 决策与治理机制
|
||||||
|
|
||||||
|
### 6.1 决策类型
|
||||||
|
|
||||||
|
1. `GO`:可按计划推进。
|
||||||
|
2. `CONDITIONAL GO`:允许推进,但必须在明确日期前关闭指定问题。
|
||||||
|
3. `NO-GO`:冻结升波,先整改后复审。
|
||||||
|
|
||||||
|
### 6.2 一票否决条件(任一满足)
|
||||||
|
|
||||||
|
1. 存在未闭环安全 P0 风险。
|
||||||
|
2. 存在账务正确性 P0 风险。
|
||||||
|
3. 无法在 30 分钟内完成回滚恢复演练。
|
||||||
|
4. 法务未给出 ToS 风险可接受结论。
|
||||||
|
|
||||||
|
### 6.3 争议升级路径
|
||||||
|
|
||||||
|
1. 技术争议:架构负责人 + SRE + 安全三方联裁。
|
||||||
|
2. 商业/合规争议:产品负责人 + 法务 + 管理层联裁。
|
||||||
|
3. 所有联裁必须形成 ADR(Architecture Decision Record)。
|
||||||
|
|
||||||
|
## 7. 必备评审材料(会前 24h 发出)
|
||||||
|
|
||||||
|
1. 架构图与替换路线图(现状/目标/过渡)。
|
||||||
|
2. 接口契约与兼容差异报告。
|
||||||
|
3. 风险清单(含 P0/P1/P2、状态、owner、截止日期)。
|
||||||
|
4. 近两周指标快照(P95、5xx、接管率、账务冲突率)。
|
||||||
|
5. 上次整改项关闭证据。
|
||||||
|
|
||||||
|
## 8. 评审输出模板(必须留档)
|
||||||
|
|
||||||
|
1. 评分表(总分 + 分维度评分)。
|
||||||
|
2. 问题清单(编号、等级、负责人、截止日期)。
|
||||||
|
3. 决议结果(GO/CONDITIONAL GO/NO-GO)。
|
||||||
|
4. 风险接受记录(谁批准、基于什么证据)。
|
||||||
|
|
||||||
|
## 9. 与两周任务单的绑定方式
|
||||||
|
|
||||||
|
1. 每轮专家评审都要映射到任务单任务 ID(COMP/SEC/REL/EXP)。
|
||||||
|
2. 若评审新增问题,必须生成新任务 ID 并进入 daily gate。
|
||||||
|
3. Round-4 决议是两周验收(2026-03-31)的前置条件。
|
||||||
|
4. 三角色联合复审(`EXP-007`)是 `EXP-006` 最终决议前置条件之一。
|
||||||
|
|
||||||
|
## 10. 立即可执行动作(本周)
|
||||||
|
|
||||||
|
1. 确认专家名单与角色映射(内部 + 外部)。
|
||||||
|
2. 发布 Round-1 邀请与会前材料清单。
|
||||||
|
3. 指定评审秘书,负责记录与问题跟踪。
|
||||||
|
4. 将评审结论同步到风险任务单与周报。
|
||||||
|
|
||||||
|
## 11. 已准备好的执行模板(可直接使用)
|
||||||
|
|
||||||
|
1. 专家名单与回避:`review/experts_roster_2026-03-18.md`
|
||||||
|
2. 邮件邀请模板:`review/templates/expert_invitation_email_templates_2026-03-17.md`
|
||||||
|
3. IM 邀请模板:`review/templates/expert_im_message_templates_2026-03-17.md`
|
||||||
|
4. 外部专家保密与回避声明:`review/templates/external_expert_nda_coi_template_2026-03-17.md`
|
||||||
|
5. 评分表模板:`review/templates/expert_review_scorecard_2026-03-17.md`
|
||||||
|
6. 会议纪要模板:`review/templates/expert_review_minutes_template_2026-03-17.md`
|
||||||
|
7. 问题台账模板:`review/templates/expert_issue_register_template_2026-03-17.md`
|
||||||
|
8. 邀请发送跟踪台账:`review/invitation_dispatch_tracker_2026-03-17.md`
|
||||||
|
9. 邀请发出前检查单:`review/dispatch_ready_checklist_2026-03-17.md`
|
||||||
|
|
||||||
|
## 12. 三角色联合评审输入(新增)
|
||||||
|
|
||||||
|
为强化“用户可接受性 + 质量阻断 + 网关可替换性”的联合校验,新增:
|
||||||
|
|
||||||
|
1. `subapi_role_based_review_wargame_optimization_v1_2026-03-18.md`
|
||||||
|
- 用户代表、测试专家、网关专家三角色的 Red/Blue 博弈结论
|
||||||
|
- 新增任务 `UXR/TST/GAT/EXP-007` 映射
|
||||||
|
- 作为 Round-2 与 Round-4 的强制预读材料
|
||||||
@@ -0,0 +1,199 @@
|
|||||||
|
# Subapi 集成兼容性、安全与运维可靠性设计(v1.1)
|
||||||
|
|
||||||
|
- 版本:v1.1
|
||||||
|
- 日期:2026-03-24
|
||||||
|
- 适用阶段:S1-S2
|
||||||
|
- 关联文档:
|
||||||
|
- `subapi_connector_contract_v1_2026-03-17.md`
|
||||||
|
- `sub2api_integration_readiness_checklist_2026-03-16.md`
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`(v1.1)
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md`
|
||||||
|
|
||||||
|
## 1. 结论先行(回答四个核心问题)
|
||||||
|
|
||||||
|
1. 我们是否考虑了与 subapi 的技术兼容性:
|
||||||
|
- 已考虑了“契约层兼容”和“灰度回滚”,但还缺“版本漂移自动检测 + 协议回归自动阻断 + 兼容风险分级”的闭环。
|
||||||
|
|
||||||
|
2. 我们是否了解 subapi 的安全风险:
|
||||||
|
- 已识别部分风险,但需要把风险从“清单级”升级到“强制控制级”。
|
||||||
|
- 典型高风险包括:URL allowlist 默认关闭、私网/HTTP 默认放开、Gemini 路径保留 query key 兼容、Simple 模式会跳过关键计费校验。
|
||||||
|
|
||||||
|
3. 集成 subapi 如何防止风险:
|
||||||
|
- 必须采用“外部服务模块化 + 网络隔离 + 配置硬化 + 双层鉴权 + 契约测试闸门 + 安全告警”组合拳,不接受单点措施。
|
||||||
|
|
||||||
|
4. 架构是否兼顾运维简单和可靠性:
|
||||||
|
- 当前方向正确(灰度、回滚、观测已有),但还需补齐“最小化运维复杂度”的具体机制:单一控制面、统一配置发布、标准化 runbook、以及故障域隔离(cell 化)。
|
||||||
|
|
||||||
|
## 2. 兼容性设计(防止实施期协议翻车)
|
||||||
|
|
||||||
|
### 2.1 已有基础(可复用)
|
||||||
|
|
||||||
|
1. Connector 契约已定义 canonical 端点、错误归一、流式边界、重试约束。
|
||||||
|
2. S2 迁移路径已定义 Wave 灰度与 stop/go 条件。
|
||||||
|
3. 已有接管率与验收用例文档可做质量门禁基础。
|
||||||
|
|
||||||
|
### 2.2 仍需补齐的兼容闭环
|
||||||
|
|
||||||
|
新增三层闸门(必须全部启用):
|
||||||
|
|
||||||
|
1. **Schema Gate(接口形态)**
|
||||||
|
- 校验请求/响应 JSON 结构、必填字段、字段类型、错误码结构。
|
||||||
|
- 重点接口:`/v1/messages`、`/v1/chat/completions`、`/v1/responses`、`/v1beta/models/*`。
|
||||||
|
|
||||||
|
2. **Behavior Gate(行为语义)**
|
||||||
|
- 校验 streaming 行为、首 token 前后错误处理、no-replay 规则。
|
||||||
|
- 校验 header 优先级、模型映射、会话粘性、fallback 行为。
|
||||||
|
|
||||||
|
3. **Performance Gate(性能与稳定)**
|
||||||
|
- 校验 P95、5xx、超时率、账务差错率、幂等冲突率。
|
||||||
|
- 不达标直接阻断升级,回退上一稳定版本。
|
||||||
|
|
||||||
|
### 2.3 兼容性风险分级(建议)
|
||||||
|
|
||||||
|
| 等级 | 定义 | 示例 | 处理策略 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| P0 | 会导致错误计费、协议不可用或大面积失败 | 流式 replay、错误码语义变化导致无限重试 | 立即阻断上线,强制回退 |
|
||||||
|
| P1 | 影响部分场景或部分租户 | 某端点字段新增导致旧 SDK 解析失败 | 灰度暂停,48h 内修复 |
|
||||||
|
| P2 | 非核心功能偏差 | 次要元数据缺失 | 记录并进入下个迭代 |
|
||||||
|
|
||||||
|
## 3. subapi 安全风险台账(针对当前代码事实)
|
||||||
|
|
||||||
|
### 3.1 配置默认值风险
|
||||||
|
|
||||||
|
1. `security.url_allowlist.enabled=false`(默认关闭)。
|
||||||
|
2. `allow_private_hosts=true`、`allow_insecure_http=true`(默认放开)。
|
||||||
|
3. 这意味着若不硬化,SSRF/内网访问风险面会扩大。
|
||||||
|
|
||||||
|
### 3.2 认证与参数风险
|
||||||
|
|
||||||
|
1. 常规 API 已拒绝 query key(`key`/`api_key`)。
|
||||||
|
2. 但 Gemini 路径保留了 `query key` 兼容(`/v1beta*`),需要在我方入口再做强制拦截与改写策略。
|
||||||
|
|
||||||
|
### 3.3 运行模式风险
|
||||||
|
|
||||||
|
1. `run_mode=simple` 下,认证后会跳过部分计费/订阅校验流程。
|
||||||
|
2. 生产若误用 simple,会带来额度与计费控制失效风险。
|
||||||
|
|
||||||
|
### 3.4 代理与来源 IP 风险
|
||||||
|
|
||||||
|
1. `server.trusted_proxies` 默认空数组。
|
||||||
|
2. 虽有告警日志,但若部署链路未正确设置,来源 IP 信任链可能失真,影响风控与审计。
|
||||||
|
|
||||||
|
### 3.5 合规与法律风险
|
||||||
|
|
||||||
|
1. 上游仓库 README 明确提示 ToS 风险与研究用途声明。
|
||||||
|
2. MIT 许可仅覆盖开源代码使用,不覆盖上游模型服务条款合规。
|
||||||
|
|
||||||
|
## 4. 风险防护设计(集成必须项)
|
||||||
|
|
||||||
|
### 4.1 网络与边界隔离
|
||||||
|
|
||||||
|
1. subapi 只允许内网访问,不直接暴露公网。
|
||||||
|
2. 主网关与 subapi 之间启用 mTLS(至少双向证书校验)。
|
||||||
|
3. 出网层做 egress allowlist(域名/IP 双层),禁直连内网和不受信目的地。
|
||||||
|
|
||||||
|
### 4.2 配置硬化基线(生产强制)
|
||||||
|
|
||||||
|
1. `run_mode=standard`。
|
||||||
|
2. `security.url_allowlist.enabled=true`。
|
||||||
|
3. `security.url_allowlist.allow_private_hosts=false`。
|
||||||
|
4. `security.url_allowlist.allow_insecure_http=false`。
|
||||||
|
5. `billing.circuit_breaker.enabled=true`。
|
||||||
|
6. `server.trusted_proxies` 必须显式配置。
|
||||||
|
|
||||||
|
### 4.3 认证与密钥策略
|
||||||
|
|
||||||
|
1. 我方北向入口禁止 query key;统一只接收 header 鉴权。
|
||||||
|
2. 对 Gemini 兼容流量,在入口层将 query key 转换为 header 后再转发,外部请求直接带 query key 一律拒绝。
|
||||||
|
3. API Key 与上游凭证分离管理,凭证仅在 Adapter 层短时解密。
|
||||||
|
4. 需求方仅可使用平台签发凭证访问平台入口,`platform_credential_ingress_coverage_pct` 必须为 100%。
|
||||||
|
5. 禁止向需求方返回供应方上游凭证(API/控制台/导出/错误信息均不允许)。
|
||||||
|
6. 需求方绕过平台直连供应方视为 P0 安全事件,必须可观测、可告警、可阻断。
|
||||||
|
|
||||||
|
### 4.4 版本与发布治理
|
||||||
|
|
||||||
|
1. 固定 subapi 精确版本(`vX.Y.Z`),禁止漂移。
|
||||||
|
2. 每次升级必须通过 Schema/Behavior/Performance 三重 Gate。
|
||||||
|
3. 灰度比例:5% -> 20% -> 50% -> 100%。
|
||||||
|
4. 任一 P0 风险触发,自动回退上一稳定版本。
|
||||||
|
|
||||||
|
### 4.5 观测与审计
|
||||||
|
|
||||||
|
1. 强制 request_id 全链路透传。
|
||||||
|
2. 记录 `router_engine`、`inbound_endpoint`、`upstream_endpoint`、`request_type`。
|
||||||
|
3. 安全告警纳入统一事件中心:
|
||||||
|
- query key 拦截次数
|
||||||
|
- 非法上游域名命中
|
||||||
|
- 私网地址访问拦截
|
||||||
|
- 账务冲突率突增
|
||||||
|
- 供应方上游凭证泄露事件(`supplier_credential_exposure_events`)
|
||||||
|
- 需求方绕过平台直连供应方事件(`direct_supplier_call_by_consumer_events`)
|
||||||
|
- 平台凭证入站覆盖率下降(`platform_credential_ingress_coverage_pct < 100%`)
|
||||||
|
|
||||||
|
## 5. 运维简单 + 可靠性架构(目标态)
|
||||||
|
|
||||||
|
### 5.1 运维简单(减少人肉操作)
|
||||||
|
|
||||||
|
1. **单一控制面**:所有路由开关、灰度比例、熔断阈值在我方控制面发布。
|
||||||
|
2. **单一发布流水线**:subapi 升级与 Router Core 升级共享同一套 Gate 与回滚动作。
|
||||||
|
3. **标准化运行手册**:按“告警 -> 判断 -> 操作 -> 验证”四段式 Runbook 固化。
|
||||||
|
|
||||||
|
### 5.2 高可靠(避免级联故障)
|
||||||
|
|
||||||
|
1. **故障域隔离(Cell)**:按租户或区域切分 subapi 实例池,避免单点故障扩散。
|
||||||
|
2. **双通路兜底**:自研主路径 + subapi connector 兜底,并支持一键回切。
|
||||||
|
3. **幂等与补偿**:请求级幂等扣费 + 冲突告警 + 对账补偿任务。
|
||||||
|
4. **流式保护**:首字输出后禁止 replay,防止双流拼接与重复扣费。
|
||||||
|
|
||||||
|
### 5.3 SLO 与错误预算(建议)
|
||||||
|
|
||||||
|
1. 可用性 SLO:99.9%(网关维度)。
|
||||||
|
2. 附加延迟 SLO:P95 <= 60ms。
|
||||||
|
3. 账务正确性 SLO:差错率 <= 0.1%,冲突率 <= 0.01%。
|
||||||
|
4. 每周审查 error budget;超预算自动冻结升波。
|
||||||
|
|
||||||
|
## 6. 两周内可落地动作(最小闭环)
|
||||||
|
|
||||||
|
1. 新增“兼容三重 Gate”流水线,并接入升级流程。
|
||||||
|
2. 生产配置硬化巡检(按 4.2 清单逐项验收)。
|
||||||
|
3. 在入口层落地“query key 全拦截(含 Gemini 兼容改写)”。
|
||||||
|
4. 建立安全告警面板(SSRF 拦截、query key 拦截、账务冲突)。
|
||||||
|
5. 增加凭证边界专项检查(上游凭证零外发 + 平台凭证入站覆盖率 100%)。
|
||||||
|
6. 完成一次“升级 + 灰度 + 自动回滚”演练并沉淀复盘。
|
||||||
|
|
||||||
|
## 7. 验收标准(本设计是否落地)
|
||||||
|
|
||||||
|
1. 任一 subapi 升级都能产出 Gate 报告并可追溯。
|
||||||
|
2. 生产环境不存在宽松高风险配置(4.2 全部满足)。
|
||||||
|
3. 发生兼容或安全异常时,30 分钟内可回切到稳定版本。
|
||||||
|
4. 需求方仅使用平台凭证入站,`platform_credential_ingress_coverage_pct = 100%`。
|
||||||
|
5. `supplier_credential_exposure_events = 0` 且 `direct_supplier_call_by_consumer_events = 0`。
|
||||||
|
6. 运维团队按 Runbook 可独立处置常见告警,无需临时拍脑袋决策。
|
||||||
|
|
||||||
|
## 8. 代码证据(用于本设计判断)
|
||||||
|
|
||||||
|
1. 默认安全配置与告警日志:
|
||||||
|
`backend/internal/config/config.go`
|
||||||
|
2. API Key 鉴权与 simple mode 逻辑:
|
||||||
|
`backend/internal/server/middleware/api_key_auth.go`
|
||||||
|
3. Gemini 认证优先级与 query key 兼容:
|
||||||
|
`backend/internal/server/middleware/api_key_auth_google.go`
|
||||||
|
4. URL 校验器(含 allowlist/private/insecure 与 DNS rebinding 注释):
|
||||||
|
`backend/internal/util/urlvalidator/validator.go`
|
||||||
|
5. trusted proxies 与 release 模式告警:
|
||||||
|
`backend/internal/server/http.go`
|
||||||
|
6. 上游 ToS 风险提示:
|
||||||
|
`README.md`(sub2api 仓库)
|
||||||
|
|
||||||
|
## 9. 执行任务单(新增)
|
||||||
|
|
||||||
|
为将本设计转换为可执行排期,新增任务单:
|
||||||
|
|
||||||
|
1. `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md`
|
||||||
|
- 两周里程碑(2026-03-18 至 2026-03-31)
|
||||||
|
- 任务 ID / 责任角色 / 截止日期 / 验收标准 / 证据产物
|
||||||
|
- Daily Gate / Weekly Gate / 回滚演练闭环
|
||||||
|
2. `subapi_expert_review_wargame_plan_v1_2026-03-17.md`
|
||||||
|
- 专家组成、对抗式博弈规则、评分模型与一票否决条件
|
||||||
|
- 四轮评审(架构/兼容计费/安全合规/可靠性演练)与最终决议机制
|
||||||
@@ -0,0 +1,223 @@
|
|||||||
|
# Subapi 集成风险控制实施任务单(两周执行版,v1.4)
|
||||||
|
|
||||||
|
- 版本:v1.4
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 执行窗口:2026-03-18 至 2026-03-31(两周)
|
||||||
|
- 关联文档:
|
||||||
|
- `subapi_integration_compat_security_reliability_design_v1_2026-03-17.md`
|
||||||
|
- `subapi_expert_review_wargame_plan_v1_2026-03-17.md`
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
- `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md`
|
||||||
|
- `router_core_s2_acceptance_test_cases_v1_2026-03-17.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`(v1.1, 2026-03-24)
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md`
|
||||||
|
- `supply_button_level_prd_v1_2026-03-25.md`
|
||||||
|
- `supply_api_contract_openapi_draft_v1_2026-03-25.yaml`
|
||||||
|
- `supply_ui_test_cases_executable_v1_2026-03-25.md`
|
||||||
|
- `supply_gate_command_playbook_v1_2026-03-25.md`
|
||||||
|
- `supply_technical_design_enhanced_v1_2026-03-25.md`
|
||||||
|
- `supply_test_plan_enhanced_v1_2026-03-25.md`
|
||||||
|
- `supply_uiux_design_spec_v1_2026-03-25.md`
|
||||||
|
- `tests/supply/ui_design_qa_cases_v1_2026-03-25.md`
|
||||||
|
- `reports/supply_gate_preflight_2026-03-25.md`
|
||||||
|
- `review/multi_expert_planning_review_v1_2026-03-25.md`
|
||||||
|
|
||||||
|
## 1. 执行目标(两周必须达成)
|
||||||
|
|
||||||
|
1. 建立 subapi 升级“兼容三重 Gate”(Schema/Behavior/Performance)并接入发布前闸门。
|
||||||
|
2. 完成生产配置硬化,消除已识别高风险默认项。
|
||||||
|
3. 建立安全告警和回滚演练闭环,确保 30 分钟内可回切。
|
||||||
|
4. 将风险控制纳入日常运维流程,避免靠临时人工判断。
|
||||||
|
5. 建立“凭证边界”硬门禁:需求方仅用平台凭证,供应方上游凭证零外发。
|
||||||
|
6. 建立供应侧发布门禁链路(SUP):账号挂载 -> 套餐发布 -> 结算提现全链路可验收。
|
||||||
|
7. 建立四专家整改发布链路(XR):技术/测试/UIUX/业主条款与门禁统一闭环。
|
||||||
|
|
||||||
|
## 2. 责任角色映射(实名RACI)
|
||||||
|
|
||||||
|
| 角色 | 实名负责人(主/备) | 职责 |
|
||||||
|
|---|---|---|
|
||||||
|
| `ARCH`(架构负责人) | 王磊 / 赵凯 | 兼容策略、闸门标准、最终技术裁决 |
|
||||||
|
| `PLAT`(平台工程) | 李娜 / 陈涛 | 流水线、配置发布、网关接入改造 |
|
||||||
|
| `SEC`(安全负责人) | 周敏 / 郭强 | 安全基线、威胁验证、告警策略 |
|
||||||
|
| `SRE`(稳定性负责人) | 刘洋 / 韩雪 | 监控、演练、故障响应与回滚 |
|
||||||
|
| `QA`(测试负责人) | 孙悦 / 吴航 | 契约回归、流式边界、验收报告 |
|
||||||
|
| `FIN`(计费/数据) | 何静 / 彭程 | 对账、幂等冲突监控、成本异常告警 |
|
||||||
|
|
||||||
|
说明:任务表中的角色标识(`ARCH/PLAT/SEC/SRE/QA/FIN`)按本表实名映射执行,并纳入 on-call 值班表。
|
||||||
|
|
||||||
|
## 3. 两周里程碑(绝对日期)
|
||||||
|
|
||||||
|
| 里程碑 | 截止日期 | 验收条件 |
|
||||||
|
|---|---|---|
|
||||||
|
| M1:基线冻结 | 2026-03-20 | 风险清单冻结;硬化项可检测 |
|
||||||
|
| M2:兼容闸门联通 | 2026-03-24 | 三重 Gate 在 CI 可执行并有报告 |
|
||||||
|
| M3:安全硬化完成 | 2026-03-27 | 高风险默认项全部改为生产安全值 |
|
||||||
|
| M4:回滚演练通过 | 2026-03-30 | 升级失败自动回退演练完成 |
|
||||||
|
| M5:两周验收 | 2026-03-31 | 交付证据包齐全并评审通过 |
|
||||||
|
|
||||||
|
## 4. 任务清单(可直接排期)
|
||||||
|
|
||||||
|
### 4.1 Workstream A:兼容性 Gate
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| COMP-001 | 固化 canonical 端点矩阵(OpenAI/Anthropic/Gemini) | `ARCH` + `QA` | 2026-03-19 | 无 | 端点矩阵冻结并评审通过 | `docs/compat/canonical_endpoint_matrix.md` |
|
||||||
|
| COMP-002 | 产出 Schema Gate 用例(请求/响应/错误结构) | `QA` | 2026-03-21 | COMP-001 | 覆盖核心 6 条链路;失败可定位到字段级 | `tests/compat/schema_gate_report.md` |
|
||||||
|
| COMP-003 | 产出 Behavior Gate 用例(流式/no-replay/重试) | `QA` | 2026-03-22 | COMP-001 | 覆盖流式边界与错误语义;无歧义 case | `tests/compat/behavior_gate_report.md` |
|
||||||
|
| COMP-004 | 产出 Performance Gate 阈值脚本(P95/5xx/账务) | `SRE` + `FIN` | 2026-03-23 | COMP-001 | 阈值可配置,支持阻断 | `scripts/gate/perf_gate_check.sh` |
|
||||||
|
| COMP-005 | 三重 Gate 接入发布流水线 | `PLAT` | 2026-03-24 | COMP-002/003/004 | 发布前自动执行,任一失败阻断发布 | CI 记录 + Gate 汇总报告 |
|
||||||
|
| COMP-006 | 定义兼容风险分级处置(P0/P1/P2) | `ARCH` | 2026-03-24 | COMP-005 | 每级别有明确响应时限与动作 | `docs/compat/risk_severity_playbook.md` |
|
||||||
|
| COMP-007 | 主路径 SQL 与 canonical 契约对齐(移除 alias/空端点歧义) | `ARCH` + `FIN` | 2026-03-22 | COMP-001 | 验收分母仅包含 canonical 主路径 | `sql/takeover_main_path_canonical.sql` |
|
||||||
|
| COMP-008 | 国内平台清单配置化(替代 SQL 硬编码) | `PLAT` + `FIN` | 2026-03-22 | COMP-007 | `cn_platforms` 来自配置表/配置中心 | `docs/compat/cn_platform_mapping.md` |
|
||||||
|
| COMP-009 | Wave Gate 增加 `route_mark_coverage>=99.9%` 硬门槛 | `ARCH` + `QA` | 2026-03-23 | COMP-007 | 覆盖率不达标自动 Stop | Wave Gate 配置快照 + 演练记录 |
|
||||||
|
|
||||||
|
### 4.2 Workstream B:安全硬化
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| SEC-001 | 生产配置扫描器(检测高风险默认项) | `SEC` + `PLAT` | 2026-03-20 | 无 | 可检测 run_mode/url_allowlist/private/http/trusted_proxies | `scripts/security/config_hardening_scan.sh` |
|
||||||
|
| SEC-002 | 生产环境配置硬化发布(标准模式+URL 策略) | `PLAT` | 2026-03-25 | SEC-001 | 所有环境通过扫描,无高危项 | 发布变更单 + 前后配置 diff |
|
||||||
|
| SEC-003 | 北向入口 query key 全拦截策略上线 | `PLAT` + `SEC` | 2026-03-25 | SEC-001 | 外部 query key 请求全部拒绝并告警 | 网关策略配置 + 拦截日志样本 |
|
||||||
|
| SEC-004 | Gemini 兼容请求 header 改写策略(内转) | `PLAT` | 2026-03-26 | SEC-003 | 兼容客户端可用,且不暴露 query key 通路 | 联调记录 + 回归测试报告 |
|
||||||
|
| SEC-005 | 出网 egress allowlist 与私网访问阻断 | `SEC` + `SRE` | 2026-03-27 | SEC-002 | 未授权域名/私网访问被阻断 | 防火墙/代理策略快照 + 拦截告警 |
|
||||||
|
| SEC-006 | ToS 合规审查记录归档(法务接口) | `SEC` | 2026-03-27 | 无 | 上游条款风险有书面结论 | `compliance/subapi_tos_assessment_2026-03-27.pdf` |
|
||||||
|
| SEC-007 | subapi 内网隔离与公网不可达验证 | `SEC` + `SRE` | 2026-03-20 | SEC-001 | subapi 服务不对公网开放,扫描验证通过 | 网络策略清单 + 连通性测试报告 |
|
||||||
|
| SEC-008 | 网关<->subapi mTLS 双向认证与证书轮换演练 | `PLAT` + `SEC` | 2026-03-24 | SEC-007 | 双向证书校验生效,轮换不影响可用性 | mTLS 配置 + 轮换演练报告 |
|
||||||
|
| SEC-009 | query key 外拒内转策略强制测试 | `SEC` + `QA` | 2026-03-27 | SEC-003, SEC-004 | 外部 query key 全拒绝,内部改写链路可追踪 | `tests/security/query_key_boundary_report.md` |
|
||||||
|
| SEC-010 | 供应方上游凭证泄露扫描与脱敏基线 | `SEC` + `PLAT` | 2026-03-26 | SEC-002 | `supplier_credential_exposure_events=0`,日志/报表无敏感片段 | `tests/security/credential_exposure_scan_report.md` |
|
||||||
|
| SEC-011 | 需求方绕过平台直连供应方检测策略上线 | `SEC` + `SRE` | 2026-03-27 | SEC-005 | `direct_supplier_call_by_consumer_events=0` 可观测可告警 | `docs/security/direct_supplier_call_detection_v1.md` |
|
||||||
|
| SEC-012 | 平台凭证入站覆盖率审计任务 | `PLAT` + `SEC` | 2026-03-26 | SEC-003 | `platform_credential_ingress_coverage_pct=100%` | `reports/security/platform_credential_ingress_coverage_2026-03-26.md` |
|
||||||
|
|
||||||
|
### 4.3 Workstream C:运维简单与可靠性
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| REL-001 | 单一控制面发布流程定义(变更入口统一) | `ARCH` + `PLAT` | 2026-03-21 | 无 | 路由开关/灰度/熔断统一入口 | `docs/ops/unified_change_flow.md` |
|
||||||
|
| REL-002 | 安全+兼容+质量告警看板搭建 | `SRE` | 2026-03-26 | COMP-005, SEC-002 | 包含 query key、SSRF、冲突率、takeover | 看板截图 + 指标定义清单 |
|
||||||
|
| REL-003 | 回滚自动化脚本(版本回切) | `PLAT` + `SRE` | 2026-03-27 | COMP-005 | 10 分钟内触发回切,30 分钟内恢复 | `scripts/release/rollback_subapi.sh` + 演练日志 |
|
||||||
|
| REL-004 | Runbook 标准化(告警->判断->操作->验证) | `SRE` | 2026-03-28 | REL-002 | 至少覆盖 8 类高频告警 | `docs/runbook/subapi_integration_runbook_v1.md` |
|
||||||
|
| REL-005 | 一次完整演练(升级失败自动回退) | `SRE` + `QA` | 2026-03-30 | REL-003, REL-004 | 演练成功且复盘闭环 | 演练记录 + 复盘报告 |
|
||||||
|
| REL-006 | 两周验收评审与风险复盘 | `ARCH` + 全员 | 2026-03-31 | 全部任务 | 验收结论明确(通过/有条件通过/不通过) | `reports/sprint_risk_control_review_2026-03-31.md` |
|
||||||
|
| REL-007 | 凭证边界告警看板(M-013~M-016) | `SRE` + `SEC` | 2026-03-27 | SEC-010, SEC-011, SEC-012 | 凭证边界指标分钟级可观测并支持阈值告警 | 看板截图 + 告警策略快照 |
|
||||||
|
|
||||||
|
### 4.4 Workstream D:专家审核与博弈
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| EXP-001 | 确认专家名单与角色回避规则 | `ARCH` + `SEC` | 2026-03-18 | 无 | 专家名单冻结(含用户代表/测试专家/网关专家),独立性规则确认 | `review/experts_roster_2026-03-18.md` |
|
||||||
|
| EXP-002 | Round-1 架构与替换路径评审 | `ARCH` | 2026-03-19 | EXP-001 | 形成问题清单与决策建议 | `review/rounds/round1_architecture_review.md` |
|
||||||
|
| EXP-003 | Round-2 兼容与账务一致性评审 | `QA` + `FIN` | 2026-03-22 | COMP-002, COMP-003 | 兼容差异与账务风险可追踪 | `review/rounds/round2_compat_billing_review.md` |
|
||||||
|
| EXP-004 | Round-3 安全与合规攻防评审 | `SEC` | 2026-03-25 | SEC-002, SEC-003 | 安全/合规 P0 是否清零有明确结论 | `review/rounds/round3_security_compliance_review.md` |
|
||||||
|
| EXP-005 | Round-4 可靠性与回滚演练评审 | `SRE` | 2026-03-29 | REL-003, REL-004 | 演练满足 30 分钟恢复目标 | `review/rounds/round4_reliability_wargame_review.md` |
|
||||||
|
| EXP-006 | 专家最终决议(GO/CONDITIONAL GO/NO-GO) | `ARCH` + 管理层 | 2026-03-31 | EXP-002~005 | 决议与风险接受记录齐全 | `review/final_decision_2026-03-31.md` |
|
||||||
|
|
||||||
|
### 4.5 Workstream E:产品与项目治理闭环(新增)
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| PROD-001 | 迁移异常客户沟通模板与分级机制 | `产品` + `CS` | 2026-03-24 | EXP-002 | 迁移异常 30 分钟内有标准对外沟通 | `docs/product/migration_incident_comms_v1.md` |
|
||||||
|
| PROD-002 | 账务争议 SLA 与补偿边界定义 | `产品` + `FIN` + `法务` | 2026-03-24 | EXP-003 | 客户争议处理时限与补偿规则可执行 | `docs/product/billing_dispute_sla_v1.md` |
|
||||||
|
| PMO-001 | 任务实名 RACI 与备份负责人落地 | `PMO` + `ARCH` | 2026-03-18 | 无 | 所有 P0/P1 任务均有 owner+backup | `reports/raci_snapshot_2026-03-18.md` |
|
||||||
|
|
||||||
|
### 4.6 Workstream F:三角色联合评审落地(用户/测试/网关)
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| UXR-001 | 用户代表迁移旅程验收走查(含告警通知链路) | `产品` + `CS` + `用户代表` | 2026-03-25 | PROD-001 | 迁移异常场景 15 分钟内通知链路实测通过 | `reports/user_representative_migration_walkthrough_2026-03-25.md` |
|
||||||
|
| UXR-002 | 用户代表账务争议流程演练与反馈闭环 | `产品` + `FIN` + `用户代表` | 2026-03-25 | PROD-002 | 争议流程演练通过且用户侧反馈关闭 | `reports/user_billing_dispute_drill_2026-03-25.md` |
|
||||||
|
| TST-001 | 契约漂移检测接入 CI 阻断 | `QA` + `PLAT` | 2026-03-25 | COMP-005 | 漂移失败自动阻断发布 | `tests/compat/contract_drift_ci_report.md` |
|
||||||
|
| TST-002 | 流式+Failover 高压回归套件落地 | `QA` + `SRE` | 2026-03-27 | COMP-003, REL-002 | no-replay 与切换策略在高压下稳定通过 | `tests/compat/stream_failover_stress_report.md` |
|
||||||
|
| TST-003 | 升波证据包模板标准化 | `QA` + `SRE` | 2026-03-23 | COMP-009 | 每次升波均产出统一证据包 | `evidence/*/wave_gate_bundle.md` |
|
||||||
|
| TST-004 | 凭证边界回归测试(平台凭证入站/上游凭证不外发) | `QA` + `SEC` | 2026-03-27 | SEC-010, SEC-012 | 用例失败自动阻断发布 | `tests/security/credential_boundary_regression_report.md` |
|
||||||
|
| GAT-001 | Provider 能力矩阵与缺口清单 | `ARCH` + `PLAT` | 2026-03-22 | COMP-001 | 已接入供应商能力矩阵覆盖率 100% | `docs/gateway/provider_capability_matrix_v1.md` |
|
||||||
|
| GAT-002 | 三层降级策略与演练脚本 | `ARCH` + `SRE` | 2026-03-28 | REL-003 | 演练可在 30 分钟内止血恢复 | `docs/gateway/degrade_playbook_v1.md` |
|
||||||
|
| GAT-003 | Adapter SPI 版本兼容规范 | `ARCH` | 2026-03-26 | GAT-001 | 新增适配器均有 SPI 兼容校验 | `docs/gateway/adapter_spi_versioning_v1.md` |
|
||||||
|
| EXP-007 | 三角色联合复审(用户/测试/网关) | `ARCH` + `QA` + `产品` | 2026-03-27 | UXR-001, TST-001, GAT-001 | 形成联合复审结论并决定是否继续升波 | `docs/subapi_role_based_review_wargame_optimization_v1_2026-03-18.md` |
|
||||||
|
|
||||||
|
### 4.7 Workstream G:供应侧发布门禁链路(SUP,新增)
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| SUP-001 | 供应侧按钮级 PRD 冻结(3 页面) | `产品` + `ARCH` | 2026-03-26 | 无 | 页面字段、按钮、状态机、错误码冻结 | `docs/supply_button_level_prd_v1_2026-03-25.md` |
|
||||||
|
| SUP-002 | 供应侧 OpenAPI 契约冻结(3 页面) | `PLAT` + `ARCH` | 2026-03-26 | SUP-001 | 请求/响应字段、枚举、错误码冻结 | `docs/supply_api_contract_openapi_draft_v1_2026-03-25.yaml` |
|
||||||
|
| SUP-003 | UI-SUP 可执行用例评审通过 | `QA` + `产品` | 2026-03-27 | SUP-001, SUP-002 | `UI-SUP-*` + `UI-DESIGN-QA-*` 全量可执行,覆盖按钮/状态/权限/可访问性 | `docs/supply_ui_test_cases_executable_v1_2026-03-25.md` + `tests/supply/ui_design_qa_cases_v1_2026-03-25.md` |
|
||||||
|
| SUP-004 | 账号挂载链路联调(验证/创建/激活/暂停) | `PLAT` + `QA` | 2026-03-28 | SUP-002, SUP-003 | `UI-SUP-ACC-001~006` 通过率 100% | `scripts/supply-gate/sup004_accounts.sh` + `tests/supply/ui_sup_acc_report_2026-03-28.md` |
|
||||||
|
| SUP-005 | 套餐发布链路联调(草稿/上架/暂停/下架/复制) | `PLAT` + `QA` | 2026-03-29 | SUP-002, SUP-003 | `UI-SUP-PKG-001~006` 通过率 100% | `scripts/supply-gate/sup005_packages.sh` + `tests/supply/ui_sup_pkg_report_2026-03-29.md` |
|
||||||
|
| SUP-006 | 结算提现链路联调(刷新/提现/撤销/导出) | `PLAT` + `FIN` + `QA` | 2026-03-29 | SUP-002, SUP-003 | `UI-SUP-SET-001~005` 通过率 100%,状态机无跳态 | `scripts/supply-gate/sup006_settlements.sh` + `tests/supply/ui_sup_set_report_2026-03-29.md` |
|
||||||
|
| SUP-007 | 供应侧凭证边界专项回归(SEC-SUP) | `SEC` + `QA` | 2026-03-30 | SUP-004, SUP-005, SUP-006 | `SEC-SUP-001~002` 通过,M-013~M-016 持续达标 | `scripts/supply-gate/sup007_boundary.sh` + `tests/supply/sec_sup_boundary_report_2026-03-30.md` |
|
||||||
|
| SUP-008 | 供应侧 Gate 汇总与发布结论 | `ARCH` + `QA` + `产品` | 2026-03-31 | SUP-004~SUP-007 | SUP Gate 结论为通过或有条件通过 | `reports/supply_gate_review_2026-03-31.md` |
|
||||||
|
|
||||||
|
### 4.8 Workstream H:四专家整改与复核链路(XR,新增)
|
||||||
|
|
||||||
|
| 任务ID | 任务 | Owner | 截止日期 | 依赖 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| XR-001 | 供应侧技术设计增强落地(幂等/并发/不变量/事务) | `ARCH` + `PLAT` | 2026-03-26 | SUP-002 | 关键写路径均具备双键幂等和冲突语义 | `docs/supply_technical_design_enhanced_v1_2026-03-25.md` |
|
||||||
|
| XR-002 | 供应侧测试方案增强落地(追踪矩阵+并发重放) | `QA` + `ARCH` | 2026-03-27 | XR-001 | Requirement->API->Test->Metric->Gate 全量可追踪 | `docs/supply_test_plan_enhanced_v1_2026-03-25.md` + `reports/supply_traceability_matrix_2026-03-25.csv` + `reports/supply_flaky_budget_2026-03-25.md` |
|
||||||
|
| XR-003 | 供应侧 UI/UX 规范与设计验收清单落地 | `产品` + `UIUX` + `QA` | 2026-03-27 | SUP-003 | DQA P0=0,P1 通过率>=95% | `docs/supply_uiux_design_spec_v1_2026-03-25.md` |
|
||||||
|
| XR-004 | 业主 SLA/申诉/赔付条款并入门禁验收 | `产品` + `CS` + `FIN` | 2026-03-28 | XR-002, XR-003 | 条款可执行可测且签字确认 | `docs/product/owner_sla_dispute_compensation_rules_v1.md` |
|
||||||
|
| XR-005 | 四专家再次对齐复核并形成发布结论 | `ARCH` + `QA` + `产品` + `UIUX` | 2026-03-28 | XR-001~XR-004 | 复核结论明确(GO/CONDITIONAL GO/NO-GO) | `review/multi_expert_alignment_recheck_v1_2026-03-25.md` |
|
||||||
|
|
||||||
|
## 5. 验收门禁(每日/每周)
|
||||||
|
|
||||||
|
### 5.1 Daily Gate(每日 18:00)
|
||||||
|
|
||||||
|
1. 高危配置扫描是否全部通过。
|
||||||
|
2. 兼容 Gate 失败数是否为 0。
|
||||||
|
3. 账务冲突率是否 <= 0.01%。
|
||||||
|
4. `query_key_external_reject_rate_pct` 是否 = 100%(否则即 P0)。
|
||||||
|
5. `platform_credential_ingress_coverage_pct` 是否 = 100%(否则即 P0)。
|
||||||
|
6. `supplier_credential_exposure_events` 是否 = 0(非0即 P0)。
|
||||||
|
7. `direct_supplier_call_by_consumer_events` 是否 = 0(非0即 P0)。
|
||||||
|
8. `route_mark_coverage_pct` 是否 >= 99.9%(不足即禁止升波)。
|
||||||
|
9. 迁移异常是否在 15 分钟内完成用户通知(未达标即 P1)。
|
||||||
|
10. 契约漂移检测是否通过(未通过即阻断发布)。
|
||||||
|
11. 供应侧 UI Gate 是否全绿(`UI-SUP-ACC-* / UI-SUP-PKG-* / UI-SUP-SET-*`)。
|
||||||
|
12. 供应侧凭证边界专项(`SEC-SUP-*`)是否全绿(失败即 P0)。
|
||||||
|
13. 四专家整改链路(XR-001~XR-003)是否全绿(未完成即禁止进入 SUP-008 结论环节)。
|
||||||
|
|
||||||
|
### 5.2 Weekly Gate(2026-03-24 / 2026-03-31)
|
||||||
|
|
||||||
|
1. 是否满足 M2 / M5 里程碑验收条件。
|
||||||
|
2. 是否触发 P0 事件(触发则冻结升级)。
|
||||||
|
3. 凭证边界指标(M-013~M-016)是否连续 7 天达标。
|
||||||
|
4. 是否完成回滚演练并达到 30 分钟恢复目标。
|
||||||
|
5. 是否完成当周专家评审并关闭必须整改项。
|
||||||
|
6. 供应侧 Gate(SUP-004~SUP-008)是否完成并出具结论。
|
||||||
|
7. 四专家复核链路(XR-001~XR-005)是否完成并形成签署结论。
|
||||||
|
|
||||||
|
## 6. 风险与阻断规则
|
||||||
|
|
||||||
|
| 触发条件 | 等级 | 处理动作 |
|
||||||
|
|---|---|---|
|
||||||
|
| 账务错误、双流拼接、大面积协议失败 | P0 | 立即回滚,冻结发布,24h 内复盘 |
|
||||||
|
| 上游凭证泄露、需求方绕过平台直连供应方、平台凭证入站覆盖不足 | P0 | 立即回滚,冻结发布,启动安全应急并完成法务告警 |
|
||||||
|
| 供应侧结算状态跳态、提现资金对不平、按钮权限越权 | P0 | 冻结供应侧发布链路,执行资金核对与权限审计 |
|
||||||
|
| 兼容回归影响部分租户 | P1 | 暂停升波,48h 内修复并补测 |
|
||||||
|
| 非关键指标偏差 | P2 | 记录到 backlog,下迭代修复 |
|
||||||
|
|
||||||
|
## 7. 证据包目录规范(建议)
|
||||||
|
|
||||||
|
```text
|
||||||
|
立交桥/
|
||||||
|
evidence/
|
||||||
|
2026-03-31-risk-control/
|
||||||
|
gate-reports/
|
||||||
|
supply-gate/
|
||||||
|
security-scans/
|
||||||
|
rollback-drill/
|
||||||
|
dashboards/
|
||||||
|
review/
|
||||||
|
```
|
||||||
|
|
||||||
|
所有任务验收必须至少提供:
|
||||||
|
|
||||||
|
1. 原始执行日志。
|
||||||
|
2. 指标/截图证据。
|
||||||
|
3. 结论与责任人签字(电子审批记录)。
|
||||||
|
|
||||||
|
## 8. 启动会议议程(30 分钟模板)
|
||||||
|
|
||||||
|
1. 确认实名 RACI 与 on-call(按第2章映射执行)。
|
||||||
|
2. 确认 P0 红线(含凭证边界)及回滚授权链路。
|
||||||
|
3. 确认 M1-M5 日期不变。
|
||||||
|
4. 锁定每日站会与每周评审时间。
|
||||||
@@ -0,0 +1,140 @@
|
|||||||
|
# Subapi 三角色专业评审与博弈优化报告(用户代表/测试专家/网关专家,v1)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 评审方式:专业 skills 驱动的线上联合评审(不进行线下评审)
|
||||||
|
- 角色范围:用户代表、测试专家、网关专家
|
||||||
|
- 关联文档:
|
||||||
|
- `subapi_design_comprehensive_review_findings_v1_2026-03-17.md`
|
||||||
|
- `subapi_expert_review_wargame_plan_v1_2026-03-17.md`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md`
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`
|
||||||
|
|
||||||
|
## 1. 结论先行
|
||||||
|
|
||||||
|
本轮三角色联合结论:`CONDITIONAL GO`。
|
||||||
|
|
||||||
|
触发条件:
|
||||||
|
|
||||||
|
1. 用户代表视角的迁移可接受性门槛必须上线(迁移成功率、回退时效、告知时效)。
|
||||||
|
2. 测试专家视角的阻断性质量门禁必须前置(契约漂移、流式回归、账务一致性)。
|
||||||
|
3. 网关专家视角的替换可逆性必须可验证(能力矩阵、降级策略、故障域隔离)。
|
||||||
|
|
||||||
|
若任一条件未满足,则默认进入 `NO-GO` 候选。
|
||||||
|
|
||||||
|
## 2. 角色化评审结果
|
||||||
|
|
||||||
|
## 2.1 用户代表评审(迁移可用性与商业可接受性)
|
||||||
|
|
||||||
|
### 关键风险
|
||||||
|
|
||||||
|
1. 迁移路径有技术灰度,但缺少“用户侧感知”的分层告知与承诺边界。
|
||||||
|
2. 缺少“兼容回归时用户可自助止血”的最小工具(例如一键切换备用入口提示)。
|
||||||
|
3. 账务争议处理存在流程草案,但未形成对外 SLA 承诺模板。
|
||||||
|
|
||||||
|
### Red vs Blue 博弈
|
||||||
|
|
||||||
|
1. Red:先做技术替换,用户沟通后补,会更快。
|
||||||
|
2. Blue:没有用户侧承诺,迁移中断会直接伤害续费与口碑。
|
||||||
|
3. 裁决:以“客户信任优先”为准,迁移计划必须绑定沟通与 SLA 机制。
|
||||||
|
|
||||||
|
### 角色结论
|
||||||
|
|
||||||
|
1. 等级:`P0`(商业阻断)
|
||||||
|
2. 通过条件:
|
||||||
|
- 迁移事件分级与通知模板上线。
|
||||||
|
- 用户可见状态页/告警消息与回退指引就绪。
|
||||||
|
- 账务争议 SLA 文档签署生效。
|
||||||
|
|
||||||
|
## 2.2 测试专家评审(质量门禁与回归可证据性)
|
||||||
|
|
||||||
|
### 关键风险
|
||||||
|
|
||||||
|
1. 已有验收用例,但“阻断发布”的自动化回归证据链还不完整。
|
||||||
|
2. 流式边界(no-replay)与 failover 组合场景尚缺高压故障注入报告。
|
||||||
|
3. 接管率统计和契约口径已在收敛,但仍需长期漂移监控机制。
|
||||||
|
|
||||||
|
### Red vs Blue 博弈
|
||||||
|
|
||||||
|
1. Red:核心链路手工回归即可,自动化先不做全量。
|
||||||
|
2. Blue:S2 阶段变更频率高,手工回归无法稳定阻断风险发布。
|
||||||
|
3. 裁决:核心链路必须“自动化阻断 + 手工抽检”双轨。
|
||||||
|
|
||||||
|
### 角色结论
|
||||||
|
|
||||||
|
1. 等级:`P0`(质量阻断)
|
||||||
|
2. 通过条件:
|
||||||
|
- 契约漂移检测任务接入 CI,失败即阻断。
|
||||||
|
- 流式/Failover/账务一致性场景有固定回归套件。
|
||||||
|
- 每轮升波必须附带可复核证据包(日志、指标、报告)。
|
||||||
|
|
||||||
|
## 2.3 网关专家评审(架构可替换性与运行风险)
|
||||||
|
|
||||||
|
### 关键风险
|
||||||
|
|
||||||
|
1. 现有方案明确了接管目标,但仍需防止适配层能力分散导致替换困难。
|
||||||
|
2. Provider 差异快速增加时,若没有能力矩阵和降级策略,故障半径会变大。
|
||||||
|
3. 缺少针对“适配器接口稳定性”的长期治理规范(版本与兼容边界)。
|
||||||
|
|
||||||
|
### Red vs Blue 博弈
|
||||||
|
|
||||||
|
1. Red:优先快速接入更多供应商,治理后置。
|
||||||
|
2. Blue:没有能力分层和降级策略,规模越大越难收敛风险。
|
||||||
|
3. 裁决:先固化能力矩阵与降级模型,再扩接新供应商。
|
||||||
|
|
||||||
|
### 角色结论
|
||||||
|
|
||||||
|
1. 等级:`P1`(高风险)
|
||||||
|
2. 通过条件:
|
||||||
|
- 发布统一的 Provider 能力矩阵(鉴权、流式、工具调用、账务字段)。
|
||||||
|
- 定义三层降级策略(同平台换号、同区域换平台、全局降级)。
|
||||||
|
- 明确 Adapter SPI 的兼容版本规则。
|
||||||
|
|
||||||
|
## 3. 联合裁决与规划优化
|
||||||
|
|
||||||
|
## 3.1 规划优化项(新增)
|
||||||
|
|
||||||
|
1. 新增“迁移可接受性门槛”:
|
||||||
|
- `migration_success_rate >= 99%`(试点租户)
|
||||||
|
- `migration_incident_notify_sla <= 15 分钟`
|
||||||
|
- `rollback_effective_time <= 30 分钟`
|
||||||
|
2. 新增“测试阻断门槛”:
|
||||||
|
- 契约漂移检测必须通过
|
||||||
|
- 流式 no-replay 回归必须通过
|
||||||
|
- 账务一致性抽样必须通过
|
||||||
|
3. 新增“网关治理门槛”:
|
||||||
|
- Provider 能力矩阵覆盖率 `=100%`(已接入供应商)
|
||||||
|
- 降级策略演练通过(每周至少一次)
|
||||||
|
- Adapter SPI 版本规则可追踪
|
||||||
|
|
||||||
|
## 3.2 执行策略优化(博弈后定稿)
|
||||||
|
|
||||||
|
1. 迁移节奏从“纯流量百分比驱动”升级为“流量 + 用户体验双门槛”驱动。
|
||||||
|
2. 质量策略从“验收测试清单”升级为“CI 阻断 + 演练证据包”双重门禁。
|
||||||
|
3. 网关扩容策略从“先接入再治理”改为“先矩阵治理再扩容”。
|
||||||
|
|
||||||
|
## 4. 任务映射(新增)
|
||||||
|
|
||||||
|
| 新任务ID | 来源角色 | 任务 | 截止日期 | 验收标准 | 证据产物 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| UXR-001 | 用户代表 | 迁移旅程验收走查(含通知链路) | 2026-03-22 | 迁移异常 15 分钟通知链路实测通过 | `reports/user_representative_migration_walkthrough_2026-03-22.md` |
|
||||||
|
| UXR-002 | 用户代表 | 账务争议流程演练与反馈闭环 | 2026-03-25 | 争议演练通过且用户反馈关闭 | `reports/user_billing_dispute_drill_2026-03-25.md` |
|
||||||
|
| TST-001 | 测试专家 | 契约漂移检测接入 CI 阻断 | 2026-03-22 | 漂移失败即阻断发布 | `tests/compat/contract_drift_ci_report.md` |
|
||||||
|
| TST-002 | 测试专家 | 流式+Failover 高压回归套件 | 2026-03-24 | no-replay 与切换策略稳定通过 | `tests/compat/stream_failover_stress_report.md` |
|
||||||
|
| TST-003 | 测试专家 | 升波证据包标准化 | 2026-03-23 | 每次升波有统一证据目录 | `evidence/*/wave_gate_bundle.md` |
|
||||||
|
| GAT-001 | 网关专家 | Provider 能力矩阵与缺口清单 | 2026-03-22 | 已接入供应商覆盖率 100% | `docs/gateway/provider_capability_matrix_v1.md` |
|
||||||
|
| GAT-002 | 网关专家 | 三层降级策略与演练脚本 | 2026-03-25 | 演练可在 30 分钟内止血 | `docs/gateway/degrade_playbook_v1.md` |
|
||||||
|
| GAT-003 | 网关专家 | Adapter SPI 版本兼容规范 | 2026-03-26 | 新增适配器必须通过 SPI 兼容校验 | `docs/gateway/adapter_spi_versioning_v1.md` |
|
||||||
|
| EXP-007 | 联合评审 | 三角色联合复审与放行结论 | 2026-03-27 | 形成 GO/CONDITIONAL GO/NO-GO | `review/rounds/round2_compat_billing_review.md` |
|
||||||
|
|
||||||
|
## 5. 决策建议(提交管理层)
|
||||||
|
|
||||||
|
1. 批准三角色新增任务并纳入 `Daily Gate` 与 `Weekly Gate`。
|
||||||
|
2. 将 `EXP-007` 设为 `EXP-006` 最终决议前置条件。
|
||||||
|
3. 将用户代表意见纳入风险接受记录,避免纯技术维度放行。
|
||||||
|
|
||||||
|
## 6. 本轮产出清单
|
||||||
|
|
||||||
|
1. 三角色评审与博弈优化报告(本文档)。
|
||||||
|
2. 任务映射建议(UXR/TST/GAT/EXP-007)。
|
||||||
|
3. 规划与执行文档的回链更新(由对应文档维护)。
|
||||||
1006
docs/supply_api_contract_openapi_draft_v1_2026-03-25.yaml
Normal file
1006
docs/supply_api_contract_openapi_draft_v1_2026-03-25.yaml
Normal file
File diff suppressed because it is too large
Load Diff
241
docs/supply_button_level_prd_v1_2026-03-25.md
Normal file
241
docs/supply_button_level_prd_v1_2026-03-25.md
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
# 供应侧按钮级 PRD 分解(首批 3 个核心页面)
|
||||||
|
|
||||||
|
- 版本:v1.0(草案)
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 适用范围:供应侧 S0/S1 首批上线页面
|
||||||
|
- 关联 SSOT:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`
|
||||||
|
- `supply_side_product_design_v1_2026-03-18.md`
|
||||||
|
- `supply_detailed_design_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 目标与范围
|
||||||
|
|
||||||
|
本稿用于把供应侧功能从“流程级”下钻到“按钮级”,用于:
|
||||||
|
|
||||||
|
1. 前端实现不歧义。
|
||||||
|
2. 后端接口可映射。
|
||||||
|
3. QA 可直接编写用例。
|
||||||
|
4. 审计与埋点可落地。
|
||||||
|
|
||||||
|
首批覆盖页面:
|
||||||
|
|
||||||
|
1. `SUP-PAGE-001` 供应账号挂载页。
|
||||||
|
2. `SUP-PAGE-002` 套餐发布与上下架页。
|
||||||
|
3. `SUP-PAGE-003` 收益结算与提现页。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 全局硬约束(所有页面生效)
|
||||||
|
|
||||||
|
1. 用户A(供应方)仅向平台提交上游凭证;用户B不可见、不可得。
|
||||||
|
2. 页面、接口、导出、错误文案均不得返回可复用上游凭证片段。
|
||||||
|
3. 凭证相关动作必须有审计事件,且支持按 `request_id/operator_id` 追踪。
|
||||||
|
4. 任何违反凭证边界的行为按 P0 处理,阻断发布(M-013~M-016)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 页面一:供应账号挂载(SUP-PAGE-001)
|
||||||
|
|
||||||
|
## 3.1 页面目标
|
||||||
|
|
||||||
|
供应方完成账号挂载、验证、激活/停用,确保只在平台托管上游凭证。
|
||||||
|
|
||||||
|
## 3.2 字段规格
|
||||||
|
|
||||||
|
| 字段ID | 字段名 | 类型 | 必填 | 校验规则 | 脱敏规则 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| F-001 | `provider` | 下拉 | 是 | 枚举:openai/anthropic/gemini/baidu/xfyun/tencent | 不脱敏 |
|
||||||
|
| F-002 | `account_type` | 单选 | 是 | 枚举:api_key/oauth | 不脱敏 |
|
||||||
|
| F-003 | `credential_input` | 密文输入框 | 是 | 长度、前缀、字符集校验 | UI 全程掩码,后端不回显 |
|
||||||
|
| F-004 | `account_alias` | 文本 | 否 | 1-100 字符,禁止敏感词 | 不脱敏 |
|
||||||
|
| F-005 | `min_quota_threshold` | 数值 | 否 | >=0,最多 6 位小数 | 不脱敏 |
|
||||||
|
| F-006 | `risk_ack` | 勾选框 | 是 | 必须勾选协议确认 | 不脱敏 |
|
||||||
|
|
||||||
|
## 3.3 按钮级规格
|
||||||
|
|
||||||
|
| 按钮ID | 按钮文案 | 可见条件 | 可点击条件 | 触发动作 | 成功态 | 失败态 | 审计事件 | 测试用例ID |
|
||||||
|
|---|---|---|---|---|---|---|---|---|
|
||||||
|
| BTN-ACC-001 | 立即验证 | 已选择 `provider` 且输入凭证 | F-001/F-002/F-003 合法,未处于提交中 | `POST /api/v1/supply/accounts/verify` | 展示验证通过与额度摘要 | 展示错误码与修复建议 | `supply.account.verify` | `UI-SUP-ACC-001` |
|
||||||
|
| BTN-ACC-002 | 提交挂载 | 验证通过后显示 | 风险确认已勾选 | `POST /api/v1/supply/accounts` | 新建记录 `pending/active` | 留在当前页,字段高亮报错 | `supply.account.create` | `UI-SUP-ACC-002` |
|
||||||
|
| BTN-ACC-003 | 激活账号 | 账号状态为 `pending/suspended` | 当前用户拥有供应方权限 | `POST /api/v1/supply/accounts/{id}/activate` | 状态变更 `active` | 状态不变并提示原因 | `supply.account.activate` | `UI-SUP-ACC-003` |
|
||||||
|
| BTN-ACC-004 | 暂停账号 | 账号状态为 `active` | 账号无未结清风险单 | `POST /api/v1/supply/accounts/{id}/suspend` | 状态变更 `suspended` | 状态不变并提示依赖项 | `supply.account.suspend` | `UI-SUP-ACC-004` |
|
||||||
|
| BTN-ACC-005 | 删除账号 | 账号状态非 `active` | 无关联在售套餐 | `DELETE /api/v1/supply/accounts/{id}` | 列表移除 | 删除失败并提示阻塞原因 | `supply.account.delete` | `UI-SUP-ACC-005` |
|
||||||
|
| BTN-ACC-006 | 查看审计 | 用户有审计权限 | 总是可点 | `GET /api/v1/supply/accounts/{id}/audit-logs` | 打开侧边栏 | 提示“暂无审计数据”或加载失败 | `supply.account.audit.view` | `UI-SUP-ACC-006` |
|
||||||
|
|
||||||
|
## 3.4 状态机
|
||||||
|
|
||||||
|
`pending -> active -> suspended -> disabled`
|
||||||
|
|
||||||
|
约束:
|
||||||
|
|
||||||
|
1. `active` 状态不可直接删除。
|
||||||
|
2. `disabled` 为平台风控态,仅管理员可恢复。
|
||||||
|
|
||||||
|
## 3.5 错误码映射(页面级)
|
||||||
|
|
||||||
|
| 错误码 | 文案 | 前端动作 |
|
||||||
|
|---|---|---|
|
||||||
|
| `SUP_ACC_4001` | 凭证格式非法 | 高亮 `credential_input` |
|
||||||
|
| `SUP_ACC_4002` | 上游连通性校验失败 | 展示“重试验证”入口 |
|
||||||
|
| `SUP_ACC_4003` | ToS 不允许该账号进入供给池 | 阻断提交并展示合规说明 |
|
||||||
|
| `SUP_ACC_5001` | 系统繁忙,请稍后再试 | 保留输入并允许重试 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 页面二:套餐发布与上下架(SUP-PAGE-002)
|
||||||
|
|
||||||
|
## 4.1 页面目标
|
||||||
|
|
||||||
|
供应方发布可售套餐,执行上架/下架/暂停/恢复,受价格与风控规则约束。
|
||||||
|
|
||||||
|
## 4.2 字段规格
|
||||||
|
|
||||||
|
| 字段ID | 字段名 | 类型 | 必填 | 校验规则 | 备注 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| F-101 | `supply_account_id` | 下拉 | 是 | 必须为 `active` 账号 | 来源 SUP-PAGE-001 |
|
||||||
|
| F-102 | `model` | 下拉 | 是 | 模型白名单内 | 按供应商过滤 |
|
||||||
|
| F-103 | `total_quota` | 数值 | 是 | >0 且 <= 账户可用额度 | 单位与主账务一致 |
|
||||||
|
| F-104 | `price_per_1m_input` | 数值 | 是 | >= 平台最低保护价 | 6 位小数 |
|
||||||
|
| F-105 | `price_per_1m_output` | 数值 | 是 | >= 平台最低保护价 | 6 位小数 |
|
||||||
|
| F-106 | `valid_days` | 数值 | 是 | 1-365 | 过期自动下架 |
|
||||||
|
| F-107 | `max_concurrent` | 数值 | 否 | 1-1000 | 默认 10 |
|
||||||
|
| F-108 | `rate_limit_rpm` | 数值 | 否 | 1-100000 | 默认 60 |
|
||||||
|
|
||||||
|
## 4.3 按钮级规格
|
||||||
|
|
||||||
|
| 按钮ID | 按钮文案 | 可见条件 | 可点击条件 | 触发动作 | 成功态 | 失败态 | 审计事件 | 测试用例ID |
|
||||||
|
|---|---|---|---|---|---|---|---|---|
|
||||||
|
| BTN-PKG-001 | 保存草稿 | 已进入编辑页 | 基础字段合法 | `POST /api/v1/supply/packages/draft` | 状态 `draft` 持久化 | 保留页面并提示字段错误 | `supply.package.draft.save` | `UI-SUP-PKG-001` |
|
||||||
|
| BTN-PKG-002 | 发布上架 | 套餐为 `draft/paused` | 价格、额度、有效期全部通过 | `POST /api/v1/supply/packages/{id}/publish` | 状态变更 `active` | 阻断并展示具体不满足项 | `supply.package.publish` | `UI-SUP-PKG-002` |
|
||||||
|
| BTN-PKG-003 | 暂停售卖 | 套餐为 `active` | 无平台强制冻结 | `POST /api/v1/supply/packages/{id}/pause` | 状态变更 `paused` | 状态不变并提示原因 | `supply.package.pause` | `UI-SUP-PKG-003` |
|
||||||
|
| BTN-PKG-004 | 立即下架 | 套餐为 `active/paused` | 无未完成的结算锁 | `POST /api/v1/supply/packages/{id}/unlist` | 状态变更 `expired` 或 `paused` | 状态不变并提示阻塞订单 | `supply.package.unlist` | `UI-SUP-PKG-004` |
|
||||||
|
| BTN-PKG-005 | 批量调价 | 列表页多选后显示 | 所选套餐均可编辑 | `POST /api/v1/supply/packages/batch-price` | 批量更新成功数回显 | 部分失败返回明细 | `supply.package.price.batch_update` | `UI-SUP-PKG-005` |
|
||||||
|
| BTN-PKG-006 | 复制套餐 | 任意套餐行可见 | 原套餐存在 | `POST /api/v1/supply/packages/{id}/clone` | 新增草稿套餐 | 复制失败提示 | `supply.package.clone` | `UI-SUP-PKG-006` |
|
||||||
|
|
||||||
|
## 4.4 状态机
|
||||||
|
|
||||||
|
`draft -> active -> paused -> sold_out -> expired`
|
||||||
|
|
||||||
|
约束:
|
||||||
|
|
||||||
|
1. `sold_out` 仅系统自动迁移,不可人工强制设置。
|
||||||
|
2. `expired` 后仅允许“复制套餐”,不允许直接恢复。
|
||||||
|
|
||||||
|
## 4.5 错误码映射(页面级)
|
||||||
|
|
||||||
|
| 错误码 | 文案 | 前端动作 |
|
||||||
|
|---|---|---|
|
||||||
|
| `SUP_PKG_4001` | 售价低于保护价 | 锁定发布按钮并高亮价格字段 |
|
||||||
|
| `SUP_PKG_4002` | 可用额度不足 | 高亮额度字段并提示可用值 |
|
||||||
|
| `SUP_PKG_4003` | 账号状态不可发布套餐 | 跳转账号页处理 |
|
||||||
|
| `SUP_PKG_4091` | 套餐状态冲突,请刷新后重试 | 强制刷新当前行 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 页面三:收益结算与提现(SUP-PAGE-003)
|
||||||
|
|
||||||
|
## 5.1 页面目标
|
||||||
|
|
||||||
|
供应方查看收益、发起提现、追踪结算状态,形成可审计资金链路。
|
||||||
|
|
||||||
|
## 5.2 字段规格
|
||||||
|
|
||||||
|
| 字段ID | 字段名 | 类型 | 必填 | 校验规则 | 备注 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| F-201 | `available_amount` | 只读金额 | 是 | >=0 | 后端计算 |
|
||||||
|
| F-202 | `withdraw_amount` | 数值输入 | 是 | >0 且 <= `available_amount` | 2 位小数 |
|
||||||
|
| F-203 | `payment_method` | 单选 | 是 | 枚举:bank/alipay/wechat | 与账户一致 |
|
||||||
|
| F-204 | `payment_account` | 文本 | 是 | 按通道校验账号格式 | 敏感信息掩码展示 |
|
||||||
|
| F-205 | `sms_code` | 验证码 | 是 | 时效 5 分钟 | 提交后失效 |
|
||||||
|
|
||||||
|
## 5.3 按钮级规格
|
||||||
|
|
||||||
|
| 按钮ID | 按钮文案 | 可见条件 | 可点击条件 | 触发动作 | 成功态 | 失败态 | 审计事件 | 测试用例ID |
|
||||||
|
|---|---|---|---|---|---|---|---|---|
|
||||||
|
| BTN-SET-001 | 刷新收益 | 页面可见 | 总是可点 | `GET /api/v1/supplier/billing` | 卡片与趋势图更新 | 提示“刷新失败,请稍后重试” | `supply.settlement.refresh` | `UI-SUP-SET-001` |
|
||||||
|
| BTN-SET-002 | 发起提现 | 有可提现金额 | 金额、账户、验证码合法 | `POST /api/v1/supply/settlements/withdraw` | 生成结算单 `pending` | 错误提示并保留输入 | `supply.settlement.withdraw.create` | `UI-SUP-SET-002` |
|
||||||
|
| BTN-SET-003 | 撤销申请 | 结算单状态为 `pending` | 申请属于本人 | `POST /api/v1/supply/settlements/{id}/cancel` | 状态变更 `failed/cancelled` | 提示不可撤销原因 | `supply.settlement.withdraw.cancel` | `UI-SUP-SET-003` |
|
||||||
|
| BTN-SET-004 | 下载对账单 | 有结算记录 | 总是可点 | `GET /api/v1/supply/settlements/{id}/statement` | 下载文件成功 | 弹窗提示失败原因 | `supply.settlement.statement.export` | `UI-SUP-SET-004` |
|
||||||
|
| BTN-SET-005 | 查看流水明细 | 页面可见 | 总是可点 | `GET /api/v1/supply/earnings/records` | 打开明细抽屉 | 提示无数据/加载失败 | `supply.earnings.records.view` | `UI-SUP-SET-005` |
|
||||||
|
|
||||||
|
## 5.4 状态机
|
||||||
|
|
||||||
|
`pending -> processing -> completed/failed`;另有用户撤销分支 `pending -> cancelled`(对应 BTN-SET-003)
|
||||||
|
|
||||||
|
约束:
|
||||||
|
|
||||||
|
1. `completed` 状态不可撤销。
|
||||||
|
2. `processing` 状态禁止重复提交提现申请。
|
||||||
|
|
||||||
|
## 5.5 错误码映射(页面级)
|
||||||
|
|
||||||
|
| 错误码 | 文案 | 前端动作 |
|
||||||
|
|---|---|---|
|
||||||
|
| `SUP_SET_4001` | 可提现余额不足 | 高亮金额并提示可提现额度 |
|
||||||
|
| `SUP_SET_4002` | 收款账户校验失败 | 高亮账户字段 |
|
||||||
|
| `SUP_SET_4003` | 验证码失效或错误 | 清空验证码并可重新获取 |
|
||||||
|
| `SUP_SET_4091` | 当前有处理中提现单 | 禁用“发起提现”按钮 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 页面级埋点与审计最小集
|
||||||
|
|
||||||
|
## 6.1 埋点事件(分析)
|
||||||
|
|
||||||
|
1. `sup_page_view`:页面访问。
|
||||||
|
2. `sup_button_click`:按钮点击(含按钮 ID)。
|
||||||
|
3. `sup_submit_success`:关键提交成功。
|
||||||
|
4. `sup_submit_fail`:关键提交失败(含错误码)。
|
||||||
|
|
||||||
|
## 6.2 审计事件(合规)
|
||||||
|
|
||||||
|
1. 账号相关:创建、激活、暂停、删除、查看审计日志。
|
||||||
|
2. 套餐相关:发布、调价、暂停、下架、复制。
|
||||||
|
3. 结算相关:提现发起、提现撤销、对账单导出。
|
||||||
|
|
||||||
|
审计最小字段:
|
||||||
|
|
||||||
|
1. `event_id`
|
||||||
|
2. `operator_id`
|
||||||
|
3. `tenant_id`
|
||||||
|
4. `object_type`
|
||||||
|
5. `object_id`
|
||||||
|
6. `before_state`
|
||||||
|
7. `after_state`
|
||||||
|
8. `request_id`
|
||||||
|
9. `created_at`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 与凭证边界门禁映射
|
||||||
|
|
||||||
|
| 约束 | 页面控制点 | 对应 Gate/指标 |
|
||||||
|
|---|---|---|
|
||||||
|
| 供应方上游凭证不外发 | SUP-PAGE-001 的 `credential_input` 只入不出;所有页面禁止回显原文 | M-013 |
|
||||||
|
| 需求方仅平台凭证入站 | 页面与接口文案统一“平台凭证”,不提供上游凭证下载入口 | M-014 |
|
||||||
|
| 禁止需求方绕过平台直连上游 | 无任何“上游直连参数配置”入口;异常由安全策略阻断 | M-015 |
|
||||||
|
| 外部 query key 全拒绝 | 页面帮助文档和 SDK 示例仅给 header 鉴权方式 | M-016 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 测试用例骨架(新增)
|
||||||
|
|
||||||
|
| 用例ID | 页面 | 关注点 | 期望 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| UI-SUP-ACC-001~006 | SUP-PAGE-001 | 按钮可见性、禁用态、状态迁移、审计 | 与按钮规格一致 |
|
||||||
|
| UI-SUP-PKG-001~006 | SUP-PAGE-002 | 发布前校验、状态机、批量操作 | 不越权、不跳态 |
|
||||||
|
| UI-SUP-SET-001~005 | SUP-PAGE-003 | 提现流程、并发防重、撤销边界 | 资金状态一致 |
|
||||||
|
| SEC-SUP-001 | 全局 | 错误体/导出脱敏 | 不出现可复用上游凭证 |
|
||||||
|
| SEC-SUP-002 | 全局 | 凭证边界回归(与 CB-001~CB-004 对齐) | M-013~M-016 达标 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 待拍板项(进入 PRD v1.0 前必须确认)
|
||||||
|
|
||||||
|
1. `POST /api/v1/supply/*` 系列接口是否按本稿命名冻结。
|
||||||
|
2. 提现金额风控阈值(单笔/单日)与冷却期。
|
||||||
|
3. 套餐“下架”与“暂停”的财务影响口径是否一致。
|
||||||
|
4. 供应方是否允许批量导入账号(当前建议 S1 后)。
|
||||||
644
docs/supply_detailed_design_v1_2026-03-18.md
Normal file
644
docs/supply_detailed_design_v1_2026-03-18.md
Normal file
@@ -0,0 +1,644 @@
|
|||||||
|
# 用户供应LLM功能 - 完整详细设计(补充版)
|
||||||
|
|
||||||
|
> 本文档对"用户分享LLM供应"功能进行完整详细的设计补充,包括安全机制、账单记录等完整内容。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 功能架构
|
||||||
|
|
||||||
|
### 1.1 整体架构
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ 供应侧功能完整架构 │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ 供应方侧(User A) │ │
|
||||||
|
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||||
|
│ │ │ 账号挂载 │ │ 套餐发布 │ │ 收益查看 │ │ 提现 │ │ │
|
||||||
|
│ │ │ 模块 │ │ 模块 │ │ 模块 │ │ 模块 │ │ │
|
||||||
|
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||||
|
│ └─────────────────────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ 平台核心层 │ │
|
||||||
|
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||||
|
│ │ │ 账号验证 │ │ 套餐管理 │ │ 调度引擎 │ │ 计费引擎 │ │ │
|
||||||
|
│ │ │ 服务 │ │ 服务 │ │ │ │ │ │ │
|
||||||
|
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||||
|
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||||
|
│ │ │ 风控服务 │ │ 合规检测 │ │ 通知服务 │ │ 审计服务 │ │ │
|
||||||
|
│ │ │ │ │ 服务 │ │ │ │ │ │ │
|
||||||
|
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||||
|
│ └─────────────────────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ 需求方侧(User B) │ │
|
||||||
|
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||||
|
│ │ │ 套餐选购 │ │ 调用API │ │ 使用账单 │ │ 消耗统计 │ │ │
|
||||||
|
│ │ │ 模块 │ │ 模块 │ │ 模块 │ │ 模块 │ │ │
|
||||||
|
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||||
|
│ └─────────────────────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 安全机制详细设计
|
||||||
|
|
||||||
|
### 2.1 账号安全
|
||||||
|
|
||||||
|
#### 2.1.1 账号挂载安全流程
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ 账号挂载安全验证流程 │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
步骤1: 用户提交账号
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
步骤2: 账号格式校验
|
||||||
|
│ ├── API Key 格式检查
|
||||||
|
│ ├── OAuth 授权链接生成
|
||||||
|
│ └── 格式不合格 → 拒绝
|
||||||
|
▼
|
||||||
|
步骤3: 账号有效性验证
|
||||||
|
│ ├── 调用供应商 API 验证账号可用
|
||||||
|
│ ├── 获取账号基本信息
|
||||||
|
│ └── 验证失败 → 拒绝
|
||||||
|
▼
|
||||||
|
步骤4: 额度查询
|
||||||
|
│ ├── 获取当前剩余额度
|
||||||
|
│ ├── 记录额度快照
|
||||||
|
│ └── 额度不足 → 警告
|
||||||
|
▼
|
||||||
|
步骤5: ToS 合规检查
|
||||||
|
│ ├── 检查供应商 ToS 是否允许共享
|
||||||
|
│ ├── 检查账号类型是否合规
|
||||||
|
│ └── 不合规 → 拒绝
|
||||||
|
▼
|
||||||
|
步骤6: 风险评估
|
||||||
|
│ ├── 账号历史行为分析
|
||||||
|
│ ├── 异常检测
|
||||||
|
│ └── 高风险 → 人工审核
|
||||||
|
▼
|
||||||
|
步骤7: 加密存储
|
||||||
|
│ ├── 使用 KMS 加密
|
||||||
|
│ ├── 生成账号唯一标识
|
||||||
|
│ └── 存储到数据库
|
||||||
|
▼
|
||||||
|
步骤8: 通知用户
|
||||||
|
├── 挂载成功/失败通知
|
||||||
|
└── 后续操作指引
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.1.2 账号存储安全
|
||||||
|
|
||||||
|
| 安全措施 | 说明 | 实现 |
|
||||||
|
|----------|------|------|
|
||||||
|
| **加密存储** | API Key 必须加密存储 | AES-256-GCM 加密 |
|
||||||
|
| **KMS 集成** | 使用密钥管理服务 | AWS KMS / 自建 |
|
||||||
|
| **字段级加密** | 敏感字段单独加密 | 密钥ID + 密文 |
|
||||||
|
| **访问控制** | 最小权限原则 | RBAC 控制 |
|
||||||
|
| **审计日志** | 所有访问记录日志 | 操作人 + 时间 + IP |
|
||||||
|
| **禁止导出** | 禁止明文导出 Key | 脱敏展示 |
|
||||||
|
|
||||||
|
#### 2.1.3 凭证边界强制约束
|
||||||
|
|
||||||
|
1. 供应方上游凭证仅由平台密态托管(KMS + 字段级加密)。
|
||||||
|
2. 需求方只允许使用平台签发凭证调用统一入口。
|
||||||
|
3. 平台禁止向需求方返回供应方上游凭证(API/控制台/导出均不允许)。
|
||||||
|
|
||||||
|
### 2.2 调用安全
|
||||||
|
|
||||||
|
#### 2.2.1 请求验证流程
|
||||||
|
|
||||||
|
```
|
||||||
|
请求进入
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 1. API Key 验证 │ ──▶ 无效 → 401 Unauthorized
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 2. 套餐有效性 │ ──▶ 过期/售罄 → 403 Forbidden
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 3. 额度检查 │ ──▶ 额度不足 → 402 Payment Required
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 4. 风控检查 │ ──▶ 风险拦截 → 429 Too Many Requests
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────┐
|
||||||
|
│ 5. ToS 合规 │ ──▶ 违规拦截 → 403 Forbidden
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
转发到上游
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.2 安全防护措施
|
||||||
|
|
||||||
|
| 防护项 | 说明 | 配置 |
|
||||||
|
|--------|------|------|
|
||||||
|
| **IP 白名单** | 限制调用来源 IP | 可配置 |
|
||||||
|
| **Referer 校验** | 限制调用来源域名 | 可配置 |
|
||||||
|
| **调用频率限制** | RPM/TPM 限制 | 按套餐配置 |
|
||||||
|
| **并发限制** | 同时请求数限制 | 按套餐配置 |
|
||||||
|
| **模型限制** | 可用模型白名单 | 按套餐配置 |
|
||||||
|
| **额度预警** | 额度低于阈值告警 | 可配置 |
|
||||||
|
|
||||||
|
### 2.3 防欺诈机制
|
||||||
|
|
||||||
|
#### 2.3.1 欺诈检测规则
|
||||||
|
|
||||||
|
| 规则 | 描述 | 动作 |
|
||||||
|
|------|------|------|
|
||||||
|
| **额度异常消耗** | 单日消耗 > 平均3倍 | 告警 + 审核 |
|
||||||
|
| **短时间大量调用** | 1分钟内 > 100次 | 限流 + 审核 |
|
||||||
|
| **新账号高额使用** | 注册24h内使用 > $100 | 审核 |
|
||||||
|
| **跨地区调用** | IP 地区突然变化 | 告警 |
|
||||||
|
| **模式异常** | 调用模式偏离历史 | 告警 |
|
||||||
|
| **账号共享检测** | 多 IP 同时使用 | 封禁 + 审核 |
|
||||||
|
|
||||||
|
#### 2.3.2 保证金机制
|
||||||
|
|
||||||
|
| 供应方类型 | 保证金要求 | 退还条件 |
|
||||||
|
|------------|------------|----------|
|
||||||
|
| 个人 | ¥500 | 最后一笔交易30天后无异常 |
|
||||||
|
| 企业 | ¥5,000 | 最后一笔交易90天后无异常 |
|
||||||
|
|
||||||
|
**扣除场景**:
|
||||||
|
- 欺诈行为
|
||||||
|
- 账号封禁导致需求方损失
|
||||||
|
- 恶意套现
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 账单与记录详细设计
|
||||||
|
|
||||||
|
### 3.1 数据模型
|
||||||
|
|
||||||
|
执行口径说明(重要):
|
||||||
|
1. 本节内联 SQL 用于逻辑模型说明,不作为生产执行脚本。
|
||||||
|
2. 生产执行 DDL 以 `sql/postgresql/supply_schema_v1.sql` 为唯一脚本来源(PostgreSQL 方言)。
|
||||||
|
3. 若内联示意与执行脚本冲突,以执行脚本为准。
|
||||||
|
|
||||||
|
#### 3.1.1 供应方账号表
|
||||||
|
|
||||||
|
执行 DDL:`sql/postgresql/supply_schema_v1.sql`(表:`supply_accounts`)。
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
1. 基础:`id/user_id/platform/account_type/status`。
|
||||||
|
2. 凭证:`encrypted_credentials/key_id`(仅密态托管)。
|
||||||
|
3. 风控:`risk_level/risk_score/is_frozen`。
|
||||||
|
4. 审计:`created_at/updated_at/created_by/updated_by`。
|
||||||
|
|
||||||
|
#### 3.1.2 供应套餐表
|
||||||
|
|
||||||
|
执行 DDL:`sql/postgresql/supply_schema_v1.sql`(表:`supply_packages`)。
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
1. 关联:`supply_account_id/user_id/platform/model`。
|
||||||
|
2. 定价:`price_per_1m_input/price_per_1m_output`。
|
||||||
|
3. 额度:`total_quota/available_quota/sold_quota/reserved_quota`。
|
||||||
|
4. 状态:`draft/active/paused/sold_out/expired`。
|
||||||
|
|
||||||
|
#### 3.1.3 订单表
|
||||||
|
|
||||||
|
执行 DDL:`sql/postgresql/supply_schema_v1.sql`(表:`supply_orders`)。
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
1. 订单主键:`id/order_no`。
|
||||||
|
2. 交易信息:`buyer_user_id/supplier_user_id/supply_package_id`。
|
||||||
|
3. 金额信息:`total_amount/platform_fee/supplier_earnings`。
|
||||||
|
4. 状态:`pending/paid/using/expired/refunded`。
|
||||||
|
|
||||||
|
#### 3.1.4 使用记录表(详细到每一次调用)
|
||||||
|
|
||||||
|
执行 DDL:`sql/postgresql/supply_schema_v1.sql`(表:`supply_usage_records`)。
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
1. 追踪主键:`request_id/upstream_request_id/order_id`。
|
||||||
|
2. 模型维度:`platform/model/endpoint`。
|
||||||
|
3. 计费维度:`request_tokens/response_tokens/total_tokens/total_cost`。
|
||||||
|
4. 响应维度:`response_status/latency_ms/success`。
|
||||||
|
|
||||||
|
#### 3.1.5 供应方收益表
|
||||||
|
|
||||||
|
执行 DDL:`sql/postgresql/supply_schema_v1.sql`(表:`supply_earnings`)。
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
1. 收益类型:`usage/bonus/refund`。
|
||||||
|
2. 资金状态:`pending/available/withdrawn/frozen`。
|
||||||
|
3. 金额字段:`amount/available_amount/frozen_amount/withdrawn_amount`。
|
||||||
|
|
||||||
|
#### 3.1.6 结算记录表
|
||||||
|
|
||||||
|
执行 DDL:`sql/postgresql/supply_schema_v1.sql`(表:`supply_settlements`)。
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
1. 结算状态:`pending/processing/completed/failed`。
|
||||||
|
2. 金额字段:`total_amount/fee_amount/net_amount`。
|
||||||
|
3. 周期字段:`period_start/period_end`。
|
||||||
|
|
||||||
|
### 3.2 账单查询API
|
||||||
|
|
||||||
|
#### 3.2.1 供应方账单API
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# 供应方账单查询
|
||||||
|
GET /api/v1/supplier/billing
|
||||||
|
|
||||||
|
Query Parameters:
|
||||||
|
- start_date: string (可选) 开始日期 YYYY-MM-DD
|
||||||
|
- end_date: string (可选) 结束日期 YYYY-MM-DD
|
||||||
|
- page: int (可选) 页码
|
||||||
|
- page_size: int (可选) 每页数量
|
||||||
|
|
||||||
|
Response:
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"period": {
|
||||||
|
"start": "2026-03-01",
|
||||||
|
"end": "2026-03-31"
|
||||||
|
},
|
||||||
|
"summary": {
|
||||||
|
"total_revenue": 12500.00, # 总收入
|
||||||
|
"total_orders": 156, # 订单数
|
||||||
|
"total_usage": 500000000, # 总tokens
|
||||||
|
"total_requests": 15800, # 总请求数
|
||||||
|
"avg_success_rate": 99.2, # 平均成功率
|
||||||
|
"platform_fee": 1875.00, # 平台服务费
|
||||||
|
"net_earnings": 10625.00 # 净收益
|
||||||
|
},
|
||||||
|
"by_platform": [
|
||||||
|
{
|
||||||
|
"platform": "openai",
|
||||||
|
"revenue": 8000.00,
|
||||||
|
"orders": 100,
|
||||||
|
"tokens": 320000000,
|
||||||
|
"success_rate": 99.5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"platform": "anthropic",
|
||||||
|
"revenue": 4500.00,
|
||||||
|
"orders": 56,
|
||||||
|
"tokens": 180000000,
|
||||||
|
"success_rate": 98.8
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"by_model": [...],
|
||||||
|
"trend": [...]
|
||||||
|
},
|
||||||
|
"pagination": {...}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.2.2 需求方账单API
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# 需求方账单查询
|
||||||
|
GET /api/v1/consumer/billing
|
||||||
|
|
||||||
|
Response:
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"balance": {
|
||||||
|
"total_paid": 5000.00, # 已支付总额
|
||||||
|
"total_used": 3200.00, # 已使用
|
||||||
|
"remaining": 1800.00, # 剩余
|
||||||
|
"frozen": 0.00 # 冻结
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"this_month": {
|
||||||
|
"tokens": 120000000,
|
||||||
|
"cost": 960.00,
|
||||||
|
"requests": 5200
|
||||||
|
},
|
||||||
|
"by_platform": [...],
|
||||||
|
"by_model": [...]
|
||||||
|
},
|
||||||
|
"orders": [...]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 详细使用记录
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# 详细使用记录查询
|
||||||
|
GET /api/v1/consumer/usage-records
|
||||||
|
|
||||||
|
Query Parameters:
|
||||||
|
- start_date: string
|
||||||
|
- end_date: string
|
||||||
|
- platform: string (可选)
|
||||||
|
- model: string (可选)
|
||||||
|
- success: boolean (可选)
|
||||||
|
- page: int
|
||||||
|
- page_size: int
|
||||||
|
|
||||||
|
Response:
|
||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"id": 1001,
|
||||||
|
"request_id": "req_abc123",
|
||||||
|
"order_id": 500,
|
||||||
|
"platform": "openai",
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"endpoint": "/v1/chat/completions",
|
||||||
|
"tokens": {
|
||||||
|
"input": 1500,
|
||||||
|
"output": 800,
|
||||||
|
"total": 2300
|
||||||
|
},
|
||||||
|
"cost": {
|
||||||
|
"input": 0.0075, # $7.5/1M
|
||||||
|
"output": 0.032, # $32/1M
|
||||||
|
"total": 0.0395 # $0.0395
|
||||||
|
},
|
||||||
|
"latency_ms": 1250,
|
||||||
|
"status": "success",
|
||||||
|
"timestamp": "2026-03-18T10:30:00Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"pagination": {...}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 完整业务流程
|
||||||
|
|
||||||
|
### 4.1 供应方完整流程
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ 供应方完整业务流程 │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
1. 入驻认证
|
||||||
|
├── 注册账号
|
||||||
|
├── 实名认证 (个人/企业)
|
||||||
|
└── 缴纳保证金
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
2. 账号挂载
|
||||||
|
├── 选择供应商
|
||||||
|
├── 选择认证方式 (API Key / OAuth)
|
||||||
|
├── 提交凭证
|
||||||
|
├── 平台验证 (有效性/额度/ToS)
|
||||||
|
└── 通过 → 激活账号
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
3. 套餐发布
|
||||||
|
├── 选择要共享的模型
|
||||||
|
├── 设置配额 (全部/部分额度)
|
||||||
|
├── 设置售价
|
||||||
|
├── 设置有效期
|
||||||
|
└── 上架销售
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
4. 日常运营
|
||||||
|
├── 监控账号状态
|
||||||
|
├── 查看订单通知
|
||||||
|
├── 处理异常 (如有)
|
||||||
|
└── 收益查看
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
5. 收益结算
|
||||||
|
├── 收益累积 (T+7 可提现)
|
||||||
|
├── 申请提现
|
||||||
|
├── 平台审核
|
||||||
|
└── 到账
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 需求方完整流程
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ 需求方完整业务流程 │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
1. 浏览选购
|
||||||
|
├── 浏览可用套餐
|
||||||
|
├── 按供应商/模型筛选
|
||||||
|
├── 查看套餐详情 (额度/价格/评价)
|
||||||
|
└── 选择购买
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
2. 下单购买
|
||||||
|
├── 选择配额数量
|
||||||
|
├── 确认价格
|
||||||
|
├── 选择支付方式
|
||||||
|
└── 支付成功
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
3. 获取凭证
|
||||||
|
├── 获取平台 API Key(平台签发)
|
||||||
|
├── 不返回供应方上游凭证
|
||||||
|
├── 配置使用限制
|
||||||
|
└── 开始使用
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
4. 使用API
|
||||||
|
├── 调用统一API
|
||||||
|
├── 实时扣减配额
|
||||||
|
├── 查看使用统计
|
||||||
|
└── 配额不足 → 续费
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
5. 账单管理
|
||||||
|
├── 查看使用明细
|
||||||
|
├── 下载账单
|
||||||
|
└── 对账
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 调度策略
|
||||||
|
|
||||||
|
### 5.1 请求调度流程
|
||||||
|
|
||||||
|
```
|
||||||
|
请求进入 (指定模型: gpt-4o)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
查找可用套餐 (gpt-4o + 有剩余配额 + 正常状态)
|
||||||
|
│
|
||||||
|
├── 多个套餐可用
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ 选择策略:
|
||||||
|
│ 1. 最低价格优先
|
||||||
|
│ 2. 负载均衡 (选择负载最低)
|
||||||
|
│ 3. 轮询
|
||||||
|
│ 4. 供应商偏好
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ 选择最优套餐
|
||||||
|
│
|
||||||
|
└── 单个套餐可用 → 使用该套餐
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
检查套餐配额 (足够? → 继续; 不足? → 拒绝)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
检查账户状态 (正常? → 继续; 异常? → 拒绝)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
转发到上游供应商
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
获取响应
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
记录使用记录
|
||||||
|
│
|
||||||
|
├── 更新订单剩余配额
|
||||||
|
├── 更新套餐已售配额
|
||||||
|
├── 计算费用
|
||||||
|
└── 更新供应方收益
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
返回响应给需求方
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 调度策略配置
|
||||||
|
|
||||||
|
| 策略 | 说明 | 适用场景 |
|
||||||
|
|------|------|----------|
|
||||||
|
| **最低价格** | 选择售价最低的套餐 | 成本优先 |
|
||||||
|
| **负载均衡** | 选择负载最低的套餐 | 性能优先 |
|
||||||
|
| **轮询** | 依次选择各套餐 | 公平使用 |
|
||||||
|
| **供应商偏好** | 优先特定供应商 | 稳定性优先 |
|
||||||
|
| **混合** | 综合考虑价格/负载/偏好 | 默认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 监控与告警
|
||||||
|
|
||||||
|
### 6.1 监控指标
|
||||||
|
|
||||||
|
#### 6.1.1 供应方监控
|
||||||
|
|
||||||
|
| 指标 | 说明 | 告警阈值 |
|
||||||
|
|------|------|----------|
|
||||||
|
| 账号可用率 | 账号正常比例 | < 99% |
|
||||||
|
| 成功率 | 请求成功率 | < 95% |
|
||||||
|
| 平均延迟 | 平均响应时间 | > 5000ms |
|
||||||
|
| 配额消耗速度 | 配额日消耗比例 | > 80%/天 |
|
||||||
|
| 异常请求比例 | 失败请求比例 | > 10% |
|
||||||
|
| 收益异常 | 收益突增/突降 | 偏离 > 50% |
|
||||||
|
|
||||||
|
#### 6.1.2 套餐监控
|
||||||
|
|
||||||
|
| 指标 | 说明 | 告警阈值 |
|
||||||
|
|------|------|----------|
|
||||||
|
| 剩余配额 | 可用配额 | < 10% |
|
||||||
|
| 订单量 | 新增订单 | 突降 > 30% |
|
||||||
|
| 评分 | 用户评分 | < 4.0 |
|
||||||
|
| 投诉 | 用户投诉 | > 3次/周 |
|
||||||
|
|
||||||
|
### 6.2 告警通知
|
||||||
|
|
||||||
|
| 告警类型 | 通知对象 | 通知方式 |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| 账号异常 | 供应方 | 站内信/邮件 |
|
||||||
|
| 配额不足 | 供应方 | 站内信/短信 |
|
||||||
|
| 收益到账 | 供应方 | 站内信/邮件 |
|
||||||
|
| 异常订单 | 平台运营 | 邮件/短信 |
|
||||||
|
| ToS 违规 | 平台运营 | 邮件 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 报表导出
|
||||||
|
|
||||||
|
### 7.1 供应方报表
|
||||||
|
|
||||||
|
| 报表 | 内容 | 格式 |
|
||||||
|
|------|------|------|
|
||||||
|
| 账单汇总 | 收入/订单/费用 | CSV/Excel |
|
||||||
|
| 使用明细 | 每笔调用详情 | CSV/Excel |
|
||||||
|
| 账户流水 | 收益/提现/冻结 | CSV/Excel |
|
||||||
|
| 对账单 | 平台与供应方对账 | PDF |
|
||||||
|
|
||||||
|
### 7.2 需求方报表
|
||||||
|
|
||||||
|
| 报表 | 内容 | 格式 |
|
||||||
|
|------|------|------|
|
||||||
|
| 消费账单 | 消费汇总 | CSV/Excel |
|
||||||
|
| 使用明细 | 调用明细 | CSV/Excel |
|
||||||
|
| 成本分析 | 按模型/部门分析 | PDF |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 数据统计SQL示例
|
||||||
|
|
||||||
|
### 8.1 供应方收入统计
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT
|
||||||
|
sa.user_id,
|
||||||
|
u.username,
|
||||||
|
sa.platform,
|
||||||
|
SUM(sur.total_cost) as total_revenue,
|
||||||
|
SUM(sur.total_cost) * :platform_fee_rate as platform_fee,
|
||||||
|
SUM(sur.total_cost) * :supplier_settlement_rate as supplier_earnings,
|
||||||
|
COUNT(DISTINCT sur.order_id) as order_count,
|
||||||
|
SUM(sur.total_tokens) as total_tokens,
|
||||||
|
AVG(CASE WHEN sur.success THEN 1.0 ELSE 0.0 END) as avg_success_rate
|
||||||
|
FROM supply_usage_records sur
|
||||||
|
JOIN supply_accounts sa ON sur.supply_account_id = sa.id
|
||||||
|
JOIN users u ON sa.user_id = u.id
|
||||||
|
WHERE sur.created_at >= '2026-03-01' AND sur.created_at < '2026-04-01'
|
||||||
|
GROUP BY sa.user_id, u.username, sa.platform
|
||||||
|
ORDER BY total_revenue DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 套餐消耗统计
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT
|
||||||
|
sp.id as package_id,
|
||||||
|
sp.model,
|
||||||
|
sp.platform,
|
||||||
|
sp.total_quota,
|
||||||
|
sp.available_quota,
|
||||||
|
sp.sold_quota,
|
||||||
|
(sp.sold_quota::numeric / sp.total_quota * 100) as sold_rate,
|
||||||
|
COUNT(DISTINCT so.id) as order_count,
|
||||||
|
SUM(sur.total_cost) as total_revenue,
|
||||||
|
AVG(sur.latency_ms) as avg_latency,
|
||||||
|
AVG(CASE WHEN sur.success THEN 1.0 ELSE 0.0 END) as success_rate
|
||||||
|
FROM supply_packages sp
|
||||||
|
LEFT JOIN supply_orders so ON sp.id = so.supply_package_id
|
||||||
|
LEFT JOIN supply_usage_records sur
|
||||||
|
ON sur.supply_account_id = sp.supply_account_id
|
||||||
|
AND sur.model = sp.model
|
||||||
|
WHERE sp.created_at >= '2026-03-01'
|
||||||
|
GROUP BY sp.id, sp.model, sp.platform, sp.total_quota, sp.available_quota, sp.sold_quota
|
||||||
|
ORDER BY total_revenue DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:完整详细设计(已补 PostgreSQL 执行口径与 SQL 示例修正)
|
||||||
|
**关联文档**:
|
||||||
|
- `supply_side_product_design_v1_2026-03-18.md`
|
||||||
|
- `supply_feature_technical_analysis_v1_2026-03-18.md`
|
||||||
284
docs/supply_feature_technical_analysis_v1_2026-03-18.md
Normal file
284
docs/supply_feature_technical_analysis_v1_2026-03-18.md
Normal file
@@ -0,0 +1,284 @@
|
|||||||
|
# Subapi 技术能力分析与用户供应场景补充
|
||||||
|
|
||||||
|
> 本文档回答关于 subapi 技术深度、供应商能力、风险评估,以及补充"用户分享LLM供应"的详细设计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Subapi 技术深度评估
|
||||||
|
|
||||||
|
### 1.1 技术了解程度:**足够深入**
|
||||||
|
|
||||||
|
| 维度 | 评估 | 证据 |
|
||||||
|
|------|------|------|
|
||||||
|
| 协议支持 | ⭐⭐⭐⭐⭐ | OpenAI/Anthropic/Gemini 完整兼容 |
|
||||||
|
| 契约设计 | ⭐⭐⭐⭐⭐ | 已有详细的 Connector 契约文档 |
|
||||||
|
| 错误处理 | ⭐⭐⭐⭐ | 三类错误归一,重试机制完善 |
|
||||||
|
| 流式支持 | ⭐⭐⭐⭐ | SSE/WebSocket 均已实现 |
|
||||||
|
|
||||||
|
### 1.2 已掌握的技术细节
|
||||||
|
|
||||||
|
- **Connector 契约**:`subapi_connector_contract_v1_2026-03-17.md` 明确定义了请求/响应/错误归一模型
|
||||||
|
- **认证机制**:支持 Bearer Token、x-api-key、x-goog-api-key
|
||||||
|
- **版本治理**:生产环境锁定精确版本,周级升级窗口
|
||||||
|
- **风险点**:已识别 2 个 P0 问题(内网隔离、query key 边界)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Subapi 当前供应商能力
|
||||||
|
|
||||||
|
### 2.1 已支持的供应商
|
||||||
|
|
||||||
|
| 供应商 | 模型支持 | 认证方式 | 状态 |
|
||||||
|
|--------|----------|----------|------|
|
||||||
|
| **OpenAI** | GPT-3.5/4/5 全系列 | API Key | ✅ 稳定 |
|
||||||
|
| **Anthropic** | Claude 2/3/4 全系列 | API Key / OAuth | ✅ 稳定 |
|
||||||
|
| **Google Gemini** | Gemini 2/3 全系列 | API Key / OAuth | ✅ 稳定 |
|
||||||
|
| **Antigravity** | Claude 4.5+ / Gemini 2.5+ | OAuth | ✅ 稳定 |
|
||||||
|
| **Sora** | 图片/视频生成 | API Key | ✅ 稳定 |
|
||||||
|
| **AWS Bedrock** | Claude/Titan 等 | AWS 凭证 | ✅ 稳定 |
|
||||||
|
| **百度文心** | ERNIE 系列 | API Key | ✅ |
|
||||||
|
| **讯飞星火** | Spark 系列 | API Key | ✅ |
|
||||||
|
| **腾讯混元** | Hunyuan 系列 | API Key | ✅ |
|
||||||
|
| **Perplexity** | Pro/Labs | API Key | ✅ |
|
||||||
|
|
||||||
|
### 2.2 供应商能力总结
|
||||||
|
|
||||||
|
- **总计**:10+ 供应商,100+ 模型
|
||||||
|
- **海外主力**:OpenAI、Anthropic、Gemini(支持 OAuth 授权)
|
||||||
|
- **国内支持**:百度、讯飞、腾讯(API Key 方式)
|
||||||
|
- **⚠️ 注意**:subapi 不支持用户自己挂载账号给平台,只支持平台统一管理上游账号
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 关键发现:Subapi 不支持"用户分享LLM供应"
|
||||||
|
|
||||||
|
### 3.1 Subapi 现有模式
|
||||||
|
|
||||||
|
```
|
||||||
|
平台方(管理员)→ 添加上游账号 → 分发给用户使用
|
||||||
|
```
|
||||||
|
|
||||||
|
- 平台方统一管理所有 LLM 供应商账号
|
||||||
|
- 用户使用平台生成的 API Key 调用
|
||||||
|
- 优点:统一管理、计费简单
|
||||||
|
- 缺点:无法实现"用户分享多余配额"
|
||||||
|
|
||||||
|
### 3.2 用户想要的模式
|
||||||
|
|
||||||
|
```
|
||||||
|
用户A(供应方)→ 挂载自己账号的剩余配额 → 平台售卖 → 用户B(需求方)使用
|
||||||
|
```
|
||||||
|
|
||||||
|
- 任何用户可以把自己的 LLM 账号/配额挂载到平台
|
||||||
|
- 平台验证账号有效性后接受供应
|
||||||
|
- 平台将配额卖给其他用户
|
||||||
|
- 供应方获得收益分成
|
||||||
|
|
||||||
|
### 3.3 结论
|
||||||
|
|
||||||
|
**Subapi 没有实现"用户分享LLM供应"功能!**
|
||||||
|
|
||||||
|
这意味着用户的独特场景需要**在我方平台层实现**,而不是依赖 subapi。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 风险评估
|
||||||
|
|
||||||
|
### 4.1 Subapi 集成风险
|
||||||
|
|
||||||
|
| 风险项 | 级别 | 缓解措施 |
|
||||||
|
|--------|------|----------|
|
||||||
|
| 内网隔离缺失 | P0 | 新增 SEC-007/008 任务 |
|
||||||
|
| query key 边界歧义 | P0 | 新增 SEC-009 强制测试 |
|
||||||
|
| 接管率口径冲突 | P1 | COMP-007 统一 canonical |
|
||||||
|
| CN 平台硬编码 | P1 | COMP-008 配置表驱动 |
|
||||||
|
|
||||||
|
### 4.2 商业风险
|
||||||
|
|
||||||
|
| 风险项 | 级别 | 说明 |
|
||||||
|
|--------|------|------|
|
||||||
|
| 用户供应功能需自研 | 高 | 供应侧功能不在 subapi 范围内 |
|
||||||
|
| ToS 合规风险 | 高 | 需严格审查各供应商条款 |
|
||||||
|
| 供应商账号封禁 | 中 | 需多账号冗余 + 备用通道 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 用户分享LLM供应 - 完整设计补充
|
||||||
|
|
||||||
|
### 5.1 功能定位
|
||||||
|
|
||||||
|
这是**平台独有功能**,不在 subapi 范围内,需要我方平台层实现。
|
||||||
|
|
||||||
|
### 5.2 业务流程
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ 用户供应LLM配额业务流程 │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ 供应方(用户A) 平台 需求方(用户B) │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ▼ │ │ │
|
||||||
|
│ 1. 注册/登录 │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ▼ │ │ │
|
||||||
|
│ 2. 挂载LLM账号 │ │ │
|
||||||
|
│ (API Key/OAuth) │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ▼ │ │ │
|
||||||
|
│ 3. 平台验证账号 │ │ │
|
||||||
|
│ (有效性/额度/合规) │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ▼ │ │ │
|
||||||
|
│ 4. 设置供给配额 │ │ │
|
||||||
|
│ (挂载量/售价) │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ▼ │ │ │
|
||||||
|
│ 5. 供应上线 │ │ │
|
||||||
|
│ │───────────────────────┼───────────────────────────┤ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ ▼ │ │
|
||||||
|
│ │ 6. 平台展示 │ │
|
||||||
|
│ │ (套餐列表) │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ ▼ │ │
|
||||||
|
│ │ │ 7. 购买套餐 │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ ▼ │ │
|
||||||
|
│ │ │ 8. 获取平台调用凭证 │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ ▼ │ │
|
||||||
|
│ │ │ 9. 调用LLM服务 │
|
||||||
|
│ │ │◀──────────────────────────┘ │
|
||||||
|
│ │ │ │
|
||||||
|
│ ▼ ▼ │
|
||||||
|
│ 10. 收益到账 11. 平台抽成 │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 核心模块设计
|
||||||
|
|
||||||
|
#### 5.3.1 账号挂载模块
|
||||||
|
|
||||||
|
| 功能 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| **挂载方式** | API Key 手动输入 / OAuth 授权 |
|
||||||
|
| **支持的供应商** | 与平台支持列表一致 |
|
||||||
|
| **账号验证** | 调用供应商 API 验证有效性 |
|
||||||
|
| **额度获取** | 调用供应商 API 获取剩余额度 |
|
||||||
|
| **合规检查** | 检查是否违反供应商 ToS |
|
||||||
|
|
||||||
|
#### 5.3.2 套餐发布模块
|
||||||
|
|
||||||
|
| 功能 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| **最小挂载量** | 平台设定最低额度(如 $10) |
|
||||||
|
| **最大挂载量** | 根据账号类型和历史表现设定 |
|
||||||
|
| **定价规则** | 供应方设置售价,平台设定最低价 |
|
||||||
|
| **有效期** | 可选择日/月/永久 |
|
||||||
|
| **状态管理** | 上架/下架/售罄/过期 |
|
||||||
|
|
||||||
|
#### 5.3.3 调度与计费模块
|
||||||
|
|
||||||
|
| 功能 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| **请求调度** | 优先用低价/空闲供应,保障成功率 |
|
||||||
|
| **额度扣减** | 实时扣减供应方额度 |
|
||||||
|
| **账单生成** | 记录每笔交易的 token 量和费用 |
|
||||||
|
| **收益计算** | 供应方收益 = 售价 × 消耗量 × 60% |
|
||||||
|
|
||||||
|
#### 5.3.4 风控模块
|
||||||
|
|
||||||
|
| 功能 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| **账号健康** | 监控账号可用性、额度消耗异常 |
|
||||||
|
| **欺诈检测** | 检测恶意套现、虚假额度 |
|
||||||
|
| **ToS 合规** | 检测供应商禁止的调用模式 |
|
||||||
|
| **保证金** | 供应方需缴纳保证金(防违约) |
|
||||||
|
|
||||||
|
### 5.4 数据模型
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- 供应方账号表
|
||||||
|
CREATE TABLE supply_accounts (
|
||||||
|
id BIGINT PRIMARY KEY,
|
||||||
|
user_id BIGINT NOT NULL,
|
||||||
|
platform VARCHAR(50) NOT NULL, -- openai/anthropic/gemini/baidu/...
|
||||||
|
account_type VARCHAR(20) NOT NULL, -- api_key/oauth
|
||||||
|
encrypted_key VARCHAR(500), -- 加密存储
|
||||||
|
display_name VARCHAR(100),
|
||||||
|
status VARCHAR(20), -- pending/active/suspended
|
||||||
|
verified_at TIMESTAMP,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- 供应套餐表
|
||||||
|
CREATE TABLE supply_packages (
|
||||||
|
id BIGINT PRIMARY KEY,
|
||||||
|
supply_account_id BIGINT NOT NULL,
|
||||||
|
model VARCHAR(100) NOT NULL,
|
||||||
|
available_quota DECIMAL(20, 6), -- 可用额度
|
||||||
|
sold_quota DECIMAL(20, 6) DEFAULT 0, -- 已售额度
|
||||||
|
price_per_1m DECIMAL(20, 6), -- 每百万tokens价格
|
||||||
|
status VARCHAR(20), -- active/sold_out/expired
|
||||||
|
valid_until TIMESTAMP,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- 供应方收益表
|
||||||
|
CREATE TABLE supply_earnings (
|
||||||
|
id BIGINT PRIMARY KEY,
|
||||||
|
user_id BIGINT NOT NULL,
|
||||||
|
supply_package_id BIGINT NOT NULL,
|
||||||
|
consumption_tokens BIGINT NOT NULL,
|
||||||
|
consumption_amount DECIMAL(20, 6) NOT NULL,
|
||||||
|
platform_share DECIMAL(20, 6) NOT NULL, -- 平台抽成
|
||||||
|
supplier_share DECIMAL(20, 6) NOT NULL, -- 供应方收益
|
||||||
|
created_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.5 与现有系统的集成
|
||||||
|
|
||||||
|
| 模块 | 集成点 | 说明 |
|
||||||
|
|------|--------|------|
|
||||||
|
| **认证系统** | 复用现有用户体系 | 供应方需完成实名认证 |
|
||||||
|
| **计费系统** | 复用账务引擎 | 供应方收益计入用户余额 |
|
||||||
|
| **风控系统** | 复用合规引擎 | 账号需通过 ToS 检查 |
|
||||||
|
| **API 网关** | 新增路由规则 | 识别供应来源,调度到对应账号 |
|
||||||
|
|
||||||
|
### 5.6 实施计划
|
||||||
|
|
||||||
|
| 阶段 | 时间 | 任务 | 交付 |
|
||||||
|
|------|------|------|------|
|
||||||
|
| **S0-a** | W1-W2 | 账号挂载模块开发 | 挂载/验证/下架功能 |
|
||||||
|
| **S0-b** | W3-W4 | 套餐发布模块开发 | 上架/定价/展示功能 |
|
||||||
|
| **S0-c** | W5-W6 | 调度与计费模块开发 | 实时调度/扣减/账单 |
|
||||||
|
| **S0-d** | W7-W8 | 风控模块开发 | 健康监控/欺诈检测 |
|
||||||
|
| **S0-e** | W9-W10 | 内部测试与修复 | 试运行 |
|
||||||
|
| **S0-f** | W11-W12 | 首批供应方引入 | 10家供应方 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 结论与建议
|
||||||
|
|
||||||
|
### 6.1 结论
|
||||||
|
|
||||||
|
1. **Subapi 技术掌握充分**:已有详细的契约文档,可以完成集成
|
||||||
|
2. **Subapi 供应商覆盖全**:10+ 供应商,100+ 模型
|
||||||
|
3. **用户供应功能缺失**:Subapi 不支持,需平台层自研
|
||||||
|
|
||||||
|
### 6.2 建议
|
||||||
|
|
||||||
|
1. ✅ 继续推进 Subapi 集成(技术可行)
|
||||||
|
2. ✅ 补充用户供应功能的自研计划(当前规划缺失)
|
||||||
|
3. ✅ 将 S0 阶段延长,增加供应侧功能开发
|
||||||
|
4. ✅ 优先在 S0-e 阶段引入首批供应方进行验证
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:补充说明
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md`
|
||||||
|
- `subapi_connector_contract_v1_2026-03-17.md`
|
||||||
165
docs/supply_gate_command_playbook_v1_2026-03-25.md
Normal file
165
docs/supply_gate_command_playbook_v1_2026-03-25.md
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
# SUP Gate 命令级执行清单(SUP-004~SUP-007)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 目标:为测试团队提供可直接执行的命令清单,输出可回填证据
|
||||||
|
- 关联任务:`SUP-004`、`SUP-005`、`SUP-006`、`SUP-007`
|
||||||
|
- 关联报告模板:
|
||||||
|
- `tests/supply/ui_sup_acc_report_2026-03-28.md`
|
||||||
|
- `tests/supply/ui_sup_pkg_report_2026-03-29.md`
|
||||||
|
- `tests/supply/ui_sup_set_report_2026-03-29.md`
|
||||||
|
- `tests/supply/sec_sup_boundary_report_2026-03-30.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 执行前准备
|
||||||
|
|
||||||
|
### 1.1 环境变量
|
||||||
|
|
||||||
|
在项目根目录创建并编辑 `scripts/supply-gate/.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
API_BASE_URL="https://staging.example.com"
|
||||||
|
OWNER_BEARER_TOKEN="replace-me-owner-token"
|
||||||
|
VIEWER_BEARER_TOKEN="replace-me-viewer-token"
|
||||||
|
ADMIN_BEARER_TOKEN="replace-me-admin-token"
|
||||||
|
|
||||||
|
# 测试数据(可按环境替换)
|
||||||
|
TEST_PROVIDER="openai"
|
||||||
|
TEST_MODEL="gpt-4o"
|
||||||
|
TEST_ACCOUNT_ALIAS="sup_acc_cmd"
|
||||||
|
TEST_CREDENTIAL_INPUT="sk-test-replace-me"
|
||||||
|
TEST_PAYMENT_METHOD="alipay"
|
||||||
|
TEST_PAYMENT_ACCOUNT="tester@example.com"
|
||||||
|
TEST_SMS_CODE="123456"
|
||||||
|
|
||||||
|
# 可选:绕过平台直连供应方探测目标
|
||||||
|
SUPPLIER_DIRECT_TEST_URL=""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.2 依赖检查
|
||||||
|
|
||||||
|
```bash
|
||||||
|
command -v curl >/dev/null
|
||||||
|
command -v jq >/dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.3 执行入口
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd "/home/long/project/立交桥"
|
||||||
|
bash "scripts/supply-gate/run_all.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. SUP-004 账号挂载链路(UI-SUP-ACC-001~006)
|
||||||
|
|
||||||
|
执行脚本:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd "/home/long/project/立交桥"
|
||||||
|
bash "scripts/supply-gate/sup004_accounts.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
最低断言:
|
||||||
|
|
||||||
|
1. 验证接口返回 `verify_status=pass/review_required`。
|
||||||
|
2. 创建账号成功并返回 `account_id`。
|
||||||
|
3. 激活/暂停接口返回状态迁移成功。
|
||||||
|
4. 审计日志接口可查询并返回 `request_id`。
|
||||||
|
|
||||||
|
证据输出:
|
||||||
|
|
||||||
|
1. `tests/supply/artifacts/sup004/*.json`
|
||||||
|
2. `tests/supply/artifacts/sup004/summary.txt`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. SUP-005 套餐发布链路(UI-SUP-PKG-001~006)
|
||||||
|
|
||||||
|
执行脚本:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd "/home/long/project/立交桥"
|
||||||
|
bash "scripts/supply-gate/sup005_packages.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
最低断言:
|
||||||
|
|
||||||
|
1. 草稿创建成功并返回 `package_id`。
|
||||||
|
2. 上架后状态为 `active`。
|
||||||
|
3. 暂停后状态为 `paused`。
|
||||||
|
4. 下架返回成功(`expired/paused` 合法)。
|
||||||
|
5. 批量调价返回 `success_count + failed_count = total`。
|
||||||
|
6. 复制成功并返回新的 `package_id`。
|
||||||
|
|
||||||
|
证据输出:
|
||||||
|
|
||||||
|
1. `tests/supply/artifacts/sup005/*.json`
|
||||||
|
2. `tests/supply/artifacts/sup005/summary.txt`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. SUP-006 结算提现链路(UI-SUP-SET-001~005)
|
||||||
|
|
||||||
|
执行脚本:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd "/home/long/project/立交桥"
|
||||||
|
bash "scripts/supply-gate/sup006_settlements.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
最低断言:
|
||||||
|
|
||||||
|
1. 账单查询成功返回 `summary`。
|
||||||
|
2. 提现申请成功返回 `settlement_id` 且状态 `pending`。
|
||||||
|
3. 撤销申请接口返回状态变更。
|
||||||
|
4. 对账单下载接口返回 `download_url`。
|
||||||
|
5. 收益流水接口返回分页与记录字段。
|
||||||
|
|
||||||
|
证据输出:
|
||||||
|
|
||||||
|
1. `tests/supply/artifacts/sup006/*.json`
|
||||||
|
2. `tests/supply/artifacts/sup006/summary.txt`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. SUP-007 凭证边界专项(SEC-SUP-001~002)
|
||||||
|
|
||||||
|
执行脚本:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd "/home/long/project/立交桥"
|
||||||
|
bash "scripts/supply-gate/sup007_boundary.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
最低断言:
|
||||||
|
|
||||||
|
1. 平台凭证访问主路径成功(映射 M-014)。
|
||||||
|
2. 外部 query key 请求被拒绝(映射 M-016)。
|
||||||
|
3. 响应/导出样本脱敏扫描无可复用凭证片段(映射 M-013)。
|
||||||
|
4. 若配置 `SUPPLIER_DIRECT_TEST_URL`,直连探测应失败或被阻断(映射 M-015)。
|
||||||
|
|
||||||
|
证据输出:
|
||||||
|
|
||||||
|
1. `tests/supply/artifacts/sup007/*.json`
|
||||||
|
2. `tests/supply/artifacts/sup007/summary.txt`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 回填要求
|
||||||
|
|
||||||
|
执行完成后,必须回填:
|
||||||
|
|
||||||
|
1. `tests/supply/ui_sup_acc_report_2026-03-28.md`
|
||||||
|
2. `tests/supply/ui_sup_pkg_report_2026-03-29.md`
|
||||||
|
3. `tests/supply/ui_sup_set_report_2026-03-29.md`
|
||||||
|
4. `tests/supply/sec_sup_boundary_report_2026-03-30.md`
|
||||||
|
5. `reports/supply_gate_review_2026-03-31.md`
|
||||||
|
|
||||||
|
所有回填项需要包含:
|
||||||
|
|
||||||
|
1. 结论(PASS/FAIL/BLOCKED)
|
||||||
|
2. 证据路径(json/screenshot/log)
|
||||||
|
3. 责任人签字
|
||||||
356
docs/supply_side_product_design_v1_2026-03-18.md
Normal file
356
docs/supply_side_product_design_v1_2026-03-18.md
Normal file
@@ -0,0 +1,356 @@
|
|||||||
|
# 供应侧产品设计章节(新增)
|
||||||
|
|
||||||
|
> 本章节为 PRD v0 的补充章节,专门针对"用户分享多余LLM套餐"这一核心独特场景进行产品化设计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. 术语澄清(重要)
|
||||||
|
|
||||||
|
| 术语 | 定义 | 举例 |
|
||||||
|
|------|------|------|
|
||||||
|
| **供应方** | 在平台挂载多余LLM配额的个人或企业(平台的用户角色) | 挂载自己 OpenAI 账号的用户A |
|
||||||
|
| **供应商** | LLM 服务提供商(上游账号来源) | OpenAI、Anthropic、百度、讯飞等 |
|
||||||
|
| **平台** | 统一网关平台本身 | 立交桥/LJQ |
|
||||||
|
|
||||||
|
> ⚠️ **注意**:本文档中"供应商"指 LLM 服务商(如 OpenAI),"供应方"指挂载账号的平台用户。两者是不同的概念,请勿混淆。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 供应侧业务模型
|
||||||
|
|
||||||
|
### 1.1 业务角色
|
||||||
|
|
||||||
|
| 角色 | 定义 | 核心诉求 |
|
||||||
|
|------|------|---------|
|
||||||
|
| **供应方(Provider)** | 拥有多余LLM配额的个人或企业 | 将闲置配额变现,回笼资金 |
|
||||||
|
| **平台(Platform)** | 统一网关平台 | 汇集供应方资源,提供稳定服务,赚取差价 |
|
||||||
|
| **需求方(Consumer)** | 需要LLM调用能力的企业/开发者 | 以优惠价格获取LLM服务,无需自建账号 |
|
||||||
|
|
||||||
|
### 1.2 统购统销模式
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────┐ 统一定价收购 ┌─────────────┐ 加价出售 ┌─────────────┐
|
||||||
|
│ 供应方 │ ───────────────▶ │ 平台 │ ──────────────▶ │ 需求方 │
|
||||||
|
│ (卖配额) │ 定价P0 │ (中间商) │ 定价P1 │ (买服务) │
|
||||||
|
└─────────────┘ └─────────────┘ └─────────────┘
|
||||||
|
│
|
||||||
|
差价 = P1 - P0
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 供应方产品流程
|
||||||
|
|
||||||
|
### 2.1 供应方入驻
|
||||||
|
|
||||||
|
**流程**:
|
||||||
|
1. 供应方注册平台账号(需实名认证)
|
||||||
|
2. 提交待共享的LLM账号凭证(API Key)
|
||||||
|
3. 平台进行套餐有效性验证
|
||||||
|
4. 验证通过后,供应方设置供给配额和最低售价
|
||||||
|
5. 签订《配额供应协议》
|
||||||
|
|
||||||
|
**验收标准**:
|
||||||
|
- 首次入驻流程在 30 分钟内可完成
|
||||||
|
- 实名认证支持企业/个人两种模式
|
||||||
|
|
||||||
|
### 2.2 套餐发布
|
||||||
|
|
||||||
|
**流程**:
|
||||||
|
1. 供应方选择要共享的LLM供应商和模型
|
||||||
|
2. 输入账号API Key或配额信息
|
||||||
|
3. 设置单次/每日/每月供给配额
|
||||||
|
4. 设置售价(平台设定最低售价保护)
|
||||||
|
5. 提交验证
|
||||||
|
|
||||||
|
**验收标准**:
|
||||||
|
- 套餐发布后 5 分钟内完成验证并上线
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 套餐有效性验证机制
|
||||||
|
|
||||||
|
### 3.1 验证层级
|
||||||
|
|
||||||
|
| 验证层级 | 验证内容 | 验证方式 | 失败处理 |
|
||||||
|
|----------|----------|----------|----------|
|
||||||
|
| **L1 基础验证** | API Key格式、供应商连通性 | 自动调用供应商API检查有效性 | 立即拒绝 |
|
||||||
|
| **L2 额度验证** | 剩余配额、账户状态 | 调用供应商账户API获取额度信息 | 标记额度不足 |
|
||||||
|
| **L3 行为验证** | 账户历史行为、风险评分 | 平台风控模型评估 | 高风险拒绝 |
|
||||||
|
| **L4 持续监控** | 配额消耗异常、账户异常 | 实时监控+告警 | 自动下架+通知 |
|
||||||
|
|
||||||
|
### 3.2 验证技术方案
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ 套餐验证引擎 │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ 输入:API Key, Provider, Model │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
|
||||||
|
│ │ L1 基础 │ ──▶│ L2 额度 │ ──▶│ L3 行为 │ ──▶│ L4 持续 │ │
|
||||||
|
│ │ 验证 │ │ 验证 │ │ 验证 │ │ 监控 │ │
|
||||||
|
│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ ▼ ▼ ▼ ▼ │
|
||||||
|
│ 格式检查 余额查询 风控评分 异常检测 │
|
||||||
|
│ 连通性测试 有效期限 历史合规 消耗速率 │
|
||||||
|
│ │
|
||||||
|
│ 输出:VerificationResult {status, quota, riskScore} │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 验证规则配置
|
||||||
|
|
||||||
|
| 验证项 | 规则 | 可配置 |
|
||||||
|
|--------|------|--------|
|
||||||
|
| API Key格式 | 必须符合各供应商规范 | 否 |
|
||||||
|
| 连通性 | 3次重试后仍失败则标记不可用 | 是 |
|
||||||
|
| 最小剩余额度 | 根据模型设置阈值(如$5) | 是 |
|
||||||
|
| 风险评分 | >80分拒绝,60-80分人工复核 | 是 |
|
||||||
|
| 账户年龄 | 新账号需观察期后共享 | 是 |
|
||||||
|
|
||||||
|
### 3.4 验收标准
|
||||||
|
|
||||||
|
1. **L1验证成功率**:>= 99%
|
||||||
|
2. **L2验证准确率**:>= 98%(额度误差)
|
||||||
|
3. **L3风控拦截率**:>= 95%(高风险账户识别)
|
||||||
|
4. **验证延迟**:P95 <= 2秒
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 定价与分成机制
|
||||||
|
|
||||||
|
### 4.1 统购统销定价模型
|
||||||
|
|
||||||
|
| 定价层级 | 定价方式 | 公式 |
|
||||||
|
|----------|----------|------|
|
||||||
|
| **采购价(P0)** | 平台统一定价收购 | P0 = 供应商官方价格 × 折扣系数 |
|
||||||
|
| **出售价(P1)** | 平台加价出售 | P1 = P0 × (1 + 毛利率目标) |
|
||||||
|
|
||||||
|
### 4.2 定价参数
|
||||||
|
|
||||||
|
| 参数 | 默认值 | 说明 |
|
||||||
|
|------|--------|------|
|
||||||
|
| 采购折扣系数 | 0.60 | 供应方获得官方价格的60%(与商业SSOT一致) |
|
||||||
|
| 毛利率目标 | 15-50% | 根据模型热度与供需动态调整(与商业SSOT一致) |
|
||||||
|
| 最低售价保护 | 供应商价格×80% | 防止恶性低价竞争 |
|
||||||
|
| 动态调价 | 是 | 根据供需比自动调整 |
|
||||||
|
|
||||||
|
### 4.3 供应方收益结算
|
||||||
|
|
||||||
|
| 结算周期 | 结算方式 | 到账时间 |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| T+7 | 自动结算到余额 | 1-3个工作日 |
|
||||||
|
| 提现 | 银行卡/支付宝 | 3-5个工作日 |
|
||||||
|
|
||||||
|
### 4.4 验收标准
|
||||||
|
|
||||||
|
1. 定价透明,供应方和需求方均可查看历史价格
|
||||||
|
2. 价格调整需提前 24 小时通知
|
||||||
|
3. 结算误差率 <= 0.1%
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 需求方产品流程
|
||||||
|
|
||||||
|
### 5.1 选购流程
|
||||||
|
|
||||||
|
1. 需求方在平台浏览可用套餐
|
||||||
|
2. 选择供应商、模型、数量
|
||||||
|
3. 查看套餐详情(额度、有效期、评价)
|
||||||
|
4. 下单购买
|
||||||
|
5. 获取平台调用凭证(仅平台签发,不返回供应方上游凭证)
|
||||||
|
|
||||||
|
### 5.2 套餐展示信息
|
||||||
|
|
||||||
|
| 信息项 | 说明 |
|
||||||
|
|--------|------|
|
||||||
|
| 供应商 | OpenAI/Anthropic/国产等 |
|
||||||
|
| 模型 | gpt-4o/claude-3等 |
|
||||||
|
| 剩余额度 | 可用配额 |
|
||||||
|
| 单价 | 每1M tokens价格 |
|
||||||
|
| 供应方评分 | 历史服务质量评分 |
|
||||||
|
| 可用性 | 近7天在线率 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5.3 凭证边界约束(强制)
|
||||||
|
|
||||||
|
1. 需求方只使用平台签发的 API Key/Access Token 访问平台。
|
||||||
|
2. 供应方上游凭证只允许平台托管与代调用,不向需求方透出。
|
||||||
|
3. 任何报表、导出、错误信息都不得包含可复用的供应方上游凭证片段。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 风险控制体系
|
||||||
|
|
||||||
|
### 6.1 套餐有效性风险
|
||||||
|
|
||||||
|
| 风险场景 | 防控措施 | 响应机制 |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| 供应方额度耗尽 | 实时监控+提前告警 | 自动切换到备用套餐 |
|
||||||
|
| 供应商账户被封 | 多账户冗余 | 启动备用通道 |
|
||||||
|
| API Key失效 | 定期验证 | 通知供应方更新 |
|
||||||
|
|
||||||
|
### 6.2 滥用风险防控
|
||||||
|
|
||||||
|
| 风险类型 | 防控措施 |
|
||||||
|
|----------|----------|
|
||||||
|
| 薅羊毛 | 供应方需缴纳保证金,实名认证 |
|
||||||
|
| 套现 | 设置提现冷却期,最低提现额度 |
|
||||||
|
| 恶意共享 | 风控模型+人工审核 |
|
||||||
|
| 账号共享 | 设备指纹+IP限制 |
|
||||||
|
| 凭证泄露 | 上游凭证不外发 + 全链路脱敏 + 审计告警 |
|
||||||
|
|
||||||
|
### 6.3 保证金机制
|
||||||
|
|
||||||
|
| 供应方类型 | 保证金要求 | 退还条件 |
|
||||||
|
|------------|------------|----------|
|
||||||
|
| 个人 | ¥500 | 最后一笔交易后30天无异常 |
|
||||||
|
| 企业 | ¥5000 | 最后一笔交易后90天无异常 |
|
||||||
|
|
||||||
|
### 6.4 赔付机制
|
||||||
|
|
||||||
|
| 场景 | 赔付方式 |
|
||||||
|
|------|----------|
|
||||||
|
| 套餐失效导致需求方损失 | 平台优先用保证金赔付,不足部分平台承担 |
|
||||||
|
| 额度不足导致调用失败 | 立即切换备用套餐,差额由平台补偿 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 供应侧管理后台
|
||||||
|
|
||||||
|
### 7.1 供应方控制台功能
|
||||||
|
|
||||||
|
1. **套餐管理**
|
||||||
|
- 上架/下架套餐
|
||||||
|
- 设置配额和售价
|
||||||
|
- 查看销售数据
|
||||||
|
2. **账户管理**
|
||||||
|
- 实名认证
|
||||||
|
- 保证金缴纳
|
||||||
|
- 提现操作
|
||||||
|
3. **数据分析**
|
||||||
|
- 销售额统计
|
||||||
|
- 客户评价
|
||||||
|
- 收益趋势
|
||||||
|
|
||||||
|
### 7.2 平台管理功能
|
||||||
|
|
||||||
|
1. **供应方管理**
|
||||||
|
- 资质审核
|
||||||
|
- 保证金管理
|
||||||
|
- 违规处理
|
||||||
|
2. **套餐管理**
|
||||||
|
- 验证规则配置
|
||||||
|
- 价格监控
|
||||||
|
- 下架管理
|
||||||
|
3. **风控中心**
|
||||||
|
- 风险预警
|
||||||
|
- 异常调查
|
||||||
|
- 黑名单管理
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 供应侧指标体系
|
||||||
|
|
||||||
|
### 8.1 供应方侧指标
|
||||||
|
|
||||||
|
| 指标 | 定义 | 目标 |
|
||||||
|
|------|------|------|
|
||||||
|
| 供应方数量 | 活跃供应方总数 | S1: 50, S2: 200 |
|
||||||
|
| 套餐上架率 | 通过验证的套餐比例 | >= 90% |
|
||||||
|
| 供应方留存率 | 30天活跃供应方比例 | >= 60% |
|
||||||
|
| 投诉率 | 供应方投诉次数/交易量 | <= 1% |
|
||||||
|
|
||||||
|
### 8.2 套餐侧指标
|
||||||
|
|
||||||
|
| 指标 | 定义 | 目标 |
|
||||||
|
|------|------|------|
|
||||||
|
| 套餐可用率 | 在线套餐/总套餐 | >= 95% |
|
||||||
|
| 验证通过率 | 通过验证的申请比例 | >= 85% |
|
||||||
|
| 异常下架率 | 因问题下架的套餐比例 | <= 5% |
|
||||||
|
|
||||||
|
### 8.3 需求方侧指标
|
||||||
|
|
||||||
|
| 指标 | 定义 | 目标 |
|
||||||
|
|------|------|------|
|
||||||
|
| 套餐购买量 | 需求方购买次数 | S1: 1000, S2: 5000 |
|
||||||
|
| 需求方留存率 | 30天复购比例 | >= 40% |
|
||||||
|
| 套餐满意度 | 需求方评分均值 | >= 4.0/5.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 阶段规划
|
||||||
|
|
||||||
|
### 9.1 S0(准备阶段):供应侧MVP
|
||||||
|
|
||||||
|
**时间**:2026-03-18 至 2026-04-30
|
||||||
|
|
||||||
|
**目标**:验证供应侧业务模式可行性
|
||||||
|
|
||||||
|
**交付**:
|
||||||
|
1. 供应方入驻与实名认证
|
||||||
|
2. 套餐验证引擎v1(L1+L2)
|
||||||
|
3. 手动定价(暂无动态调价)
|
||||||
|
4. 基础赔付机制
|
||||||
|
|
||||||
|
**验收标准**:
|
||||||
|
- 引入首批 10 家供应方
|
||||||
|
- 套餐验证成功率 >= 90%
|
||||||
|
|
||||||
|
### 9.2 S1(成长阶段):规模化
|
||||||
|
|
||||||
|
**时间**:2026-05-01 至 2026-08-31
|
||||||
|
|
||||||
|
**目标**:扩大供应侧规模
|
||||||
|
|
||||||
|
**交付**:
|
||||||
|
1. 套餐验证引擎v2(+L3风控)
|
||||||
|
2. 动态定价算法
|
||||||
|
3. 保证金机制
|
||||||
|
4. 供应方等级体系
|
||||||
|
|
||||||
|
**验收标准**:
|
||||||
|
- 活跃供应方 >= 200(注:与 8.1 供应方侧指标表中的 S1 目标 50 口径不一致,需统一后以单一 SSOT 为准)
|
||||||
|
- 套餐可用率 >= 95%
|
||||||
|
|
||||||
|
### 9.3 S2(成熟阶段):生态化
|
||||||
|
|
||||||
|
**时间**:2026-09-01 至 2027-03-31
|
||||||
|
|
||||||
|
**目标**:形成供需双边网络效应
|
||||||
|
|
||||||
|
**交付**:
|
||||||
|
1. 开放API供第三方集成
|
||||||
|
2. 供应方等级权益体系
|
||||||
|
3. 金融衍生服务(额度期货)
|
||||||
|
4. 全球化供应支持
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 待决策问题
|
||||||
|
|
||||||
|
| 编号 | 决策项 | 选项 | 建议 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| SD1 | 采购折扣系数 | 50%/60%/70% | 60%(与商业模型文档统一) |
|
||||||
|
| SD2 | 毛利率目标区间 | 10-20%/15-50%/20-40% | 15-50%(与商业模型文档统一) |
|
||||||
|
| SD3 | 保证金金额 | 个人300/500/1000,企业3000/5000/10000 | 建议500/5000,留出纠错空间 |
|
||||||
|
| SD4 | 结算周期 | T+3/T+7/T+14 | T+7,平衡资金安全和供应方体验 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. 法律合规提醒
|
||||||
|
|
||||||
|
1. **ToS合规**:确保共享行为不违反各LLM供应商服务条款
|
||||||
|
2. **数据安全**:API Key加密存储,仅用于调用转发
|
||||||
|
3. **税务合规**:供应方收入需依法纳税,平台代扣代缴
|
||||||
|
4. **限额标识**:明确标注额度有效期限,避免过期损失
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**章节状态**:修订稿(参数口径已与商业SSOT对齐)
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_prd_v0_2026-03-16.md`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md`
|
||||||
193
docs/supply_technical_design_enhanced_v1_2026-03-25.md
Normal file
193
docs/supply_technical_design_enhanced_v1_2026-03-25.md
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
# 供应侧技术设计增强版(XR-001)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 状态:生效(实施基线)
|
||||||
|
- 目标:补齐供应侧关键写路径的幂等、并发、事务、不变量与可靠性闭环
|
||||||
|
- 关联 SSOT:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`
|
||||||
|
- `supply_button_level_prd_v1_2026-03-25.md`
|
||||||
|
- `supply_api_contract_openapi_draft_v1_2026-03-25.yaml`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 设计边界与约束
|
||||||
|
|
||||||
|
1. 业务链路固定为:`用户A供给 -> 平台 -> 用户B购买平台服务`。
|
||||||
|
2. 供应方上游凭证仅平台托管,任何北向接口不得回显可复用凭证片段。
|
||||||
|
3. 所有关键写操作必须支持双键幂等:`request_id + idempotency_key`。
|
||||||
|
4. 所有状态迁移必须满足“显式前置状态 + 原子落库 + 审计可追溯”。
|
||||||
|
5. 所有跨系统副作用必须通过 Outbox/Saga 触发,禁止在数据库事务中直连外部系统。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 关键写路径与幂等协议
|
||||||
|
|
||||||
|
### 2.1 适用操作
|
||||||
|
|
||||||
|
1. `POST /api/v1/supply/accounts`
|
||||||
|
2. `POST /api/v1/supply/packages/{id}/publish`
|
||||||
|
3. `POST /api/v1/supply/packages/batch-price`
|
||||||
|
4. `POST /api/v1/supply/settlements/withdraw`
|
||||||
|
5. `POST /api/v1/supply/settlements/{id}/cancel`
|
||||||
|
|
||||||
|
### 2.2 入站协议(MUST)
|
||||||
|
|
||||||
|
1. Header 必填:`X-Request-Id`(UUID)
|
||||||
|
2. Header 必填:`Idempotency-Key`(长度 16-128)
|
||||||
|
3. 幂等作用域:`tenant_id + operator_id + api_path + idempotency_key`
|
||||||
|
4. 幂等有效期:`24h`(提现类可扩展到 `72h`)
|
||||||
|
|
||||||
|
### 2.3 语义规范
|
||||||
|
|
||||||
|
1. 首次成功:返回业务成功码(`200/201`)并写入幂等记录。
|
||||||
|
2. 重放同参:返回同一业务结果,`idempotent_replay=true`。
|
||||||
|
3. 重放异参:返回 `409 IDEMPOTENCY_PAYLOAD_MISMATCH`。
|
||||||
|
4. 首次处理中:返回 `202 IDEMPOTENCY_IN_PROGRESS`,携带 `retry_after_ms`。
|
||||||
|
|
||||||
|
### 2.4 存储建议(PostgreSQL)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
create table if not exists supply_idempotency_record (
|
||||||
|
id bigserial primary key,
|
||||||
|
tenant_id bigint not null,
|
||||||
|
operator_id bigint not null,
|
||||||
|
api_path varchar(200) not null,
|
||||||
|
idempotency_key varchar(128) not null,
|
||||||
|
request_id varchar(64) not null,
|
||||||
|
payload_hash char(64) not null,
|
||||||
|
response_code int,
|
||||||
|
response_body jsonb,
|
||||||
|
status varchar(20) not null, -- processing/succeeded/failed
|
||||||
|
expires_at timestamp not null,
|
||||||
|
created_at timestamp not null default now(),
|
||||||
|
updated_at timestamp not null default now(),
|
||||||
|
unique (tenant_id, operator_id, api_path, idempotency_key)
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 并发控制策略(按领域动作)
|
||||||
|
|
||||||
|
### 3.1 账号挂载与状态迁移
|
||||||
|
|
||||||
|
1. 账号状态变更(激活/暂停/禁用)采用乐观锁:`version` 字段 CAS 更新。
|
||||||
|
2. 激活操作 SQL 需带前置状态:`where id=? and status in ('pending','suspended') and version=?`。
|
||||||
|
3. 同一账号同一时刻只允许一个状态迁移事务;冲突返回 `409 SUP_ACC_4091`。
|
||||||
|
|
||||||
|
### 3.2 套餐发布与批量调价
|
||||||
|
|
||||||
|
1. 套餐单条迁移采用乐观锁,保证 `draft -> active -> paused -> expired` 不跳态。
|
||||||
|
2. 批量调价采用“分片事务 + 明细回执”模式,单条失败不回滚全部成功项。
|
||||||
|
3. 批量任务必须落审计明细:`total/success/failed/failed_items[]`。
|
||||||
|
|
||||||
|
### 3.3 提现发起与撤销
|
||||||
|
|
||||||
|
1. 发起提现采用悲观锁:`select ... for update` 锁定供应方可提现余额行。
|
||||||
|
2. 约束:同一供应方同一时刻最多 1 笔 `processing` 提现单。
|
||||||
|
3. 唯一约束建议:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
create unique index if not exists uq_settlement_supplier_processing
|
||||||
|
on supply_settlement(supplier_id)
|
||||||
|
where status = 'processing';
|
||||||
|
```
|
||||||
|
|
||||||
|
4. 余额扣减与结算单创建必须同事务提交,任一失败整体回滚。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 领域不变量(Invariant)
|
||||||
|
|
||||||
|
| 编号 | 不变量 | 触发动作 | 拒绝码 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| INV-ACC-001 | `active` 账号不可删除 | 删除账号 | `SUP_ACC_4092` |
|
||||||
|
| INV-ACC-002 | 账号 `disabled` 仅管理员可恢复 | 激活账号 | `SUP_ACC_4031` |
|
||||||
|
| INV-PKG-001 | `sold_out` 只能系统迁移 | 人工改状态 | `SUP_PKG_4092` |
|
||||||
|
| INV-PKG-002 | `expired` 套餐不可直接恢复 | 发布上架 | `SUP_PKG_4093` |
|
||||||
|
| INV-PKG-003 | 售价不得低于保护价 | 发布/调价 | `SUP_PKG_4001` |
|
||||||
|
| INV-SET-001 | `processing/completed` 不可撤销 | 撤销申请 | `SUP_SET_4092` |
|
||||||
|
| INV-SET-002 | 提现金额不得超过可提现余额 | 发起提现 | `SUP_SET_4001` |
|
||||||
|
| INV-SET-003 | 结算单金额与余额流水必须平衡 | 结算入账 | `SUP_SET_5002` |
|
||||||
|
|
||||||
|
说明:所有不变量失败必须写入审计事件 `invariant_violation`,并携带 `rule_code`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 事务边界与副作用编排
|
||||||
|
|
||||||
|
### 5.1 本地事务内(必须原子)
|
||||||
|
|
||||||
|
1. 领域状态变更(账号/套餐/结算单)
|
||||||
|
2. 资金子账变更(冻结/解冻/可提现)
|
||||||
|
3. 幂等记录更新(`processing -> succeeded/failed`)
|
||||||
|
4. 审计日志落库(最小字段集)
|
||||||
|
5. Outbox 事件入库
|
||||||
|
|
||||||
|
### 5.2 事务外(异步执行)
|
||||||
|
|
||||||
|
1. 通知发送(站内信/邮件/短信)
|
||||||
|
2. 导出任务生成
|
||||||
|
3. 风险引擎异步评分
|
||||||
|
4. BI 聚合看板更新
|
||||||
|
|
||||||
|
### 5.3 Outbox 事件规范
|
||||||
|
|
||||||
|
1. 事件命名:`supply.{domain}.{action}.{result}`
|
||||||
|
2. 必填字段:`event_id/request_id/tenant_id/object_id/before_state/after_state`
|
||||||
|
3. 消费保障:至少一次投递 + 消费幂等(以 `event_id` 去重)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 失败注入与回滚策略
|
||||||
|
|
||||||
|
| 场景ID | 注入点 | 预期行为 | 验收点 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| FI-001 | 提现创建后数据库超时 | 事务回滚,不产生挂单 | 余额不变、无孤儿单 |
|
||||||
|
| FI-002 | 幂等记录已存在同键异参 | 返回 409 | 不改业务状态 |
|
||||||
|
| FI-003 | 套餐发布时状态冲突 | 返回 409 | 状态不跳变 |
|
||||||
|
| FI-004 | 审计落库失败 | 主事务失败并回滚 | 无“成功但无审计” |
|
||||||
|
| FI-005 | Outbox 入库失败 | 主事务失败并回滚 | 无“状态已变更但无事件” |
|
||||||
|
| FI-006 | 导出服务不可用 | 主事务成功,异步重试 | 业务不阻塞 |
|
||||||
|
| FI-007 | 外部 query key 请求 | 网关拒绝 | M-016=100% |
|
||||||
|
| FI-008 | 响应误回显凭证片段 | 安全门禁阻断 | M-013=0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. SLO 与页面动作映射
|
||||||
|
|
||||||
|
| 页面按钮 | API | SLI | SLO | Error Budget |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| BTN-ACC-001 立即验证 | `/accounts/verify` | 可用率 + P95 | 可用率 >= 99.9%,P95 <= 800ms | 月度 0.1% |
|
||||||
|
| BTN-ACC-002 提交挂载 | `/accounts` | 成功率 | 成功率 >= 99.5% | 月度 0.5% |
|
||||||
|
| BTN-PKG-002 发布上架 | `/packages/{id}/publish` | 成功率 + 冲突率 | 成功率 >= 99.5%,冲突率 <= 0.3% | 月度 0.5% |
|
||||||
|
| BTN-PKG-005 批量调价 | `/packages/batch-price` | 局部成功可解释率 | 明细可解释率 = 100% | 0 |
|
||||||
|
| BTN-SET-002 发起提现 | `/settlements/withdraw` | 一致性 + 时延 | `billing_error_rate_pct<=0.1%`,P95<=1200ms | 与 M-004 联动 |
|
||||||
|
| BTN-SET-003 撤销申请 | `/settlements/{id}/cancel` | 成功率 | 成功率 >= 99.9% | 月度 0.1% |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 审计与安全对齐
|
||||||
|
|
||||||
|
1. 所有关键写请求必须记录:`request_id/idempotency_key/operator_id/object_id/result_code`。
|
||||||
|
2. 错误体、导出、日志统一经过脱敏扫描;命中即触发 P0。
|
||||||
|
3. 与门禁指标映射:
|
||||||
|
1. M-013:凭证泄露事件数=0
|
||||||
|
2. M-014:平台凭证入站覆盖率=100%
|
||||||
|
3. M-015:需求方绕平台直连事件=0
|
||||||
|
4. M-016:外部 query key 拒绝率=100%
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 实施与验收清单
|
||||||
|
|
||||||
|
1. API 网关:校验并透传 `X-Request-Id`、`Idempotency-Key`。
|
||||||
|
2. 数据库:新增幂等表、状态版本字段、提现唯一索引。
|
||||||
|
3. 服务层:统一幂等拦截器与冲突返回码。
|
||||||
|
4. 测试层:新增并发冲突、幂等重放、失败注入专项。
|
||||||
|
5. 门禁层:将 FI-001~FI-008 纳入 `SUP-*` 与 `SEC-*` Gate。
|
||||||
|
6. 证据层:执行日志、指标截图、审计抽样、签署记录齐全。
|
||||||
|
|
||||||
|
达到以上 6 项即视为 XR-001 关闭。
|
||||||
181
docs/supply_test_plan_enhanced_v1_2026-03-25.md
Normal file
181
docs/supply_test_plan_enhanced_v1_2026-03-25.md
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
# 供应侧测试方案增强版(XR-002)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 状态:生效(测试执行基线)
|
||||||
|
- 目标:形成“需求-接口-测试-指标-门禁”全链路闭环,补齐并发与重放风险覆盖
|
||||||
|
- 关联文档:
|
||||||
|
- `supply_button_level_prd_v1_2026-03-25.md`
|
||||||
|
- `supply_api_contract_openapi_draft_v1_2026-03-25.yaml`
|
||||||
|
- `supply_ui_test_cases_executable_v1_2026-03-25.md`
|
||||||
|
- `supply_technical_design_enhanced_v1_2026-03-25.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 测试目标与分层策略
|
||||||
|
|
||||||
|
### 1.1 目标
|
||||||
|
|
||||||
|
1. 保障供应侧三页链路(账号、套餐、结算)功能正确且不越权。
|
||||||
|
2. 保障凭证边界红线(M-013~M-016)持续达标。
|
||||||
|
3. 保障关键写路径在并发和重复提交下无双扣、无跳态、无脏数据。
|
||||||
|
4. 保障业主口径 SLA、申诉与赔付流程可验证、可追溯、可复盘。
|
||||||
|
|
||||||
|
### 1.2 分层覆盖
|
||||||
|
|
||||||
|
1. L1 单元测试:状态机迁移、不变量校验、幂等判定、金额计算。
|
||||||
|
2. L2 契约测试:OpenAPI 请求/响应字段、错误码、枚举与脱敏约束。
|
||||||
|
3. L3 集成测试:数据库事务一致性、唯一索引冲突、Outbox 事件。
|
||||||
|
4. L4 UI+API 端到端:按钮级流程、权限态、异常态、审计事件联查。
|
||||||
|
5. L5 安全专项:凭证泄露扫描、query key 拦截、绕平台直连探测。
|
||||||
|
6. L6 可靠性/性能专项:高并发冲突率、重试重放、降级与回滚演练。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 测试追踪矩阵(Requirement -> API -> Test -> Metric -> Gate)
|
||||||
|
|
||||||
|
| 需求ID | 需求描述 | API | 测试用例 | 验收指标 | 门禁映射 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| R-ACC-001 | 账号凭证验证成功可视化 | `POST /accounts/verify` | UI-SUP-ACC-001 | 验证成功率 >=99.5% | SUP-004 |
|
||||||
|
| R-ACC-002 | 挂载需风险确认与审计 | `POST /accounts` | UI-SUP-ACC-002 | 审计覆盖率=100% | SUP-004 |
|
||||||
|
| R-ACC-003 | 账号状态不跳态 | `POST /accounts/{id}/activate/suspend` | UI-SUP-ACC-003/004 + INT-ACC-STATE-001 | 冲突可解释率=100% | SUP-004 |
|
||||||
|
| R-ACC-004 | 活跃账号不可删除 | `DELETE /accounts/{id}` | UI-SUP-ACC-005 | 违规删除成功率=0 | SUP-004 |
|
||||||
|
| R-PKG-001 | 草稿保存可追踪 | `POST /packages/draft` | UI-SUP-PKG-001 | 保存成功率>=99.5% | SUP-005 |
|
||||||
|
| R-PKG-002 | 套餐发布满足保护价与状态约束 | `POST /packages/{id}/publish` | UI-SUP-PKG-002 + INT-PKG-INV-001 | 保护价违规放行率=0 | SUP-005 |
|
||||||
|
| R-PKG-003 | 批量调价部分失败可回执 | `POST /packages/batch-price` | UI-SUP-PKG-005 | 明细完备率=100% | SUP-005 |
|
||||||
|
| R-SET-001 | 提现发起防重复防双扣 | `POST /settlements/withdraw` | UI-SUP-SET-002 + CON-SET-001 | M-004/M-005 达标 | SUP-006 |
|
||||||
|
| R-SET-002 | 处理中/已完成不可撤销 | `POST /settlements/{id}/cancel` | UI-SUP-SET-003 + INT-SET-STATE-001 | 跳态成功率=0 | SUP-006 |
|
||||||
|
| R-SET-003 | 对账单导出不泄露敏感信息 | `GET /settlements/{id}/statement` | UI-SUP-SET-004 + SEC-SUP-001 | M-013=0 | SUP-006/SUP-007 |
|
||||||
|
| R-SEC-001 | 仅平台凭证入站 | 全部北向 API | SEC-SUP-002 | M-014=100% | SUP-007 |
|
||||||
|
| R-SEC-002 | 外部 query key 全拒绝 | 全部北向 API | SEC-SUP-002 | M-016=100% | SUP-007 |
|
||||||
|
| R-SEC-003 | 需求方不可绕平台直连 | 出网策略与告警 | SEC-SUP-002 + SEC-DIRECT-001 | M-015=0 | SUP-007 |
|
||||||
|
| R-UX-001 | 按钮可见性和禁用规则正确 | 三页面全部按钮 | UI-DESIGN-QA-001~020 | 按钮规则通过率=100% | SUP-003/SUP-008 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 并发与幂等专项(P0)
|
||||||
|
|
||||||
|
### 3.1 CON-SET-001 提现并发双提防护
|
||||||
|
|
||||||
|
1. 前置:同一 `supplier_id` 可提现余额 1000,且无 processing 单。
|
||||||
|
2. 步骤:10 并发请求同一金额提现,使用不同 `request_id`,相同业务时窗。
|
||||||
|
3. 断言:
|
||||||
|
1. 最多 1 笔进入 `processing`。
|
||||||
|
2. 其余请求返回 `409/202`,无余额负值。
|
||||||
|
3. 账务流水借贷平衡,`billing_conflict_rate_pct<=0.01%`。
|
||||||
|
|
||||||
|
### 3.2 CON-SET-002 提现幂等重放
|
||||||
|
|
||||||
|
1. 前置:准备固定 `Idempotency-Key`。
|
||||||
|
2. 步骤:同请求体重复发送 20 次;再发送一次异构请求体。
|
||||||
|
3. 断言:
|
||||||
|
1. 同参重复返回同一 `settlement_id`,`idempotent_replay=true`。
|
||||||
|
2. 异参返回 `409 IDEMPOTENCY_PAYLOAD_MISMATCH`。
|
||||||
|
|
||||||
|
### 3.3 CON-PKG-001 套餐发布冲突
|
||||||
|
|
||||||
|
1. 步骤:两个会话同时发布同一 `draft` 套餐。
|
||||||
|
2. 断言:
|
||||||
|
1. 仅一个成功转为 `active`。
|
||||||
|
2. 另一个返回 `409 SUP_PKG_4091`。
|
||||||
|
3. 审计日志有冲突记录,且状态无跳变。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 安全与合规专项(P0)
|
||||||
|
|
||||||
|
### 4.1 凭证泄露扫描
|
||||||
|
|
||||||
|
1. 扫描对象:API 响应体、错误体、导出文件、审计日志、告警消息。
|
||||||
|
2. 扫描规则:上游 key/token 模式库 + 熵检测 + 前缀检测。
|
||||||
|
3. 通过标准:`supplier_credential_exposure_events=0`。
|
||||||
|
|
||||||
|
### 4.2 鉴权边界专项
|
||||||
|
|
||||||
|
1. 平台凭证成功链路:header bearer 访问主路径成功。
|
||||||
|
2. query key 拒绝链路:`/v1/*` 与 `/v1beta/*` 全拒绝。
|
||||||
|
3. 直连阻断链路:模拟需求方绕平台访问供应方,必须失败并告警。
|
||||||
|
|
||||||
|
### 4.3 申诉与赔付流程可测性
|
||||||
|
|
||||||
|
1. 场景:提现延迟、误扣款、导出异常、账户误冻结。
|
||||||
|
2. 断言:具备工单编号、SLA 计时、处理人、结果说明、赔付记录。
|
||||||
|
3. 指标:业主承诺时限命中率 >=99%,逾期需自动升级。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 性能与可靠性专项(P1)
|
||||||
|
|
||||||
|
| 用例ID | 场景 | 负载 | 目标 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| PERF-ACC-001 | 账号验证峰值 | 100 RPS, 10min | P95 <= 800ms,错误率 <0.5% |
|
||||||
|
| PERF-PKG-001 | 套餐批量调价 | 2000 套餐/批 | 全量回执,超时率 <1% |
|
||||||
|
| PERF-SET-001 | 提现高峰并发 | 50 并发/供应方 | 无双扣,余额不为负 |
|
||||||
|
| REL-SET-001 | 结算服务实例重启 | 执行中重启一次 | 无状态跳变,幂等可恢复 |
|
||||||
|
| REL-SEC-001 | 网关规则热更新 | 更新 query key 拦截规则 | M-016 不下降 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 测试数据治理
|
||||||
|
|
||||||
|
### 6.1 数据分层
|
||||||
|
|
||||||
|
1. 固定样本:用于回归(可重复、可比较)。
|
||||||
|
2. 脱敏样本:用于安全扫描与导出验证。
|
||||||
|
3. 回放样本:来自线上脱敏事件,验证真实边界场景。
|
||||||
|
|
||||||
|
### 6.2 数据规则
|
||||||
|
|
||||||
|
1. 测试数据必须通过脱敏策略,不得包含真实凭证。
|
||||||
|
2. 每次执行必须记录 `dataset_version`。
|
||||||
|
3. 幂等与并发用例必须复位余额和状态,防止脏数据串案。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. CI 与门禁编排
|
||||||
|
|
||||||
|
### 7.1 执行顺序
|
||||||
|
|
||||||
|
1. `Contract Gate`:OpenAPI 漂移检测(阻断)。
|
||||||
|
2. `Core Integration Gate`:事务与不变量校验(阻断)。
|
||||||
|
3. `UI-SUP Gate`:按钮级 E2E(阻断)。
|
||||||
|
4. `SEC-SUP Gate`:凭证边界与泄露扫描(阻断)。
|
||||||
|
5. `PERF/REL Gate`:每晚定时跑,异常进入发布前强制复核。
|
||||||
|
|
||||||
|
### 7.2 失败策略
|
||||||
|
|
||||||
|
1. P0 用例失败:立即阻断发布 + 当日复盘。
|
||||||
|
2. P1 用例失败:冻结升波,48h 内修复并补测。
|
||||||
|
3. Flaky 管理:单用例 7 日 flaky 率 >2% 必须治理,禁止“无限重试掩盖失败”。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 准入与退出标准
|
||||||
|
|
||||||
|
### 8.1 准入(Entry)
|
||||||
|
|
||||||
|
1. PRD 按钮级规格冻结。
|
||||||
|
2. OpenAPI 字段冻结。
|
||||||
|
3. 技术增强稿(XR-001)已落地。
|
||||||
|
|
||||||
|
### 8.2 退出(Exit)
|
||||||
|
|
||||||
|
1. 追踪矩阵全部有执行结果与证据链接。
|
||||||
|
2. P0 用例通过率 100%,P1 用例通过率 >=98%。
|
||||||
|
3. M-013~M-016 全部达标,且无未关闭 P0 缺陷。
|
||||||
|
4. 业主验收条款(SLA/申诉/赔付)签字通过。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 交付物清单
|
||||||
|
|
||||||
|
1. `tests/supply/ui_sup_acc_report_2026-03-28.md`
|
||||||
|
2. `tests/supply/ui_sup_pkg_report_2026-03-29.md`
|
||||||
|
3. `tests/supply/ui_sup_set_report_2026-03-29.md`
|
||||||
|
4. `tests/supply/sec_sup_boundary_report_2026-03-30.md`
|
||||||
|
5. `reports/supply_gate_review_2026-03-31.md`
|
||||||
|
6. `reports/supply_traceability_matrix_2026-03-25.csv`(新增)
|
||||||
|
7. `reports/supply_flaky_budget_2026-03-25.md`(新增)
|
||||||
|
|
||||||
|
完成以上 7 项即视为 XR-002 关闭。
|
||||||
342
docs/supply_ui_test_cases_executable_v1_2026-03-25.md
Normal file
342
docs/supply_ui_test_cases_executable_v1_2026-03-25.md
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
# 供应侧 UI-SUP 可执行测试清单(v1.0)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 适用范围:`SUP-PAGE-001/002/003`(账号挂载、套餐发布、收益结算)
|
||||||
|
- 关联文档:
|
||||||
|
- `supply_button_level_prd_v1_2026-03-25.md`
|
||||||
|
- `supply_api_contract_openapi_draft_v1_2026-03-25.yaml`
|
||||||
|
- `router_core_s2_acceptance_test_cases_v1_2026-03-17.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 测试环境与公共前置
|
||||||
|
|
||||||
|
### 1.1 环境
|
||||||
|
|
||||||
|
1. 环境:`staging`(含审计日志、告警、导出能力)。
|
||||||
|
2. 鉴权:仅允许平台凭证(header),禁止 query key 入站。
|
||||||
|
3. 时区:`Asia/Shanghai`。
|
||||||
|
|
||||||
|
### 1.2 测试账号
|
||||||
|
|
||||||
|
1. `supplier_owner_01`:供应方主账号(可创建/编辑)。
|
||||||
|
2. `supplier_viewer_01`:只读账号(仅查看)。
|
||||||
|
3. `platform_admin_01`:平台管理员(可处理风控态)。
|
||||||
|
|
||||||
|
### 1.3 测试数据
|
||||||
|
|
||||||
|
1. 供应账号初始状态:`pending`、`active`、`suspended` 各 1 条。
|
||||||
|
2. 套餐初始状态:`draft`、`active`、`paused`、`sold_out`、`expired` 各 1 条。
|
||||||
|
3. 结算单初始状态:`pending`、`processing`、`completed` 各 1 条。
|
||||||
|
|
||||||
|
### 1.4 通用断言
|
||||||
|
|
||||||
|
1. 每次关键按钮点击均产生审计事件。
|
||||||
|
2. 错误体不出现可复用上游凭证片段(映射 M-013)。
|
||||||
|
3. 所有请求 `request_id` 可追踪。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. SUP-PAGE-001 账号挂载(UI-SUP-ACC-001~006)
|
||||||
|
|
||||||
|
## UI-SUP-ACC-001 立即验证成功路径
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. `supplier_owner_01` 已登录。
|
||||||
|
2. 页面字段 `provider/account_type/credential_input` 均可编辑。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 选择 `provider=openai`。
|
||||||
|
2. 选择 `account_type=api_key`。
|
||||||
|
3. 输入合法凭证(测试密钥)。
|
||||||
|
4. 点击 `BTN-ACC-001 立即验证`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/accounts/verify`。
|
||||||
|
2. 返回 `verify_status=pass`。
|
||||||
|
3. 页面显示“验证通过 + 可用额度摘要”。
|
||||||
|
4. 记录审计事件 `supply.account.verify`。
|
||||||
|
|
||||||
|
## UI-SUP-ACC-002 提交挂载成功路径
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 已完成 `UI-SUP-ACC-001` 且返回通过。
|
||||||
|
2. 勾选 `risk_ack`。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 输入 `account_alias`。
|
||||||
|
2. 点击 `BTN-ACC-002 提交挂载`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/accounts`。
|
||||||
|
2. 返回 `201`,状态为 `pending` 或 `active`。
|
||||||
|
3. 列表出现新账号记录。
|
||||||
|
4. 记录审计事件 `supply.account.create`。
|
||||||
|
|
||||||
|
## UI-SUP-ACC-003 激活账号
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在状态为 `pending` 的账号 A。
|
||||||
|
2. 当前用户有编辑权限。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 在账号 A 行点击 `BTN-ACC-003 激活账号`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/accounts/{id}/activate`。
|
||||||
|
2. 状态从 `pending` 变为 `active`。
|
||||||
|
3. 页面即时刷新该行状态。
|
||||||
|
4. 记录审计事件 `supply.account.activate`。
|
||||||
|
|
||||||
|
## UI-SUP-ACC-004 暂停账号
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在状态为 `active` 的账号 B。
|
||||||
|
2. 账号 B 无未结清风险单。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 在账号 B 行点击 `BTN-ACC-004 暂停账号`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/accounts/{id}/suspend`。
|
||||||
|
2. 状态从 `active` 变为 `suspended`。
|
||||||
|
3. 记录审计事件 `supply.account.suspend`。
|
||||||
|
|
||||||
|
## UI-SUP-ACC-005 删除账号失败保护
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在状态 `active` 的账号 C。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 尝试触发 `BTN-ACC-005 删除账号`(若不可见则通过接口模拟)。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. UI 层按钮不可见或不可点。
|
||||||
|
2. 若后端请求被触发,返回 `409` 冲突。
|
||||||
|
3. 页面提示“活跃账号不可删除”。
|
||||||
|
4. 不产生删除成功审计事件。
|
||||||
|
|
||||||
|
## UI-SUP-ACC-006 查看审计日志
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 账号 D 已有至少 3 条历史操作记录。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-ACC-006 查看审计`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `GET /api/v1/supply/accounts/{id}/audit-logs`。
|
||||||
|
2. 侧边栏展示审计列表(含 operator/request_id/time)。
|
||||||
|
3. 列表分页正常(page/page_size 生效)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. SUP-PAGE-002 套餐发布(UI-SUP-PKG-001~006)
|
||||||
|
|
||||||
|
## UI-SUP-PKG-001 保存草稿
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 至少存在 1 个 `active` 账号。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 选择 `supply_account_id`、`model`。
|
||||||
|
2. 输入 `total_quota`、`price_per_1m_input`、`price_per_1m_output`、`valid_days`。
|
||||||
|
3. 点击 `BTN-PKG-001 保存草稿`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/packages/draft`。
|
||||||
|
2. 返回 `201`,状态为 `draft`。
|
||||||
|
3. 列表可查询到该草稿。
|
||||||
|
4. 记录审计事件 `supply.package.draft.save`。
|
||||||
|
|
||||||
|
## UI-SUP-PKG-002 发布上架成功
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在 `draft` 套餐 E,且价格高于最低保护价。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-PKG-002 发布上架`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/packages/{id}/publish`。
|
||||||
|
2. 状态由 `draft` 变更为 `active`。
|
||||||
|
3. 记录审计事件 `supply.package.publish`。
|
||||||
|
|
||||||
|
## UI-SUP-PKG-003 暂停售卖
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在 `active` 套餐 F。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-PKG-003 暂停售卖`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/packages/{id}/pause`。
|
||||||
|
2. 状态变更为 `paused`。
|
||||||
|
3. 记录审计事件 `supply.package.pause`。
|
||||||
|
|
||||||
|
## UI-SUP-PKG-004 下架套餐
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在 `active` 或 `paused` 套餐 G。
|
||||||
|
2. 套餐 G 不存在未完成结算锁。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-PKG-004 立即下架`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/packages/{id}/unlist`。
|
||||||
|
2. 状态变更为 `expired`(或按策略回到 `paused`)。
|
||||||
|
3. 记录审计事件 `supply.package.unlist`。
|
||||||
|
|
||||||
|
## UI-SUP-PKG-005 批量调价部分失败
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 选择 3 个套餐:2 个可编辑,1 个不可编辑(状态冲突)。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 触发 `BTN-PKG-005 批量调价`,提交统一调价参数。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/packages/batch-price`。
|
||||||
|
2. 返回 `total=3`,`success_count=2`,`failed_count=1`。
|
||||||
|
3. 失败项含明确 `package_id` 与 `error_code`。
|
||||||
|
4. 成功项价格实际更新。
|
||||||
|
|
||||||
|
## UI-SUP-PKG-006 复制套餐
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在任意套餐 H。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-PKG-006 复制套餐`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/packages/{id}/clone`。
|
||||||
|
2. 返回 `201`,新套餐状态为 `draft`。
|
||||||
|
3. 新套餐字段默认值与原套餐一致(除状态、创建时间)。
|
||||||
|
4. 记录审计事件 `supply.package.clone`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. SUP-PAGE-003 结算提现(UI-SUP-SET-001~005)
|
||||||
|
|
||||||
|
## UI-SUP-SET-001 刷新收益数据
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 供应方账号存在账单数据。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-SET-001 刷新收益`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `GET /api/v1/supplier/billing`。
|
||||||
|
2. 汇总卡片与趋势图刷新。
|
||||||
|
3. 刷新失败时显示可重试提示,不清空旧数据。
|
||||||
|
|
||||||
|
## UI-SUP-SET-002 发起提现成功
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. `available_amount > 0`。
|
||||||
|
2. 当前无 `processing` 结算单。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 输入提现金额、收款方式、收款账户、验证码。
|
||||||
|
2. 点击 `BTN-SET-002 发起提现`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/settlements/withdraw`。
|
||||||
|
2. 返回 `201`,结算状态为 `pending`。
|
||||||
|
3. 提现金额从可提现余额冻结。
|
||||||
|
4. 记录审计事件 `supply.settlement.withdraw.create`。
|
||||||
|
|
||||||
|
## UI-SUP-SET-003 撤销提现申请
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在 `pending` 状态结算单 I。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-SET-003 撤销申请`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `POST /api/v1/supply/settlements/{id}/cancel`。
|
||||||
|
2. 状态变更为 `failed/cancelled`。
|
||||||
|
3. 冻结金额回退。
|
||||||
|
4. 记录审计事件 `supply.settlement.withdraw.cancel`。
|
||||||
|
|
||||||
|
## UI-SUP-SET-004 下载对账单
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 存在任意结算单 J。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-SET-004 下载对账单`。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `GET /api/v1/supply/settlements/{id}/statement`。
|
||||||
|
2. 返回可下载链接与过期时间。
|
||||||
|
3. 下载文件成功,文件命名符合规范。
|
||||||
|
4. 记录审计事件 `supply.settlement.statement.export`。
|
||||||
|
|
||||||
|
## UI-SUP-SET-005 查看收益流水
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 已生成多条收益流水记录。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 点击 `BTN-SET-005 查看流水明细`。
|
||||||
|
2. 分别使用时间区间和分页参数查询。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 发起 `GET /api/v1/supply/earnings/records`。
|
||||||
|
2. 明细返回 `earnings_type/status/amount/earned_at` 字段完整。
|
||||||
|
3. 分页逻辑正确,无重复/漏项。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 安全专项(SEC-SUP-001~002)
|
||||||
|
|
||||||
|
## SEC-SUP-001 错误体与导出脱敏检查
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 人为构造账号验证失败、发布失败、提现失败场景。
|
||||||
|
2. 准备账单导出与对账单导出样本。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 触发上述失败场景并抓取错误响应。
|
||||||
|
2. 执行对账单下载。
|
||||||
|
3. 对错误体、导出文件运行脱敏扫描。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. 任一结果中均不出现可复用上游凭证片段。
|
||||||
|
2. 命中敏感片段则标记 P0,对应 M-013 失败。
|
||||||
|
|
||||||
|
## SEC-SUP-002 凭证边界回归(对齐 CB-001~CB-004)
|
||||||
|
|
||||||
|
- 前置数据:
|
||||||
|
1. 平台鉴权与拦截策略已开启。
|
||||||
|
2. 出网审计与告警已开启。
|
||||||
|
|
||||||
|
- 步骤:
|
||||||
|
1. 使用平台凭证访问主路径,确认可通过。
|
||||||
|
2. 构造外部 query key 请求(含 `/v1beta/*`)。
|
||||||
|
3. 构造需求方绕过平台直连上游尝试。
|
||||||
|
|
||||||
|
- 断言:
|
||||||
|
1. `platform_credential_ingress_coverage_pct=100%`(M-014)。
|
||||||
|
2. `query_key_external_reject_rate_pct=100%`(M-016)。
|
||||||
|
3. `direct_supplier_call_by_consumer_events=0`(M-015)。
|
||||||
|
4. `supplier_credential_exposure_events=0`(M-013)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 执行产物要求
|
||||||
|
|
||||||
|
每条用例至少产出:
|
||||||
|
|
||||||
|
1. 原始请求/响应日志(含 request_id)。
|
||||||
|
2. 页面录屏或关键截图。
|
||||||
|
3. 审计事件截图或导出。
|
||||||
|
4. 用例结论(PASS/FAIL/BLOCKED)与责任人签字。
|
||||||
185
docs/supply_uiux_design_spec_v1_2026-03-25.md
Normal file
185
docs/supply_uiux_design_spec_v1_2026-03-25.md
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
# 供应侧 UI/UX 设计规范(XR-003)
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-25
|
||||||
|
- 状态:生效(设计与验收基线)
|
||||||
|
- 目标:建立供应侧统一交互规范,补齐空态/异常态/权限态与可访问性基线
|
||||||
|
- 关联文档:
|
||||||
|
- `supply_button_level_prd_v1_2026-03-25.md`
|
||||||
|
- `supply_ui_test_cases_executable_v1_2026-03-25.md`
|
||||||
|
- `supply_api_contract_openapi_draft_v1_2026-03-25.yaml`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 信息架构(IA)
|
||||||
|
|
||||||
|
### 1.1 导航结构
|
||||||
|
|
||||||
|
1. 一级导航:`供应账号`、`套餐管理`、`收益结算`。
|
||||||
|
2. 二级视图:
|
||||||
|
1. 供应账号:账号列表、挂载弹窗、审计侧栏。
|
||||||
|
2. 套餐管理:列表、编辑页、批量调价弹窗。
|
||||||
|
3. 收益结算:收益卡片、提现弹窗、流水抽屉、对账单下载。
|
||||||
|
3. 全局辅助入口:帮助中心、操作日志、权限说明。
|
||||||
|
|
||||||
|
### 1.2 页面布局基线
|
||||||
|
|
||||||
|
1. 顶部固定:页面标题 + 关键 KPI + 最近更新时间。
|
||||||
|
2. 主体区域:筛选区、列表区、详情区(抽屉或侧栏)。
|
||||||
|
3. 底部区域:批量操作栏(仅在多选时显示)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 交互规则(按钮级)
|
||||||
|
|
||||||
|
### 2.1 按钮等级与视觉规则
|
||||||
|
|
||||||
|
| 等级 | 用途 | 样式规则 | 示例 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Primary | 页面主动作(每屏最多 1 个) | 实色高对比 | 发布上架、发起提现 |
|
||||||
|
| Secondary | 常规辅助动作 | 描边/弱强调 | 保存草稿、刷新收益 |
|
||||||
|
| Tertiary | 次级动作 | 文本按钮 | 查看审计、查看流水 |
|
||||||
|
| Danger | 高风险动作 | 红色警示 + 二次确认 | 删除账号、立即下架 |
|
||||||
|
|
||||||
|
### 2.2 禁用态与提示规则
|
||||||
|
|
||||||
|
1. 禁用按钮必须可见禁用原因(tooltip 或内联提示),禁止“只灰不解释”。
|
||||||
|
2. 权限不足与状态不满足必须区分文案:
|
||||||
|
1. 权限不足:`你没有执行该操作的权限`。
|
||||||
|
2. 状态不满足:`当前状态不允许执行此操作`。
|
||||||
|
3. 按钮进入 loading 时,文案切换为进行时且禁止重复点击。
|
||||||
|
|
||||||
|
### 2.3 危险操作确认规则
|
||||||
|
|
||||||
|
1. `删除账号`、`立即下架`、`撤销提现`必须二次确认。
|
||||||
|
2. 二次确认弹窗必须包含:
|
||||||
|
1. 操作对象名称与 ID。
|
||||||
|
2. 影响范围说明。
|
||||||
|
3. 不可逆后果说明。
|
||||||
|
3. 高风险动作需二次输入验证码或对象别名(可配置)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 状态设计(闭环)
|
||||||
|
|
||||||
|
### 3.1 空态
|
||||||
|
|
||||||
|
1. 首次空态:展示引导文案 + 主按钮(如“立即挂载账号”)。
|
||||||
|
2. 过滤空态:展示“无匹配结果” + 清空筛选操作。
|
||||||
|
3. 异常空态:展示“加载失败” + 重试按钮 + 错误编号。
|
||||||
|
|
||||||
|
### 3.2 异常态
|
||||||
|
|
||||||
|
1. 表单错误:字段级错误 + 全局错误摘要。
|
||||||
|
2. 接口失败:保留用户输入,不自动清空。
|
||||||
|
3. 冲突错误(409):提示“状态已变化,请刷新后重试”。
|
||||||
|
4. 系统错误(5xx):提示可重试并记录 `request_id`。
|
||||||
|
|
||||||
|
### 3.3 权限态
|
||||||
|
|
||||||
|
1. `supplier_owner`:可执行所有业务按钮。
|
||||||
|
2. `supplier_viewer`:仅查看与导出,不可写操作。
|
||||||
|
3. `platform_admin`:可执行风控恢复、禁用解除。
|
||||||
|
4. UI 必须按权限前置控制,后端再做最终鉴权。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 可访问性(A11y)基线
|
||||||
|
|
||||||
|
### 4.1 键盘与焦点
|
||||||
|
|
||||||
|
1. 所有可交互控件支持 Tab/Shift+Tab 顺序访问。
|
||||||
|
2. 焦点可见:focus ring 对比度 >= 3:1。
|
||||||
|
3. 弹窗打开后焦点锁定在弹窗内,关闭后返回触发元素。
|
||||||
|
|
||||||
|
### 4.2 可读性
|
||||||
|
|
||||||
|
1. 正文最小字号 14px,行高 >= 1.5。
|
||||||
|
2. 文本与背景对比度:
|
||||||
|
1. 常规文本 >= 4.5:1
|
||||||
|
2. 大字号文本 >= 3:1
|
||||||
|
3. 错误提示不能仅靠颜色表达,需配合图标或文字标签。
|
||||||
|
|
||||||
|
### 4.3 可理解性
|
||||||
|
|
||||||
|
1. 表单字段必须有 label 与帮助文案。
|
||||||
|
2. 错误消息必须指向可操作修复方式。
|
||||||
|
3. 必填项统一使用 `*` 且配套说明“*为必填”。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 页面级规范补充
|
||||||
|
|
||||||
|
### 5.1 供应账号页(SUP-PAGE-001)
|
||||||
|
|
||||||
|
1. 凭证输入框默认掩码,粘贴后仅展示前后各 2 位。
|
||||||
|
2. 验证通过后展示摘要,不展示原始凭证。
|
||||||
|
3. 状态标签颜色统一:
|
||||||
|
1. `pending`:中性黄
|
||||||
|
2. `active`:绿色
|
||||||
|
3. `suspended`:橙色
|
||||||
|
4. `disabled`:红色
|
||||||
|
|
||||||
|
### 5.2 套餐管理页(SUP-PAGE-002)
|
||||||
|
|
||||||
|
1. 批量调价入口仅在多选数量 >= 2 时出现。
|
||||||
|
2. 保护价未达标时禁用发布按钮并给出最低可填值。
|
||||||
|
3. 复制套餐后自动定位到新草稿并高亮 3 秒。
|
||||||
|
|
||||||
|
### 5.3 收益结算页(SUP-PAGE-003)
|
||||||
|
|
||||||
|
1. 发起提现前展示“可提现余额、预计到账时间、手续费”。
|
||||||
|
2. 处理中结算单必须固定在列表顶部并带进度状态。
|
||||||
|
3. 对账单下载失败时提供重试与工单入口。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 反馈与文案规范
|
||||||
|
|
||||||
|
1. 成功反馈:简短结果 + 下一步建议(如“可前往套餐页继续上架”)。
|
||||||
|
2. 失败反馈:失败原因 + 修复建议 + request_id。
|
||||||
|
3. 文案禁用词:避免“系统异常”这类无信息词,必须给可执行建议。
|
||||||
|
4. 安全文案:不得出现可复用凭证片段、真实账户号全量信息。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Design QA Checklist(验收清单)
|
||||||
|
|
||||||
|
| 编号 | 检查项 | 通过标准 | 严重级别 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| DQA-001 | 按钮层级正确 | Primary/Secondary/Danger 使用符合规范 | P1 |
|
||||||
|
| DQA-002 | 禁用态可解释 | 所有禁用按钮有明确原因 | P1 |
|
||||||
|
| DQA-003 | 危险操作二次确认 | 所有高风险动作均有确认弹窗 | P0 |
|
||||||
|
| DQA-004 | 权限态一致 | viewer 不可触发写操作 | P0 |
|
||||||
|
| DQA-005 | 状态机可视一致 | 页面状态与后端状态一致,无跳态文案 | P0 |
|
||||||
|
| DQA-006 | 错误信息可操作 | 错误提示包含修复建议 | P1 |
|
||||||
|
| DQA-007 | 键盘可达性 | 全页面可纯键盘操作完成关键流程 | P1 |
|
||||||
|
| DQA-008 | 焦点可见性 | focus ring 清晰可见 | P1 |
|
||||||
|
| DQA-009 | 对比度达标 | 文本对比度满足 WCAG AA | P1 |
|
||||||
|
| DQA-010 | 表单可读性 | label/help/error 三要素完整 | P1 |
|
||||||
|
| DQA-011 | 空态完整 | 首次空态/过滤空态/异常空态均定义 | P2 |
|
||||||
|
| DQA-012 | 异常态完整 | 4xx/5xx/409 均有标准展示 | P1 |
|
||||||
|
| DQA-013 | 数据保留策略 | 提交失败后表单值保留 | P1 |
|
||||||
|
| DQA-014 | 审计可追溯提示 | 关键动作提示可追踪 request_id | P0 |
|
||||||
|
| DQA-015 | 敏感信息脱敏 | 界面与导出无可复用凭证片段 | P0 |
|
||||||
|
| DQA-016 | 组件复用一致 | 相同控件样式/交互一致 | P2 |
|
||||||
|
| DQA-017 | 响应式适配 | 1280/1024/768 宽度均可用 | P1 |
|
||||||
|
| DQA-018 | 文案一致性 | 同义动作命名一致,不混用 | P2 |
|
||||||
|
| DQA-019 | 加载反馈清晰 | loading/skeleton/spinner 使用统一 | P2 |
|
||||||
|
| DQA-020 | 业主验收可解释性 | SLA/申诉入口可见且流程清晰 | P1 |
|
||||||
|
|
||||||
|
判定规则:
|
||||||
|
1. 任一 P0 不通过则 UI Gate 不通过。
|
||||||
|
2. P1 通过率需 >=95%,低于阈值禁止发布。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 与测试和门禁对齐
|
||||||
|
|
||||||
|
1. DQA-001~020 对应测试集:`UI-DESIGN-QA-*`(纳入 SUP-003/SUP-008)。
|
||||||
|
2. 凭证脱敏相关 DQA-015 与 `SEC-SUP-001` 强绑定。
|
||||||
|
3. 权限态相关 DQA-004 与 `UI-SUP-ACC-005`、`UI-SUP-SET-*` 强绑定。
|
||||||
|
4. 业主可解释性 DQA-020 与申诉/赔付验收条款强绑定。
|
||||||
|
|
||||||
|
完成 DQA 全量执行并达标,即视为 XR-003 关闭。
|
||||||
1202
docs/technical_architecture_design_v1_2026-03-18.md
Normal file
1202
docs/technical_architecture_design_v1_2026-03-18.md
Normal file
File diff suppressed because it is too large
Load Diff
122
docs/technical_architecture_optimized_v2_2026-03-18.md
Normal file
122
docs/technical_architecture_optimized_v2_2026-03-18.md
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
# 优化技术架构设计(最小可运营栈 + 触发式扩容)
|
||||||
|
|
||||||
|
- 版本:v2.0
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 目标:降低 S0/S1 运维复杂度,同时保证 S2 替换目标可达。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 架构优化原则
|
||||||
|
|
||||||
|
1. 先跑通核心链路,再引入复杂中间件。
|
||||||
|
2. 每增加一个基础设施组件,必须有可量化触发条件。
|
||||||
|
3. 控制面集中、数据面可灰度、回滚可自动化。
|
||||||
|
4. 与主栈强一致:Go + PostgreSQL + Redis。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 分阶段目标架构
|
||||||
|
|
||||||
|
### 2.0 极简模式(S0/S1优先,推荐默认)
|
||||||
|
|
||||||
|
| 层 | 组件 | 说明 |
|
||||||
|
|---|---|---|
|
||||||
|
| 北向入口 | 云负载均衡(L4/L7) | 仅做 TLS 终止与基础转发 |
|
||||||
|
| 业务层 | Gateway Core(Go 单体) | 内含 Auth/RateLimit/Router/Billing/subapi connector |
|
||||||
|
| 数据层 | PostgreSQL 15 | 交易、计费、审计主存储 |
|
||||||
|
| 缓存层 | Redis 7 | 分布式限流、并发门控、短期状态 |
|
||||||
|
| 观测层 | Prometheus + 云日志(stdout) | 指标+日志最小闭环,不默认引入 Loki |
|
||||||
|
| 外部集成 | subapi(内网) | mTLS 双向认证,作为外部模块接入 |
|
||||||
|
|
||||||
|
默认不引入(极简模式):
|
||||||
|
1. API 网关产品层(Kong/Traefik)作为必选组件。
|
||||||
|
2. Loki/ELK 独立日志平台。
|
||||||
|
3. Kafka、Istio、多集群服务治理。
|
||||||
|
|
||||||
|
极简模式退出条件(任一触发):
|
||||||
|
1. 需要在入口层做复杂插件治理(统一 WAF/插件策略/多协议细粒度控制)。
|
||||||
|
2. 网关核心服务发布耦合导致两次以上周更失败。
|
||||||
|
3. 观测检索 SLA 无法满足(日志检索 < 1 分钟)且云日志能力不足。
|
||||||
|
|
||||||
|
### 2.1 S0/S1 最小可运营栈(必须)
|
||||||
|
|
||||||
|
| 层 | 组件 | 说明 |
|
||||||
|
|---|---|---|
|
||||||
|
| 北向入口 | 单一入口层(极简默认:云LB + Go;增强模式:Kong) | 统一鉴权、限流、协议归一 |
|
||||||
|
| 业务层 | Go 服务(模块化单体优先) | Router/Auth/Billing/Adapter 在同一部署单元 |
|
||||||
|
| 数据层 | PostgreSQL 15 | 交易、计费、审计主存储 |
|
||||||
|
| 缓存层 | Redis 7 | 限流、并发门控、短期状态 |
|
||||||
|
| 观测层 | Prometheus + Grafana(日志默认走云日志) | 指标/日志最小闭环 |
|
||||||
|
| 外部集成 | subapi(内网) | 通过 connector 接入,mTLS 双向认证 |
|
||||||
|
|
||||||
|
不引入项(S0/S1 禁止默认引入):
|
||||||
|
1. 服务网格(Istio)。
|
||||||
|
2. Kafka 作为默认依赖。
|
||||||
|
3. ELK 全套日志平台。
|
||||||
|
4. Loki(仅在日志检索SLA不满足时引入)。
|
||||||
|
|
||||||
|
### 2.2 S2 目标架构(有条件增强)
|
||||||
|
|
||||||
|
1. Router Core 成为主路径执行引擎。
|
||||||
|
2. subapi 保留长尾协议与回退通道。
|
||||||
|
3. 按需引入异步事件总线,先从 PG outbox 开始,再评估 Kafka。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 触发式扩容条件(唯一准入)
|
||||||
|
|
||||||
|
| 组件 | 引入前替代方案 | 触发条件(任一满足) | 引入后验收 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Kafka | PostgreSQL Outbox + Worker | 异步事件持续 > 5k msg/s 且 backlog > 15min;或跨域消费者>=3类 | 消费延迟 P95 < 5s,消息丢失=0 |
|
||||||
|
| Istio | 网关 + 应用内 mTLS/熔断 | 服务数量 > 8 且跨服务调用路径 > 20;或多集群流量治理需求明确 | 变更可观测、故障域隔离验证通过 |
|
||||||
|
| ELK | Loki + 对象存储归档 | 日志检索需求超出 Loki 能力,且安全审计检索 SLA < 1min | 检索 SLA 达标,成本可控 |
|
||||||
|
| 服务拆分 | 模块化单体 | 单服务 CPU>70% 持续7天;发布耦合导致周更失败>=2次 | 拆分后发布失败率下降 >=50% |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 部署拓扑(简化且可靠)
|
||||||
|
|
||||||
|
```text
|
||||||
|
Internet
|
||||||
|
-> Cloud LB (or Kong in enhanced mode)
|
||||||
|
-> Gateway Core (Go)
|
||||||
|
-> Router Core / subapi connector
|
||||||
|
-> Providers
|
||||||
|
-> PostgreSQL
|
||||||
|
-> Redis
|
||||||
|
-> Observability (Prometheus/Grafana + Cloud Logs)
|
||||||
|
```
|
||||||
|
|
||||||
|
可靠性要求:
|
||||||
|
1. 回滚:10分钟触发、30分钟恢复。
|
||||||
|
2. 灰度:5% -> 20% -> 50% -> 100%。
|
||||||
|
3. 故障域:按租户分批升波,避免全量冲击。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 运维简化设计
|
||||||
|
|
||||||
|
1. 单一控制面:路由、开关、灰度比例统一发布入口。
|
||||||
|
2. 单一门禁:发布必须通过唯一验收门禁表。
|
||||||
|
3. 标准 Runbook:告警 -> 判断 -> 操作 -> 验证。
|
||||||
|
4. 证据包制度:每次升波必须产出日志+指标+结论。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 与现有文档关系
|
||||||
|
|
||||||
|
1. 本文档替代 `technical_architecture_design_v1_2026-03-18.md` 中“默认重组件组合”的实现建议。
|
||||||
|
2. 本文档与以下文档共同构成实施基线:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_2_2026-03-24.md`
|
||||||
|
- `acceptance_gate_single_source_v1_2026-03-18.md`
|
||||||
|
- `test_plan_go_aligned_v1_2026-03-18.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 首周落地动作
|
||||||
|
|
||||||
|
1. 冻结 S0/S1 最小栈,不引入 Istio/Kafka/ELK。
|
||||||
|
2. 默认采用极简模式(Cloud LB + Go Core),Kong/Loki按退出条件评审后引入。
|
||||||
|
3. 发布扩容触发条件评审模板(无触发条件不得引入组件)。
|
||||||
|
4. 将运维看板与门禁阈值绑定到唯一验收门禁表。
|
||||||
|
5. 完成一次“升级 + 灰度 + 自动回滚”全链路演练。
|
||||||
1364
docs/test_plan_design_v1_2026-03-18.md
Normal file
1364
docs/test_plan_design_v1_2026-03-18.md
Normal file
File diff suppressed because it is too large
Load Diff
194
docs/test_plan_go_aligned_v1_2026-03-18.md
Normal file
194
docs/test_plan_go_aligned_v1_2026-03-18.md
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
# Go 主测试链路对齐方案
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 目标:将测试体系与主技术栈(Go + PostgreSQL)对齐,替代 Python 工程骨架为主的测试设计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 适用范围
|
||||||
|
|
||||||
|
1. 适用于网关主链路:路由、鉴权、计费、适配器、风控、审计。
|
||||||
|
2. 覆盖阶段:S0-S2(优先保障 S1/S2 的上线与替换门禁)。
|
||||||
|
3. 本文档作为测试实施主方案,历史 Python 示例仅保留参考。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 测试金字塔(Go版)
|
||||||
|
|
||||||
|
| 层级 | 占比 | 工具 | 目标 |
|
||||||
|
|---|---:|---|---|
|
||||||
|
| 单元测试 | 70% | `go test` + `testing` + `testify` | 逻辑正确性、异常分支、边界条件 |
|
||||||
|
| 集成测试 | 20% | `go test` + `testcontainers-go` + `httptest` | DB/Redis/网关链路联通与一致性 |
|
||||||
|
| E2E/门禁测试 | 10% | `playwright` + `k6` + 契约回归脚本 | 用户旅程、性能门禁、兼容门禁 |
|
||||||
|
|
||||||
|
覆盖率目标:
|
||||||
|
1. 核心包总覆盖率 >= 80%。
|
||||||
|
2. Router/Billing/Adapter 覆盖率 >= 85%。
|
||||||
|
3. 关键门禁用例(S2 Gate)通过率 = 100%。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 测试目录与命名规范
|
||||||
|
|
||||||
|
```text
|
||||||
|
立交桥/
|
||||||
|
gateway/
|
||||||
|
internal/
|
||||||
|
pkg/
|
||||||
|
tests/
|
||||||
|
unit/
|
||||||
|
router/
|
||||||
|
billing/
|
||||||
|
auth/
|
||||||
|
integration/
|
||||||
|
api/
|
||||||
|
db/
|
||||||
|
adapter/
|
||||||
|
contract/
|
||||||
|
compat/
|
||||||
|
e2e/
|
||||||
|
user_journey/
|
||||||
|
performance/
|
||||||
|
k6/
|
||||||
|
```
|
||||||
|
|
||||||
|
命名规则:
|
||||||
|
1. 单元测试文件:`*_test.go`。
|
||||||
|
2. 集成测试标签:`//go:build integration`。
|
||||||
|
3. 门禁测试标签:`//go:build gate`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 工具链基线(Go)
|
||||||
|
|
||||||
|
| 能力 | 工具 | 说明 |
|
||||||
|
|---|---|---|
|
||||||
|
| 单元测试 | `testing`, `testify/require` | 断言与失败信息可读性 |
|
||||||
|
| Mock | `gomock` 或 `testify/mock` | 仅在外部依赖边界处使用 |
|
||||||
|
| HTTP测试 | `httptest` | Handler/中间件测试 |
|
||||||
|
| DB集成 | `testcontainers-go` + PostgreSQL 15 | 与生产数据库方言一致 |
|
||||||
|
| Redis集成 | `testcontainers-go` + Redis 7 | 限流/并发门控验证 |
|
||||||
|
| 覆盖率 | `go test -coverprofile` | CI 门禁 |
|
||||||
|
| 性能 | `k6` | P95/P99 与错误率门禁 |
|
||||||
|
| 前端E2E | `playwright` | 注册、Key、调用、账单旅程 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 关键测试套件(必须落地)
|
||||||
|
|
||||||
|
### 5.1 Router Core 套件
|
||||||
|
|
||||||
|
1. 主路径端点归一:`/responses` -> `/v1/responses`。
|
||||||
|
2. 路由决策正确性:模型映射、租户策略、fallback。
|
||||||
|
3. 接管率标记:`router_engine` 写入一致性。
|
||||||
|
|
||||||
|
### 5.2 Billing 套件
|
||||||
|
|
||||||
|
1. 幂等扣费:重复 `request_id` 不重复扣费。
|
||||||
|
2. 冲突检测:`billing_conflict_rate_pct` 监测与告警。
|
||||||
|
3. 对账一致性:usage 与 billing 差异 <= 0.1%。
|
||||||
|
|
||||||
|
### 5.3 兼容契约套件
|
||||||
|
|
||||||
|
1. Schema Gate:请求/响应字段与类型。
|
||||||
|
2. Behavior Gate:stream/no-replay/错误码语义。
|
||||||
|
3. Performance Gate:P95/P99/5xx/账务指标。
|
||||||
|
|
||||||
|
### 5.4 安全套件
|
||||||
|
|
||||||
|
1. query key 外拒内转边界。
|
||||||
|
2. subapi 内网隔离与 mTLS。
|
||||||
|
3. RLS/租户越权访问防护。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Go 测试示例(最小可执行)
|
||||||
|
|
||||||
|
```go
|
||||||
|
package billing_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCharge_IdempotentByRequestID(t *testing.T) {
|
||||||
|
svc := newTestBillingService(t)
|
||||||
|
|
||||||
|
reqID := "req-123"
|
||||||
|
err1 := svc.Charge("u1", reqID, 100)
|
||||||
|
err2 := svc.Charge("u1", reqID, 100)
|
||||||
|
|
||||||
|
require.NoError(t, err1)
|
||||||
|
require.NoError(t, err2)
|
||||||
|
|
||||||
|
cnt, err := svc.CountTransactionsByRequestID(reqID)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, 1, cnt)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. CI 门禁流水线(Go版)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
name: go-test-pipeline
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
unit:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
- run: go test ./... -coverprofile=coverage.out
|
||||||
|
- run: go tool cover -func=coverage.out
|
||||||
|
|
||||||
|
integration:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
- run: go test -tags=integration ./tests/integration/...
|
||||||
|
|
||||||
|
gate:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
- run: go test -tags=gate ./tests/contract/...
|
||||||
|
- run: ./scripts/gate/perf_gate_check.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 历史 Python 测试迁移映射
|
||||||
|
|
||||||
|
| 历史做法 | 对齐后做法 |
|
||||||
|
|---|---|
|
||||||
|
| `pytest` 单测 | `go test` + `testify` |
|
||||||
|
| Python `AsyncClient` API 测试 | Go `httptest` + 集成容器 |
|
||||||
|
| sqlite 内存库 | PostgreSQL 容器(与生产一致) |
|
||||||
|
| Python 契约脚本 | Go 契约测试 + CI gate 标签 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 实施排期(首周)
|
||||||
|
|
||||||
|
1. D1-D2:迁移 Router/Billing 单测到 Go 主链路。
|
||||||
|
2. D3:补齐 integration(PostgreSQL/Redis)。
|
||||||
|
3. D4:接入 gate 标签与性能门禁脚本。
|
||||||
|
4. D5:提交覆盖率与门禁报告。
|
||||||
|
|
||||||
401
docs/tos_compliance_engine_design_v1_2026-03-18.md
Normal file
401
docs/tos_compliance_engine_design_v1_2026-03-18.md
Normal file
@@ -0,0 +1,401 @@
|
|||||||
|
# ToS 合规引擎设计章节
|
||||||
|
|
||||||
|
> 本章节为 `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md` 中 S4 阶段的补充设计,专门针对供应商服务条款(Terms of Service, ToS)合规风险进行系统化设计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 合规背景与目标
|
||||||
|
|
||||||
|
### 1.1 合规必要性
|
||||||
|
|
||||||
|
LLM 供应商对其 API 服务有严格的使用条款限制,违规使用可能导致:
|
||||||
|
|
||||||
|
| 风险类型 | 后果 | 严重程度 |
|
||||||
|
|----------|------|----------|
|
||||||
|
| 账户封禁 | 供应方账号被封,平台服务中断 | 严重 |
|
||||||
|
| API 限流 | 关键供应商降低可用配额 | 高 |
|
||||||
|
| 法律风险 | 违反 ToS 可能涉及法律纠纷 | 高 |
|
||||||
|
| 财务损失 | 供应商罚款或追偿 | 中 |
|
||||||
|
| 声誉损害 | 合规问题被公开影响品牌 | 中 |
|
||||||
|
|
||||||
|
### 1.2 合规目标
|
||||||
|
|
||||||
|
1. **ToS 规则覆盖率**:已接入供应商 = 100%
|
||||||
|
2. **高风险策略误放行率**:接近 0(以红线规则为硬约束)
|
||||||
|
3. **合规审计覆盖率**:关键管理操作 = 100%
|
||||||
|
4. **合规事件响应时间**:<= 1 小时
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. ToS 合规引擎架构
|
||||||
|
|
||||||
|
### 2.1 整体架构
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ ToS 合规引擎 │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ 请求入口 │───▶│ 规则匹配 │───▶│ 策略执行 │───▶│ 审计记录 │ │
|
||||||
|
│ │ (Ingress) │ │ (Matcher) │ │ (Executor) │ │ (Audit) │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ ▼ ▼ ▼ ▼ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ 规则配置中心 │ │
|
||||||
|
│ │ ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌───────────┐ │ │
|
||||||
|
│ │ │ 供应商 │ │ 使用场景 │ │ 地区 │ │ 模型 │ │ │
|
||||||
|
│ │ │ 规则库 │ │ 规则库 │ │ 规则库 │ │ 规则库 │ │ │
|
||||||
|
│ │ └───────────┘ └───────────┘ └───────────┘ └───────────┘ │ │
|
||||||
|
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 核心组件
|
||||||
|
|
||||||
|
| 组件 | 职责 | 性能要求 |
|
||||||
|
|------|------|----------|
|
||||||
|
| **规则匹配器** | 根据请求上下文匹配适用的 ToS 规则 | P95 <= 5ms |
|
||||||
|
| **策略执行器** | 执行拦截/告警/放行动作 | P95 <= 2ms |
|
||||||
|
| **审计记录器** | 记录所有合规决策和操作 | 异步,不阻塞主流程 |
|
||||||
|
| **规则配置中心** | 管理 ToS 规则生命周期 | 支持热更新 |
|
||||||
|
|
||||||
|
### 2.3 执行位置
|
||||||
|
|
||||||
|
| 阶段 | 位置 | 动作 |
|
||||||
|
|------|------|------|
|
||||||
|
| **前置拦截** | API Gateway 入口 | 硬性规则拦截(不调用上游) |
|
||||||
|
| **请求转发** | Provider Adapter 层 | 软性规则告警 |
|
||||||
|
| **响应处理** | Billing Engine | 异常检测与追溯 |
|
||||||
|
| **后置审计** | 异步任务 | 合规审计与报告 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 合规规则体系
|
||||||
|
|
||||||
|
### 3.1 规则分类
|
||||||
|
|
||||||
|
```
|
||||||
|
ToS 规则体系
|
||||||
|
│
|
||||||
|
├── 🔴 红线规则(Red Line)- 严格拦截
|
||||||
|
│ ├── 账号共享禁令
|
||||||
|
│ ├── 转售禁令
|
||||||
|
│ ├── 商业用途限制
|
||||||
|
│ └── 地区访问限制
|
||||||
|
│
|
||||||
|
├── 🟡 黄线规则(Yellow Line)- 告警+人工复核
|
||||||
|
│ ├── 使用量异常
|
||||||
|
│ ├── 调用模式异常
|
||||||
|
│ ├── 新型使用场景
|
||||||
|
│ └── 未明确允许的用途
|
||||||
|
│
|
||||||
|
└── 🟢 绿线规则(Green Line)- 通过
|
||||||
|
├── 合规使用场景
|
||||||
|
├── 标准 API 调用
|
||||||
|
└── 已在白名单的场景
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.2 红线规则(严格拦截)
|
||||||
|
|
||||||
|
以下规则为硬性约束,任何匹配立即拦截:
|
||||||
|
|
||||||
|
| 规则ID | 规则名称 | 描述 | 供应商 |
|
||||||
|
|--------|----------|------|--------|
|
||||||
|
| R001 | 账号共享禁令 | 禁止将同一账号分享给多个用户 | 全部 |
|
||||||
|
| R002 | 转售禁令 | 禁止将 API 服务转售给第三方 | 全部 |
|
||||||
|
| R003 | 服务条款禁止地区 | 禁止从制裁/限制地区访问 | 全部 |
|
||||||
|
| R004 | 账户类型限制 | 禁止使用个人账户进行商业用途 | OpenAI |
|
||||||
|
| R005 | 并发限制 | 超过账户允许的并发数 | 全部 |
|
||||||
|
| R006 | 代理/转发禁令 | 禁止作为代理或转发服务 | Anthropic |
|
||||||
|
| R007 | 竞争对手禁止 | 禁止用于竞品服务 | 部分供应商 |
|
||||||
|
|
||||||
|
### 3.3 黄线规则(告警+人工复核)
|
||||||
|
|
||||||
|
以下规则触发告警,由合规团队人工复核:
|
||||||
|
|
||||||
|
| 规则ID | 规则名称 | 描述 | 默认动作 |
|
||||||
|
|--------|----------|------|----------|
|
||||||
|
| Y001 | 使用量突增 | 日环比增长 > 200% | 告警+复核 |
|
||||||
|
| Y002 | 调用模式异常 | 偏离正常使用模式 | 告警+复核 |
|
||||||
|
| Y003 | 新型使用场景 | 首次出现的使用场景 | 告警+复核 |
|
||||||
|
| Y004 | 大额账户 | 单账户配额 > $10,000 | 告警+复核 |
|
||||||
|
| Y005 | 跨地区访问 | IP 地理位置变更异常 | 告警+复核 |
|
||||||
|
|
||||||
|
### 3.4 规则配置示例
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ToS 规则配置示例
|
||||||
|
rules:
|
||||||
|
- id: "R001"
|
||||||
|
name: "账号共享禁令"
|
||||||
|
severity: "red"
|
||||||
|
matchers:
|
||||||
|
- type: "tenant_share"
|
||||||
|
operator: "gt"
|
||||||
|
threshold: 1
|
||||||
|
action: "block"
|
||||||
|
message: "检测到账号共享行为,违反供应商 ToS"
|
||||||
|
|
||||||
|
- id: "Y001"
|
||||||
|
name: "使用量突增"
|
||||||
|
severity: "yellow"
|
||||||
|
matchers:
|
||||||
|
- type: "daily_usage_delta"
|
||||||
|
operator: "gt"
|
||||||
|
threshold: 2.0 # 200%
|
||||||
|
action: "alert"
|
||||||
|
message: "检测到使用量异常增长,请复核"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 供应商 ToS 清单
|
||||||
|
|
||||||
|
### 4.1 主要供应商 ToS 要点
|
||||||
|
|
||||||
|
| 供应商 | 关键限制 | 红线规则 | 合规策略 |
|
||||||
|
|--------|----------|----------|----------|
|
||||||
|
| **OpenAI** | 禁止转售、账户共享、商业用途需企业账户 | R001,R002,R004 | 严格拦截 + 账户分类 |
|
||||||
|
| **Anthropic** | 禁止代理转发、地区限制 | R001,R006 | 严格拦截 + 地区检测 |
|
||||||
|
| **Google Gemini** | 禁止竞争对手使用、需同意服务条款 | R001,R007 | 严格拦截 + 场景识别 |
|
||||||
|
| **Azure OpenAI** | 需企业协议、有限制使用条款 | R001,R004 | 企业账户验证 |
|
||||||
|
| **国内供应商** | 各家不同,需逐一审查 | 视具体条款 | 个性化配置 |
|
||||||
|
|
||||||
|
### 4.2 供应商合规矩阵
|
||||||
|
|
||||||
|
```
|
||||||
|
供应商 │ 账号共享 │ 转售 │ 代理 │ 地区限制 │ 商业用途
|
||||||
|
────────────────┼──────────┼──────┼──────┼─────────┼─────────
|
||||||
|
OpenAI │ 🔴 │ 🔴 │ 🟡 │ 🔴 │ 🟡
|
||||||
|
Anthropic │ 🔴 │ 🔴 │ 🔴 │ 🔴 │ 🟢
|
||||||
|
Gemini │ 🔴 │ 🔴 │ 🟡 │ 🔴 │ 🟡
|
||||||
|
Azure OpenAI │ 🔴 │ 🟢 │ 🟢 │ 🟢 │ 🟢
|
||||||
|
国内-通义 │ 🟡 │ 🟡 │ 🟡 │ 🔴 │ 🟢
|
||||||
|
国内-文心 │ 🟡 │ 🟡 │ 🟡 │ 🔴 │ 🟢
|
||||||
|
国内-智谱 │ 🟡 │ 🟡 │ 🟡 │ 🔴 │ 🟢
|
||||||
|
|
||||||
|
🔴 = 严格禁止 🟡 = 需审核 🟢 = 允许
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 供应商规则更新机制
|
||||||
|
|
||||||
|
1. **定期扫描**:每周扫描供应商 ToS 更新
|
||||||
|
2. **版本追踪**:维护各供应商 ToS 版本历史
|
||||||
|
3. **变更影响评估**:ToS 变更后 24 小时内完成影响评估
|
||||||
|
4. **规则同步**:影响评估后 48 小时内完成规则更新
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 合规策略执行模式
|
||||||
|
|
||||||
|
### 5.1 两种执行模式
|
||||||
|
|
||||||
|
| 模式 | 描述 | 适用场景 | 优缺点 |
|
||||||
|
|------|------|----------|--------|
|
||||||
|
| **严格拦截** | 红线规则立即拦截,不调用上游 | S4 初期、高风险场景 | 安全但可能误伤 |
|
||||||
|
| **告警+人工复核** | 触发规则仅告警,人工确认后拦截 | 成熟期、低风险场景 | 灵活但响应慢 |
|
||||||
|
|
||||||
|
### 5.2 推荐策略(评审建议)
|
||||||
|
|
||||||
|
根据评审意见,建议默认采用**"告警+人工复核"**模式:
|
||||||
|
|
||||||
|
| 阶段 | 执行模式 | 说明 |
|
||||||
|
|------|----------|------|
|
||||||
|
| S1 | 告警+人工复核 | 积累经验,完善规则 |
|
||||||
|
| S2 | 告警+人工复核 | 持续优化 |
|
||||||
|
| S3 | 逐步切换 | 黄线告警+复核,红线拦截 |
|
||||||
|
| S4 | 分类执行 | 红线拦截,黄线复核,绿线放行 |
|
||||||
|
|
||||||
|
### 5.3 模式切换触发条件
|
||||||
|
|
||||||
|
```
|
||||||
|
告警+人工复核 ──▶ 严格拦截
|
||||||
|
|
||||||
|
触发条件(满足任一):
|
||||||
|
├── 30天内发生2次及以上合规事件
|
||||||
|
├── 供应商发出书面警告
|
||||||
|
├── 监管机构发布新规
|
||||||
|
└── 风险评分 > 80分
|
||||||
|
|
||||||
|
严格拦截 ──▶ 告警+人工复核
|
||||||
|
|
||||||
|
触发条件:
|
||||||
|
├── 连续60天无合规事件
|
||||||
|
├── 规则准确率 > 99%
|
||||||
|
└── 人工复核通过率 > 95%
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 违规处理机制
|
||||||
|
|
||||||
|
### 6.1 违规分级
|
||||||
|
|
||||||
|
| 级别 | 定义 | 示例 | 处理方式 |
|
||||||
|
|------|------|------|----------|
|
||||||
|
| **P0-严重** | 违反红线,可能导致供应商封号 | 账号共享被检测 | 立即停止服务 + 通知 + 整改 |
|
||||||
|
| **P1-高** | 疑似违规,需立即复核 | 使用量突增 | 暂停服务 + 24小时内复核 |
|
||||||
|
| **P2-中** | 异常行为,需关注 | 调用模式偏离 | 告警 + 持续观察 |
|
||||||
|
| **P3-低** | 轻微异常 | 边界值触发 | 记录 + 不阻断 |
|
||||||
|
|
||||||
|
### 6.2 违规处理流程
|
||||||
|
|
||||||
|
```
|
||||||
|
检测到违规
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────┐
|
||||||
|
│ 违规定级 │ 自动/人工
|
||||||
|
└──────┬──────┘
|
||||||
|
│
|
||||||
|
┌────┴────┐
|
||||||
|
▼ ▼ ▼ ▼
|
||||||
|
┌────┐ ┌────┐ ┌────┐ ┌────┐
|
||||||
|
│ P0 │ │ P1 │ │ P2 │ │ P3 │
|
||||||
|
└────┘ └────┘ └────┘ └────┘
|
||||||
|
│ │ │ │
|
||||||
|
▼ ▼ ▼ ▼
|
||||||
|
立即停 24h 持续 记录
|
||||||
|
止服务 复核 观察 观察
|
||||||
|
│ │ │ │
|
||||||
|
▼ ▼ ▼ ▼
|
||||||
|
通知 整改/ 解除/ 解除/
|
||||||
|
供应方 恢复 升级 保持
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 违规响应时效
|
||||||
|
|
||||||
|
| 违规级别 | 响应时间 | 解决时间 | 升级时间 |
|
||||||
|
|----------|----------|----------|----------|
|
||||||
|
| P0 | 5 分钟 | 1 小时 | 30 分钟 |
|
||||||
|
| P1 | 30 分钟 | 24 小时 | 4 小时 |
|
||||||
|
| P2 | 2 小时 | 7 天 | 48 小时 |
|
||||||
|
| P3 | 24 小时 | 30 天 | 7 天 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 审计与报告
|
||||||
|
|
||||||
|
### 7.1 审计日志要求
|
||||||
|
|
||||||
|
| 审计项 | 内容 | 保留时间 |
|
||||||
|
|--------|------|----------|
|
||||||
|
| **规则命中日志** | 请求ID、规则ID、匹配结果、执行动作 | 1年 |
|
||||||
|
| **人工复核记录** | 复核人、复核结果、处理意见 | 3年 |
|
||||||
|
| **规则变更记录** | 变更人、变更内容、变更原因 | 3年 |
|
||||||
|
| **供应商通知** | 通知内容、响应动作 | 5年 |
|
||||||
|
| **违规事件** | 事件详情、处理过程、最终结果 | 5年 |
|
||||||
|
|
||||||
|
### 7.2 定期报告
|
||||||
|
|
||||||
|
| 报告类型 | 频率 | 内容 |
|
||||||
|
|----------|------|------|
|
||||||
|
| **合规周报** | 每周 | 规则触发统计、违规事件、处理情况 |
|
||||||
|
| **合规月报** | 每月 | 趋势分析、风险评估、规则优化建议 |
|
||||||
|
| **合规季报** | 每季度 | 完整合规评估、供应商 ToS 更新、体系改进 |
|
||||||
|
| **年度合规报告** | 每年 | 全面合规审计、重大事件回顾、体系成熟度评估 |
|
||||||
|
|
||||||
|
### 7.3 合规指标
|
||||||
|
|
||||||
|
| 指标 | 目标值 | 严重阈值 |
|
||||||
|
|------|--------|----------|
|
||||||
|
| 规则覆盖率 | 100% | < 95% |
|
||||||
|
| 红线拦截准确率 | >= 99.9% | < 99% |
|
||||||
|
| 误报率 | <= 5% | > 10% |
|
||||||
|
| 平均响应时间 | <= 30min | > 2h |
|
||||||
|
| 审计完整率 | 100% | < 99% |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 供应商账号导入合规
|
||||||
|
|
||||||
|
### 8.1 BYOA(Bring Your Own Account)合规检查
|
||||||
|
|
||||||
|
当客户导入自己的 LLM 账号时:
|
||||||
|
|
||||||
|
| 检查项 | 检查内容 | 处理方式 |
|
||||||
|
|--------|----------|----------|
|
||||||
|
| 账号类型 | 企业账户 vs 个人账户 | 个人账户商业用途需警告 |
|
||||||
|
| 使用权限 | 是否允许 API 访问 | 无权限拒绝 |
|
||||||
|
| 地区合规 | 账号注册地区是否合规 | 限制地区拒绝 |
|
||||||
|
| 账户状态 | 是否正常、是否有欠款 | 异常拒绝 |
|
||||||
|
| ToS 确认 | 客户确认遵守供应商 ToS | 需签署协议 |
|
||||||
|
|
||||||
|
### 8.2 凭证存储合规
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| **加密存储** | API Key 必须加密存储(AES-256) |
|
||||||
|
| **访问控制** | 仅策略引擎可解密,禁止明文展示 |
|
||||||
|
| **日志脱敏** | 日志中禁止出现完整 API Key |
|
||||||
|
| **轮换机制** | 支持密钥轮换,轮换后旧密钥自动失效 |
|
||||||
|
| **审计日志** | 所有访问记录审计日志 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 技术实现要点
|
||||||
|
|
||||||
|
### 9.1 性能优化
|
||||||
|
|
||||||
|
- **规则预编译**:规则在启动时编译为高效数据结构
|
||||||
|
- **缓存策略**:热点规则结果缓存 60 秒
|
||||||
|
- **异步执行**:合规检查异步化,不阻塞主请求
|
||||||
|
- **批量处理**:后置批量审计,减少实时开销
|
||||||
|
|
||||||
|
### 9.2 可观测性
|
||||||
|
|
||||||
|
| 指标 | 监控 |
|
||||||
|
|------|------|
|
||||||
|
| 规则匹配延迟 | P95 <= 5ms |
|
||||||
|
| 规则触发率 | 按规则维度统计 |
|
||||||
|
| 拦截成功率 | 拦截后上游调用数为 0 |
|
||||||
|
| 误报率 | 人工复核放行比例 |
|
||||||
|
|
||||||
|
### 9.3 高可用设计
|
||||||
|
|
||||||
|
- **规则服务无状态**:可横向扩展
|
||||||
|
- **配置中心高可用**:多副本 + 自动切换
|
||||||
|
- **降级策略**:合规服务不可用时默认放行 + 告警
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 实施计划
|
||||||
|
|
||||||
|
### 10.1 S3 阶段:合规基础能力
|
||||||
|
|
||||||
|
| 时间 | 任务 | 交付 |
|
||||||
|
|------|------|------|
|
||||||
|
| W1-W2 | 合规引擎核心开发 | 规则匹配 + 执行 |
|
||||||
|
| W3 | 首批供应商规则配置 | OpenAI/Anthropic 规则 |
|
||||||
|
| W4 | 审计日志功能 | 合规审计能力 |
|
||||||
|
|
||||||
|
### 10.2 S4 阶段:合规体系完善
|
||||||
|
|
||||||
|
| 时间 | 任务 | 交付 |
|
||||||
|
|------|------|------|
|
||||||
|
| W1-W2 | 全供应商规则覆盖 | 全部已接入供应商 |
|
||||||
|
| W3-W4 | 人工复核工作流 | 告警 + 复核 + 处理 |
|
||||||
|
| W5-W6 | 定期报告功能 | 周报/月报/季报 |
|
||||||
|
| W7-W8 | 体系优化 | 规则准确率优化 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. 责任矩阵
|
||||||
|
|
||||||
|
| 任务 | 负责人 | 协作方 |
|
||||||
|
|------|--------|--------|
|
||||||
|
| 规则开发 | 平台工程 | 安全团队 |
|
||||||
|
| 规则配置 | 合规团队 | 产品 |
|
||||||
|
| 人工复核 | 合规团队 | 客服 |
|
||||||
|
| 供应商沟通 | 法务 | 商务 |
|
||||||
|
| 审计报告 | 合规团队 | 运营 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**章节状态**:初稿
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v2_2026-03-17.md`
|
||||||
|
- `llm_gateway_product_technical_blueprint_v1_2026-03-16.md`
|
||||||
219
docs/tos_legal_communication_plan_v1_2026-03-18.md
Normal file
219
docs/tos_legal_communication_plan_v1_2026-03-18.md
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
# ToS 合规法务前置沟通方案
|
||||||
|
|
||||||
|
> 版本:v1.0
|
||||||
|
> 日期:2026-03-18
|
||||||
|
> 目的:为与法务团队的沟通提供准备材料,确保合规策略符合法律要求
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 沟通背景
|
||||||
|
|
||||||
|
### 1.1 为什么需要法务前置
|
||||||
|
|
||||||
|
| 风险项 | 影响 | 严重性 |
|
||||||
|
|--------|------|--------|
|
||||||
|
| ToS违规 | 供应商封号、追责 | 🔴 高 |
|
||||||
|
| 法律风险 | 诉讼、罚款 | 🔴 高 |
|
||||||
|
| 业务中断 | 服务不可用 | 🟡 中 |
|
||||||
|
|
||||||
|
**评审意见**:S4阶段的低成本账号模块存在法律风险,需法务前置
|
||||||
|
|
||||||
|
### 1.2 当前规划中的合规要素
|
||||||
|
|
||||||
|
- ToS 合规引擎(红/黄/绿规则)
|
||||||
|
- 执行模式:告警+人工复核(默认)
|
||||||
|
- 供应商合规矩阵
|
||||||
|
- 审计报表
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 沟通议题清单
|
||||||
|
|
||||||
|
### 议题1:红线规则定义
|
||||||
|
|
||||||
|
**问题**:哪些行为是绝对禁止的?
|
||||||
|
|
||||||
|
| 规则类型 | 行为 | 建议状态 | 法务确认 |
|
||||||
|
|----------|------|----------|----------|
|
||||||
|
| 账号共享 | 多人共用一个账号 | 🔴 禁止 | ⬜ |
|
||||||
|
| 转售 | 加价转售配额 | 🔴 禁止 | ⬜ |
|
||||||
|
| 代理 | 未经授权的代理服务 | 🟡 需确认 | ⬜ |
|
||||||
|
| 地区限制 | 限制地区访问 | 🔴 禁止 | ⬜ |
|
||||||
|
|
||||||
|
**需要法务确认**:
|
||||||
|
1. "账号共享"的定义边界是什么?
|
||||||
|
2. "转售"的定义是否包括平台加价销售?
|
||||||
|
3. 代理服务的合规边界?
|
||||||
|
|
||||||
|
### 议题2:低成本账号模块
|
||||||
|
|
||||||
|
**问题**:S4阶段的"低成本账号"模块是否合规?
|
||||||
|
|
||||||
|
| 账号来源 | 合规性 | 风险等级 |
|
||||||
|
|----------|--------|----------|
|
||||||
|
| 官方直购 | ✅ 合规 | 🟢 低 |
|
||||||
|
| 授权分销商 | ⚠️ 需确认 | 🟡 中 |
|
||||||
|
| 第三方平台 | ❌ 存疑 | 🔴 高 |
|
||||||
|
| 用户共享 | ⚠️ 需确认 | 🟡 中 |
|
||||||
|
|
||||||
|
**需要法务确认**:
|
||||||
|
1. 授权分销商的定义和授权链如何验证?
|
||||||
|
2. 用户共享模式是否违反ToS?
|
||||||
|
3. 平台作为"中间商"是否涉及法律风险?
|
||||||
|
|
||||||
|
### 议题3:执行模式
|
||||||
|
|
||||||
|
**问题**:告警+人工复核模式是否足够?
|
||||||
|
|
||||||
|
| 模式 | 优点 | 缺点 | 建议 |
|
||||||
|
|------|------|------|------|
|
||||||
|
| 告警+人工复核 | 灵活、减少误伤 | 人工成本高 | ✅ 推荐 |
|
||||||
|
| 自动拦截 | 效率高 | 可能误伤 | 补充 |
|
||||||
|
| 事后审计 | 不影响体验 | 事后补救 | 辅助 |
|
||||||
|
|
||||||
|
**需要法务确认**:
|
||||||
|
1. 人工复核的法律效力?
|
||||||
|
2. 审计日志的法律证据效力?
|
||||||
|
3. 跨境数据传输的合规要求?
|
||||||
|
|
||||||
|
### 议题4:供应商ToS差异
|
||||||
|
|
||||||
|
**问题**:不同供应商的ToS要求不同,如何处理?
|
||||||
|
|
||||||
|
| 供应商 | 账号共享 | 转售 | 代理 | 地区限制 |
|
||||||
|
|--------|----------|------|------|----------|
|
||||||
|
| OpenAI | 🔴 | 🔴 | 🟡 | 🔴 |
|
||||||
|
| Anthropic | 🔴 | 🔴 | 🔴 | 🔴 |
|
||||||
|
| Azure OpenAI | 🔴 | 🟢 | 🟢 | 🟢 |
|
||||||
|
| 国内供应商 | 🟡 | 🟡 | 🟡 | 🔴 |
|
||||||
|
|
||||||
|
**需要法务确认**:
|
||||||
|
1. 是否需要对不同供应商采用不同策略?
|
||||||
|
2. 混合供应商模式的法律风险?
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 法务沟通要点
|
||||||
|
|
||||||
|
### 3.1 核心诉求
|
||||||
|
|
||||||
|
1. **明确红线**:哪些行为绝对禁止?
|
||||||
|
2. **合规边界**:哪些行为可以做?如何做?
|
||||||
|
3. **证据链**:如何保留合规证据?
|
||||||
|
4. **应急预案**:违规后如何应对?
|
||||||
|
|
||||||
|
### 3.2 需准备的材料
|
||||||
|
|
||||||
|
| 材料 | 用途 | 准备方 |
|
||||||
|
|------|------|--------|
|
||||||
|
| 供应商ToS摘要 | 了解各供应商要求 | 产品 |
|
||||||
|
| 平台合规策略 | 展示我们的方案 | 产品 |
|
||||||
|
| 风险评估报告 | 说明风险和缓解 | 技术 |
|
||||||
|
| 审计方案 | 证据链设计 | 技术 |
|
||||||
|
|
||||||
|
### 3.3 预期产出
|
||||||
|
|
||||||
|
| 产出 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| 红线规则清单 | 法务确认的禁止行为 |
|
||||||
|
| 合规执行手册 | 操作指南 |
|
||||||
|
| 风险告知书 | 对用户的风险告知 |
|
||||||
|
| 法务意见书 | 正式法律意见 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 合规执行框架
|
||||||
|
|
||||||
|
### 4.1 三线防御
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ 第一线:技术防御 │
|
||||||
|
│ - API Key 验证 │
|
||||||
|
│ - ToS 规则引擎 │
|
||||||
|
│ - 请求前置拦截 │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ 第二线:运营防御 │
|
||||||
|
│ - 人工复核机制 │
|
||||||
|
│ - 定期审计 │
|
||||||
|
│ - 异常告警 │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ 第三线:法务防御 │
|
||||||
|
│ - 用户协议 │
|
||||||
|
│ - 服务条款 │
|
||||||
|
│ - 免责声明 │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 责任矩阵
|
||||||
|
|
||||||
|
| 角色 | 职责 |
|
||||||
|
|------|------|
|
||||||
|
| 产品 | 规则设计、体验优化 |
|
||||||
|
| 技术 | 引擎实现、审计日志 |
|
||||||
|
| 运营 | 人工复核、异常处理 |
|
||||||
|
| 法务 | 规则确认、风险告知 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 风险应对
|
||||||
|
|
||||||
|
### 5.1 供应商追责应对
|
||||||
|
|
||||||
|
| 场景 | 应对措施 |
|
||||||
|
|------|----------|
|
||||||
|
| 收到供应商警告 | 立即排查,24小时内响应 |
|
||||||
|
| 账号被封 | 启动应急预案,切换到备用账号 |
|
||||||
|
| 法律函 | 法务介入,评估和解方案 |
|
||||||
|
| 诉讼 | 法律团队介入,保留证据 |
|
||||||
|
|
||||||
|
### 5.2 用户追责应对
|
||||||
|
|
||||||
|
| 场景 | 应对措施 |
|
||||||
|
|------|----------|
|
||||||
|
| 用户违规 | 依据用户协议处理 |
|
||||||
|
| 用户损失 | 依据服务条款免责 |
|
||||||
|
| 集体投诉 | 法务预案,启动保险 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 下一步行动
|
||||||
|
|
||||||
|
### 6.1 沟通计划
|
||||||
|
|
||||||
|
| 时间 | 议题 | 参与方 |
|
||||||
|
|------|------|--------|
|
||||||
|
| 第1周 | 红线规则确认 | 产品+法务 |
|
||||||
|
| 第2周 | 合规执行模式 | 运营+法务 |
|
||||||
|
| 第3周 | 供应商ToS分析 | 技术+法务 |
|
||||||
|
| 第4周 | 最终确认 | 全体 |
|
||||||
|
|
||||||
|
### 6.2 待法务确认事项
|
||||||
|
|
||||||
|
- [ ] 账号共享定义边界
|
||||||
|
- [ ] 转售行为合法性
|
||||||
|
- [ ] 代理服务合规性
|
||||||
|
- [ ] 跨境数据传输要求
|
||||||
|
- [ ] 用户协议条款
|
||||||
|
- [ ] 免责声明效力
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 紧急联络
|
||||||
|
|
||||||
|
| 场景 | 联系人 | 响应时间 |
|
||||||
|
|------|--------|----------|
|
||||||
|
| 供应商警告 | 法务负责人 | 2小时 |
|
||||||
|
| 账号异常 | 技术负责人 | 1小时 |
|
||||||
|
| 法律函 | 法务负责人 | 24小时 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**文档状态**:法务沟通准备材料
|
||||||
|
**关联文档**:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md`
|
||||||
|
- `tos_compliance_engine_design_v1_2026-03-18.md`
|
||||||
89
docs/v4_1_baseline_convergence_checklist_2026-03-18.md
Normal file
89
docs/v4_1_baseline_convergence_checklist_2026-03-18.md
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# 规划设计 v4.1 基线收敛清单(执行版)
|
||||||
|
|
||||||
|
- 版本:v4.1-checklist
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 目的:在实施前消除口径冲突、依赖倒挂、技术栈错位与验收歧义,形成单一可执行基线(SSOT)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 收敛原则(必须满足)
|
||||||
|
|
||||||
|
1. 单一事实源:阶段日期、目标值、验收口径只能在一份主基线文档定义,其他文档只引用。
|
||||||
|
2. 先定义再执行:任何 Go/No-Go 指标必须先统一 SQL 口径、端点集合与平台分类来源。
|
||||||
|
3. 依赖可执行:任务依赖必须满足“前置任务截止 <= 后置任务截止”。
|
||||||
|
4. 栈一致性:架构、测试、SQL 示例与主技术栈(Go + PostgreSQL)一致。
|
||||||
|
5. 验收可追溯:每个里程碑都有证据产物路径、责任人、阻断规则。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. P0 收敛项(必须在开工前完成)
|
||||||
|
|
||||||
|
| ID | 收敛项 | 当前问题 | 目标状态(v4.1) | 责任角色 | 完成定义 |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| BL-001 | 版本与命名统一 | 文件名 v3,正文出现 v4.0/v3.0 混用 | 主文档命名、头部版本、尾部版本一致(统一 v4.1) | PMO + ARCH | 任意文档内仅出现一个有效版本号 |
|
||||||
|
| BL-002 | 阶段时间线统一 | S0 在不同文档出现 4周/12周/15周;S1/S2相互重叠 | 发布“阶段时间线裁决表”,所有文档仅引用该表 | PMO | 阶段开始/结束日期在所有文档一致 |
|
||||||
|
| BL-003 | S2 目标值统一 | 同时存在 60%、50-60%、30-40%、40%终态 | 锁定 S2 验收目标:全供应商 >=60%,国内=100%(弹性仅做过程预警,不进入最终验收) | ARCH + 产品 | 所有验收章节口径一致 |
|
||||||
|
| BL-004 | 主路径端点集合统一 | `/responses` 与 `/v1/responses` 表述混用 | 固化 canonical 主路径端点集合并在 SQL/看板/执行文档复用 | ARCH + FIN | `main_path_endpoint_set` 在相关文档一致 |
|
||||||
|
| BL-005 | 国内平台分类来源统一 | `cn_platforms` 示例硬编码风险 | 改为配置表/配置中心注入,SQL 不再硬编码示例值 | PLAT + FIN | 验收 SQL 无固定数组常量 |
|
||||||
|
| BL-006 | WBS 阶段边界修正 | S0 文档中包含 S1 验收;任务重复(A0.2 与 C0.1) | 任务按阶段归属,重复任务合并,里程碑不跨阶段串名 | PMO + QA | WBS 中无重复任务、阶段名与验收名一致 |
|
||||||
|
| BL-007 | 依赖拓扑重排 | 多个任务截止日早于其依赖项 | 发布“依赖重排版任务表”,所有依赖满足时间顺序 | PMO | 依赖零倒挂 |
|
||||||
|
| BL-008 | 安全 SQL 方言统一 | PostgreSQL 架构中出现 MySQL 方言样例 | 安全文档 SQL/触发器改为 PostgreSQL 可执行语法 | SEC + DBA | SQL 可在 PostgreSQL 演练通过 |
|
||||||
|
| BL-009 | 责任人实名化 | 关键任务仍为角色占位 | 所有 P0/P1 任务实名 owner+backup+on-call | PMO | 无占位符角色 |
|
||||||
|
| BL-010 | 验收门禁唯一化 | 同一指标出现多阈值版本 | 建立“唯一验收门禁表”,其余文档只引用 ID | ARCH + QA | 验收阈值单一来源 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. P1 收敛项(建议在两周内完成)
|
||||||
|
|
||||||
|
| ID | 收敛项 | 当前问题 | 目标状态(v4.1) | 责任角色 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| BL-011 | 测试栈与实现栈对齐 | Go 主线,测试方案以 Python 工程骨架为主 | 单测/集成/契约主路径以 Go 工具链为主,Python 仅保留工具脚本 | QA + 后端 |
|
||||||
|
| BL-012 | 运维复杂度分层 | S0/S1 引入过多基础设施组件 | 定义“最小可运营栈”与“触发式扩容栈”分界 | ARCH + SRE |
|
||||||
|
| BL-013 | 文档引用规范 | 部分文档引用旧版本基线 | 全部引用最新主基线文档与唯一验收表 | PMO |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 文件级修订清单(按优先级)
|
||||||
|
|
||||||
|
| 优先级 | 文件 | 必改项 |
|
||||||
|
|---|---|---|
|
||||||
|
| P0 | `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md` | 统一版本号;统一阶段日期;统一 S2 验收目标;清理弹性目标与最终目标冲突 |
|
||||||
|
| P0 | `s0_wbs_detailed_v1_2026-03-18.md` | 校正 S0 边界;移除或重命名 S1 验收项;合并重复任务;修复里程碑命名 |
|
||||||
|
| P0 | `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md` | 修复所有依赖倒挂;P0/P1 任务实名化 |
|
||||||
|
| P0 | `router_core_takeover_execution_plan_v3_2026-03-17.md` | 与指标 SQL 的主路径端点定义逐字一致 |
|
||||||
|
| P0 | `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md` | `cn_platforms` 改配置源;主路径集合与执行文档一致 |
|
||||||
|
| P0 | `security_solution_v1_2026-03-18.md` | SQL 方言改为 PostgreSQL;触发器/审计示例可执行化 |
|
||||||
|
| P1 | `technical_architecture_design_v1_2026-03-18.md` | 定义 S0/S1 最小栈,组件引入条件化 |
|
||||||
|
| P1 | `test_plan_design_v1_2026-03-18.md` | Go 主测试链路替换 Python 工程骨架示例 |
|
||||||
|
| P1 | `architecture_solution_v1_2026-03-18.md` | 将“30-40%终态”改为“过程缓冲,不改变最终验收目标” |
|
||||||
|
| P1 | `s2_takeover_buffer_strategy_v1_2026-03-18.md` | 保留预警与止损,不覆盖 S2 最终验收口径 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 裁决项(需要一次会议拍板)
|
||||||
|
|
||||||
|
1. S0 周期最终裁决:4周 / 12周 / 15周(三选一)。
|
||||||
|
2. S1 与 S0 是否并行:并行 / 串行(二选一)。
|
||||||
|
3. S2 目标是否允许“验收降档”:不允许(推荐)/ 允许一次。
|
||||||
|
4. 主技术栈是否冻结为 Go + PostgreSQL:是(推荐)/ 否。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 验收清单(v4.1 收敛完成判据)
|
||||||
|
|
||||||
|
1. 文档一致性检查通过:版本、日期、阶段、目标值零冲突。
|
||||||
|
2. 依赖拓扑检查通过:关键任务零倒挂。
|
||||||
|
3. 指标口径检查通过:主路径端点、国内平台分类来源、SQL 公式一致。
|
||||||
|
4. 栈一致性检查通过:架构、测试、SQL 示例与主栈一致。
|
||||||
|
5. 责任闭环检查通过:P0/P1 任务均有 owner+backup+on-call。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 建议执行顺序(48小时)
|
||||||
|
|
||||||
|
1. 0-4小时:完成裁决项并冻结 SSOT。
|
||||||
|
2. 4-12小时:修订主基线 + WBS + 任务单 + 指标口径。
|
||||||
|
3. 12-24小时:修订安全 SQL、测试栈、架构最小栈。
|
||||||
|
4. 24-36小时:进行一次全量交叉核对(口径/依赖/验收)。
|
||||||
|
5. 36-48小时:输出 `v4.1` 最终发布版并锁定变更窗口。
|
||||||
|
|
||||||
86
docs/v4_1_post_fix_review_report_2026-03-18.md
Normal file
86
docs/v4_1_post_fix_review_report_2026-03-18.md
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# v4.1 收敛整改后第三轮全面复审报告
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 对照基线:`v4_1_baseline_convergence_checklist_2026-03-18.md`
|
||||||
|
- 结论级别:`CONDITIONAL GO`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 结论摘要
|
||||||
|
|
||||||
|
1. 本轮已完成主要 P0 收敛项,原先阻断实施的核心矛盾基本关闭。
|
||||||
|
2. 规划路线、接管指标口径、WBS边界、任务依赖已显著收敛。
|
||||||
|
3. 仍存在少量非阻断残余项(主要是门禁文档单点化与测试栈对齐),建议在首周内补齐。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. P0 收敛复核结果
|
||||||
|
|
||||||
|
| ID | 状态 | 复核结论 |
|
||||||
|
|---|---|---|
|
||||||
|
| BL-001 版本命名统一 | ✅ 通过 | 新增 `v4.1` 主基线文件,旧 `v3` 文件改为历史跳转 |
|
||||||
|
| BL-002 阶段时间线统一 | ✅ 通过 | S0 统一为 12 周(2026-03-18 至 2026-06-08) |
|
||||||
|
| BL-003 S2 目标值统一 | ✅ 通过 | 终验口径统一为全供应商 >=60%、国内=100%,弹性仅作过程预警 |
|
||||||
|
| BL-004 主路径端点统一 | ✅ 通过 | 执行方案与 SQL 统一为 canonical 端点集合并说明 alias 归一 |
|
||||||
|
| BL-005 国内平台来源统一 | ✅ 通过 | `cn_platforms` 改为配置表 `gateway_cn_platforms` 来源 |
|
||||||
|
| BL-006 WBS 边界修正 | ✅ 通过 | S0 文档中的目标命名与验收命名已去歧义;重复任务已改为跨Track治理任务 |
|
||||||
|
| BL-007 依赖拓扑重排 | ✅ 通过 | 已修复关键倒挂(SEC-009/UXR-001/TST-001/TST-002/GAT-002) |
|
||||||
|
| BL-008 安全 SQL 方言统一 | ✅ 通过 | 安全审计 SQL 示例已切换 PostgreSQL 语法 |
|
||||||
|
| BL-009 责任实名化 | ✅ 通过 | 角色映射升级为实名RACI并纳入 on-call |
|
||||||
|
| BL-010 验收门禁唯一化 | ⚠️ 部分通过 | 口径冲突已显著减少,但“唯一门禁表”仍建议单独固化成独立文档 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 关键证据(文件与行)
|
||||||
|
|
||||||
|
1. 主基线与周期/口径统一:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md:3`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md:110`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md:391`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md:398`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md:399`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v4_1_2026-03-18.md:400`
|
||||||
|
2. 历史版本降级为兼容引用:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v3_2026-03-18.md:5`
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v3_2026-03-18.md:13`
|
||||||
|
3. WBS 阶段边界与重复任务修复:
|
||||||
|
- `s0_wbs_detailed_v1_2026-03-18.md:15`
|
||||||
|
- `s0_wbs_detailed_v1_2026-03-18.md:410`
|
||||||
|
- `s0_wbs_detailed_v1_2026-03-18.md:420`
|
||||||
|
4. 依赖倒挂修复与实名RACI:
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:20`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:31`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:71`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:107`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:109`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:110`
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md:113`
|
||||||
|
5. 指标口径与平台分类来源统一:
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md:31`
|
||||||
|
- `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md:99`
|
||||||
|
- `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md:111`
|
||||||
|
- `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md:392`
|
||||||
|
6. 安全 SQL 方言收敛:
|
||||||
|
- `security_solution_v1_2026-03-18.md:51`
|
||||||
|
- `security_solution_v1_2026-03-18.md:78`
|
||||||
|
- `security_solution_v1_2026-03-18.md:113`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 残余风险(非阻断)
|
||||||
|
|
||||||
|
1. 验收阈值仍分散在多文档,建议新增“唯一门禁表”文档并由其他文档只引用。
|
||||||
|
2. `test_plan_design` 仍偏 Python 示例,与 Go 主实现栈存在偏差,建议首周收敛。
|
||||||
|
3. `technical_architecture_design` 仍包含较重组件组合,建议在 S0/S1 明确最小可运营栈并设触发式扩容条件。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 实施建议
|
||||||
|
|
||||||
|
1. 可按 `CONDITIONAL GO` 进入实施,但以 v4.1 主基线作为唯一执行口径。
|
||||||
|
2. 首周优先完成以下两项:
|
||||||
|
- 输出“唯一验收门禁表(单文档)”;
|
||||||
|
- 输出“Go 主测试链路对齐版测试方案”。
|
||||||
|
3. 每周例会固定核对:时间线、接管率口径、依赖拓扑、证据包完整性。
|
||||||
|
|
||||||
89
docs/v4_1_re_review_report_2026-03-18.md
Normal file
89
docs/v4_1_re_review_report_2026-03-18.md
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# v4.1 基线收敛后二次全面核对报告
|
||||||
|
|
||||||
|
- 版本:v1.0
|
||||||
|
- 日期:2026-03-18
|
||||||
|
- 对照清单:`v4_1_baseline_convergence_checklist_2026-03-18.md`
|
||||||
|
- 结论级别:`NO-GO(需先完成P0收敛)`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 核对结论总览
|
||||||
|
|
||||||
|
| 分类 | 结果 |
|
||||||
|
|---|---|
|
||||||
|
| 路线清晰可行性 | 不通过(基线冲突) |
|
||||||
|
| 技术规划可靠可实现性 | 有条件通过(需先修P0) |
|
||||||
|
| 任务分解与最小可验证目标 | 有条件通过(依赖倒挂未清) |
|
||||||
|
| 是否可直接进入实施 | 不建议 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. P0 核对结果
|
||||||
|
|
||||||
|
| ID | 状态 | 说明 |
|
||||||
|
|---|---|---|
|
||||||
|
| BL-001 版本命名统一 | ❌ 未通过 | 同一主文档中同时出现 v4.0 与 v3.0 表述 |
|
||||||
|
| BL-002 阶段时间线统一 | ❌ 未通过 | S0 出现 4周/12周/15周三套周期 |
|
||||||
|
| BL-003 S2目标值统一 | ❌ 未通过 | 同时存在 >=60%、50-60%、30-40% 等口径 |
|
||||||
|
| BL-004 主路径端点统一 | ⚠️ 部分通过 | 文本端点定义与 SQL 仍有潜在漂移风险 |
|
||||||
|
| BL-005 国内平台来源统一 | ❌ 未通过 | 验收 SQL 仍使用示例常量数组 |
|
||||||
|
| BL-006 WBS阶段边界修正 | ❌ 未通过 | S0 WBS 中包含 S1 验收项,且存在重复任务 |
|
||||||
|
| BL-007 依赖拓扑重排 | ❌ 未通过 | 多个任务依赖晚于任务截止,存在倒挂 |
|
||||||
|
| BL-008 安全SQL方言统一 | ❌ 未通过 | PostgreSQL 技术栈中仍有 MySQL 风格示例 |
|
||||||
|
| BL-009 责任实名化 | ⚠️ 部分通过 | 任务单仍保留角色占位,实名未完成 |
|
||||||
|
| BL-010 验收门禁唯一化 | ❌ 未通过 | 同指标在多文档出现不同阈值 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 关键证据索引(文件与行)
|
||||||
|
|
||||||
|
1. 版本号冲突:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v3_2026-03-18.md`:行 3(v4.0)、行 589(v3.0)。
|
||||||
|
2. S0 周期冲突:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v3_2026-03-18.md`:行 110(2026-03-18 至 2026-04-15)。
|
||||||
|
- `s0_wbs_detailed_v1_2026-03-18.md`:行 4-5(2026-03-18 至 2026-06-08,12周)。
|
||||||
|
- `resource_assessment_plan_v1_2026-03-18.md`:行 51(S0 建议 15周)。
|
||||||
|
3. S2 目标冲突:
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v3_2026-03-18.md`:行 391(>=60%)。
|
||||||
|
- `llm_gateway_subapi_evolution_plan_v3_2026-03-18.md`:行 398(50-60%)。
|
||||||
|
- `architecture_solution_v1_2026-03-18.md`:行 23(30-40%)。
|
||||||
|
4. 依赖倒挂:
|
||||||
|
- `subapi_integration_risk_controls_execution_tasks_v1_2026-03-17.md`:SEC-009 依赖 SEC-003/SEC-004 但截止更早。
|
||||||
|
- 同文件:UXR-001 依赖 PROD-001 但截止更早。
|
||||||
|
- 同文件:TST-001 依赖 COMP-005 但截止更早。
|
||||||
|
- 同文件:TST-002、GAT-002 同类问题。
|
||||||
|
5. 指标口径风险:
|
||||||
|
- `router_core_takeover_execution_plan_v3_2026-03-17.md`:行 31(`/responses`)。
|
||||||
|
- `router_core_takeover_metrics_sql_dashboard_v1_2026-03-17.md`:行 78/132/342(`/v1/responses`)。
|
||||||
|
- 同文件:行 108/325(`ARRAY['antigravity']`)。
|
||||||
|
6. WBS 边界与重复:
|
||||||
|
- `s0_wbs_detailed_v1_2026-03-18.md`:行 410(S1阶段验收出现在 S0 文档)。
|
||||||
|
- 同文件:A0.2.* 与 C0.1.* 功能重复(Key 生成/验证/激活码)。
|
||||||
|
7. SQL 方言错位:
|
||||||
|
- `technical_architecture_design_v1_2026-03-18.md`:PostgreSQL。
|
||||||
|
- `security_solution_v1_2026-03-18.md`:`AUTO_INCREMENT`、`DELIMITER` 等非 PostgreSQL 语法。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 影响评估
|
||||||
|
|
||||||
|
1. 若不先收敛,项目将出现“执行按 A 文档、验收按 B 文档”的治理失效。
|
||||||
|
2. 依赖倒挂会导致风险控制任务名义完成但实际不可执行。
|
||||||
|
3. 指标口径漂移会使 S2 接管率验收存在统计争议,直接影响 GO/NO-GO。
|
||||||
|
4. SQL 方言错位会在安全落地时失败,形成“纸面合规”。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 复审建议(进入实施前)
|
||||||
|
|
||||||
|
1. 先完成全部 P0 收敛项,再允许实施排期生效。
|
||||||
|
2. 发布 v4.1 主基线后,冻结 7 天不改目标值,仅允许修正文案与引用。
|
||||||
|
3. 以“唯一验收门禁表”作为每周评审唯一口径,禁止多文档并行解释。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 复审结论
|
||||||
|
|
||||||
|
当前状态不建议直接进入实施。
|
||||||
|
建议在 48 小时内完成 v4.1 收敛整改后,再做一次最终 GO/NO-GO 复审。
|
||||||
|
|
||||||
1
llm-gateway-competitors/litellm
Submodule
1
llm-gateway-competitors/litellm
Submodule
Submodule llm-gateway-competitors/litellm added at cd37ee1459
1
llm-gateway-competitors/litellm-sparse
Submodule
1
llm-gateway-competitors/litellm-sparse
Submodule
Submodule llm-gateway-competitors/litellm-sparse added at 58e74a631c
@@ -0,0 +1,26 @@
|
|||||||
|
Portions of this software are licensed as follows:
|
||||||
|
|
||||||
|
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
|
||||||
|
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
|
||||||
|
---
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 Berri AI
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
@@ -0,0 +1,555 @@
|
|||||||
|
Metadata-Version: 2.1
|
||||||
|
Name: litellm
|
||||||
|
Version: 1.82.2
|
||||||
|
Summary: Library to easily interface with LLM API providers
|
||||||
|
License: MIT
|
||||||
|
Author: BerriAI
|
||||||
|
Requires-Python: >=3.9,<4.0
|
||||||
|
Classifier: License :: OSI Approved :: MIT License
|
||||||
|
Classifier: Programming Language :: Python :: 3
|
||||||
|
Classifier: Programming Language :: Python :: 3.9
|
||||||
|
Classifier: Programming Language :: Python :: 3.10
|
||||||
|
Classifier: Programming Language :: Python :: 3.11
|
||||||
|
Classifier: Programming Language :: Python :: 3.12
|
||||||
|
Classifier: Programming Language :: Python :: 3.13
|
||||||
|
Provides-Extra: caching
|
||||||
|
Provides-Extra: extra-proxy
|
||||||
|
Provides-Extra: google
|
||||||
|
Provides-Extra: grpc
|
||||||
|
Provides-Extra: mlflow
|
||||||
|
Provides-Extra: proxy
|
||||||
|
Provides-Extra: semantic-router
|
||||||
|
Provides-Extra: utils
|
||||||
|
Requires-Dist: PyJWT (>=2.10.1,<3.0.0) ; (python_version >= "3.9") and (extra == "proxy")
|
||||||
|
Requires-Dist: a2a-sdk (>=0.3.22,<0.4.0) ; (python_version >= "3.10") and (extra == "extra-proxy")
|
||||||
|
Requires-Dist: aiohttp (>=3.10)
|
||||||
|
Requires-Dist: apscheduler (>=3.10.4,<4.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: azure-identity (>=1.15.0,<2.0.0) ; (python_version >= "3.9") and (extra == "proxy" or extra == "extra-proxy")
|
||||||
|
Requires-Dist: azure-keyvault-secrets (>=4.8.0,<5.0.0) ; extra == "extra-proxy"
|
||||||
|
Requires-Dist: azure-storage-blob (>=12.25.1,<13.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: backoff ; extra == "proxy"
|
||||||
|
Requires-Dist: boto3 (>=1.40.76,<2.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: click
|
||||||
|
Requires-Dist: cryptography ; extra == "proxy"
|
||||||
|
Requires-Dist: diskcache (>=5.6.1,<6.0.0) ; extra == "caching"
|
||||||
|
Requires-Dist: fastapi (>=0.120.1) ; extra == "proxy"
|
||||||
|
Requires-Dist: fastapi-sso (>=0.16.0,<0.17.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: fastuuid (>=0.13.0)
|
||||||
|
Requires-Dist: google-cloud-aiplatform (>=1.38.0) ; extra == "google"
|
||||||
|
Requires-Dist: google-cloud-iam (>=2.19.1,<3.0.0) ; extra == "extra-proxy"
|
||||||
|
Requires-Dist: google-cloud-kms (>=2.21.3,<3.0.0) ; extra == "extra-proxy"
|
||||||
|
Requires-Dist: grpcio (>=1.62.3,!=1.68.*,!=1.69.*,!=1.70.*,!=1.71.0,!=1.71.1,!=1.72.0,!=1.72.1,!=1.73.0) ; (python_version < "3.14") and (extra == "grpc")
|
||||||
|
Requires-Dist: grpcio (>=1.75.0) ; (python_version >= "3.14") and (extra == "grpc")
|
||||||
|
Requires-Dist: gunicorn (>=23.0.0,<24.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: httpx (>=0.23.0)
|
||||||
|
Requires-Dist: importlib-metadata (>=6.8.0)
|
||||||
|
Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
|
||||||
|
Requires-Dist: jsonschema (>=4.23.0,<5.0.0)
|
||||||
|
Requires-Dist: litellm-enterprise (>=0.1.33,<0.2.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: litellm-proxy-extras (>=0.4.56,<0.5.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: mcp (>=1.25.0,<2.0.0) ; (python_version >= "3.10") and (extra == "proxy")
|
||||||
|
Requires-Dist: mlflow (>3.1.4) ; (python_version >= "3.10") and (extra == "mlflow")
|
||||||
|
Requires-Dist: numpydoc ; extra == "utils"
|
||||||
|
Requires-Dist: openai (>=2.8.0)
|
||||||
|
Requires-Dist: orjson (>=3.9.7,<4.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: polars (>=1.31.0,<2.0.0) ; (python_version >= "3.10") and (extra == "proxy")
|
||||||
|
Requires-Dist: prisma (>=0.11.0,<0.12.0) ; extra == "extra-proxy"
|
||||||
|
Requires-Dist: pydantic (>=2.5.0,<3.0.0)
|
||||||
|
Requires-Dist: pynacl (>=1.5.0,<2.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: pyroscope-io (>=0.8,<0.9) ; (sys_platform != "win32") and (extra == "proxy")
|
||||||
|
Requires-Dist: python-dotenv (>=0.2.0)
|
||||||
|
Requires-Dist: python-multipart (>=0.0.20) ; extra == "proxy"
|
||||||
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: redisvl (>=0.4.1,<0.5.0) ; (python_version >= "3.9" and python_version < "3.14") and (extra == "extra-proxy")
|
||||||
|
Requires-Dist: resend (>=0.8.0) ; extra == "extra-proxy"
|
||||||
|
Requires-Dist: rich (>=13.7.1,<14.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: rq ; extra == "proxy"
|
||||||
|
Requires-Dist: semantic-router (>=0.1.12) ; (python_version >= "3.9" and python_version < "3.14") and (extra == "semantic-router")
|
||||||
|
Requires-Dist: soundfile (>=0.12.1,<0.13.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: tiktoken (>=0.7.0)
|
||||||
|
Requires-Dist: tokenizers
|
||||||
|
Requires-Dist: uvicorn (>=0.32.1,<1.0.0) ; extra == "proxy"
|
||||||
|
Requires-Dist: uvloop (>=0.21.0,<0.22.0) ; (sys_platform != "win32") and (extra == "proxy")
|
||||||
|
Requires-Dist: websockets (>=15.0.1,<16.0.0) ; extra == "proxy"
|
||||||
|
Project-URL: Documentation, https://docs.litellm.ai
|
||||||
|
Project-URL: Homepage, https://litellm.ai
|
||||||
|
Project-URL: Repository, https://github.com/BerriAI/litellm
|
||||||
|
Project-URL: documentation, https://docs.litellm.ai
|
||||||
|
Project-URL: homepage, https://litellm.ai
|
||||||
|
Project-URL: repository, https://github.com/BerriAI/litellm
|
||||||
|
Description-Content-Type: text/markdown
|
||||||
|
|
||||||
|
<h1 align="center">
|
||||||
|
🚅 LiteLLM
|
||||||
|
</h1>
|
||||||
|
<p align="center">
|
||||||
|
<p align="center">Call 100+ LLMs in OpenAI format. [Bedrock, Azure, OpenAI, VertexAI, Anthropic, Groq, etc.]
|
||||||
|
</p>
|
||||||
|
<p align="center">
|
||||||
|
<a href="https://render.com/deploy?repo=https://github.com/BerriAI/litellm" target="_blank" rel="nofollow"><img src="https://render.com/images/deploy-to-render-button.svg" alt="Deploy to Render"></a>
|
||||||
|
<a href="https://railway.app/template/HLP0Ub?referralCode=jch2ME">
|
||||||
|
<img src="https://railway.app/button.svg" alt="Deploy on Railway">
|
||||||
|
</a>
|
||||||
|
</p>
|
||||||
|
</p>
|
||||||
|
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (AI Gateway)</a> | <a href="https://docs.litellm.ai/docs/enterprise#hosted-litellm-proxy" target="_blank"> Hosted Proxy</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
|
||||||
|
<h4 align="center">
|
||||||
|
<a href="https://pypi.org/project/litellm/" target="_blank">
|
||||||
|
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
|
||||||
|
</a>
|
||||||
|
<a href="https://www.ycombinator.com/companies/berriai">
|
||||||
|
<img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
|
||||||
|
</a>
|
||||||
|
<a href="https://wa.link/huol9n">
|
||||||
|
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square" alt="Whatsapp">
|
||||||
|
</a>
|
||||||
|
<a href="https://discord.gg/wuPM9dRgDw">
|
||||||
|
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square" alt="Discord">
|
||||||
|
</a>
|
||||||
|
<a href="https://www.litellm.ai/support">
|
||||||
|
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=Slack&color=black&logo=Slack&style=flat-square" alt="Slack">
|
||||||
|
</a>
|
||||||
|
</h4>
|
||||||
|
|
||||||
|
<img width="2688" height="1600" alt="Group 7154 (1)" src="https://github.com/user-attachments/assets/c5ee0412-6fb5-4fb6-ab5b-bafae4209ca6" />
|
||||||
|
|
||||||
|
|
||||||
|
## Use LiteLLM for
|
||||||
|
|
||||||
|
<details open>
|
||||||
|
<summary><b>LLMs</b> - Call 100+ LLMs (Python SDK + AI Gateway)</summary>
|
||||||
|
|
||||||
|
[**All Supported Endpoints**](https://docs.litellm.ai/docs/supported_endpoints) - `/chat/completions`, `/responses`, `/embeddings`, `/images`, `/audio`, `/batches`, `/rerank`, `/a2a`, `/messages` and more.
|
||||||
|
|
||||||
|
### Python SDK
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install litellm
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["OPENAI_API_KEY"] = "your-openai-key"
|
||||||
|
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
|
||||||
|
|
||||||
|
# OpenAI
|
||||||
|
response = completion(model="openai/gpt-4o", messages=[{"role": "user", "content": "Hello!"}])
|
||||||
|
|
||||||
|
# Anthropic
|
||||||
|
response = completion(model="anthropic/claude-sonnet-4-20250514", messages=[{"role": "user", "content": "Hello!"}])
|
||||||
|
```
|
||||||
|
|
||||||
|
### AI Gateway (Proxy Server)
|
||||||
|
|
||||||
|
[**Getting Started - E2E Tutorial**](https://docs.litellm.ai/docs/proxy/docker_quick_start) - Setup virtual keys, make your first request
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install 'litellm[proxy]'
|
||||||
|
litellm --model gpt-4o
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-4o",
|
||||||
|
messages=[{"role": "user", "content": "Hello!"}]
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
[**Docs: LLM Providers**](https://docs.litellm.ai/docs/providers)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><b>Agents</b> - Invoke A2A Agents (Python SDK + AI Gateway)</summary>
|
||||||
|
|
||||||
|
[**Supported Providers**](https://docs.litellm.ai/docs/a2a#add-a2a-agents) - LangGraph, Vertex AI Agent Engine, Azure AI Foundry, Bedrock AgentCore, Pydantic AI
|
||||||
|
|
||||||
|
### Python SDK - A2A Protocol
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm.a2a_protocol import A2AClient
|
||||||
|
from a2a.types import SendMessageRequest, MessageSendParams
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
client = A2AClient(base_url="http://localhost:10001")
|
||||||
|
|
||||||
|
request = SendMessageRequest(
|
||||||
|
id=str(uuid4()),
|
||||||
|
params=MessageSendParams(
|
||||||
|
message={
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{"kind": "text", "text": "Hello!"}],
|
||||||
|
"messageId": uuid4().hex,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await client.send_message(request)
|
||||||
|
```
|
||||||
|
|
||||||
|
### AI Gateway (Proxy Server)
|
||||||
|
|
||||||
|
**Step 1.** [Add your Agent to the AI Gateway](https://docs.litellm.ai/docs/a2a#adding-your-agent)
|
||||||
|
|
||||||
|
**Step 2.** Call Agent via A2A SDK
|
||||||
|
|
||||||
|
```python
|
||||||
|
from a2a.client import A2ACardResolver, A2AClient
|
||||||
|
from a2a.types import MessageSendParams, SendMessageRequest
|
||||||
|
from uuid import uuid4
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
base_url = "http://localhost:4000/a2a/my-agent" # LiteLLM proxy + agent name
|
||||||
|
headers = {"Authorization": "Bearer sk-1234"} # LiteLLM Virtual Key
|
||||||
|
|
||||||
|
async with httpx.AsyncClient(headers=headers) as httpx_client:
|
||||||
|
resolver = A2ACardResolver(httpx_client=httpx_client, base_url=base_url)
|
||||||
|
agent_card = await resolver.get_agent_card()
|
||||||
|
client = A2AClient(httpx_client=httpx_client, agent_card=agent_card)
|
||||||
|
|
||||||
|
request = SendMessageRequest(
|
||||||
|
id=str(uuid4()),
|
||||||
|
params=MessageSendParams(
|
||||||
|
message={
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{"kind": "text", "text": "Hello!"}],
|
||||||
|
"messageId": uuid4().hex,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await client.send_message(request)
|
||||||
|
```
|
||||||
|
|
||||||
|
[**Docs: A2A Agent Gateway**](https://docs.litellm.ai/docs/a2a)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><b>MCP Tools</b> - Connect MCP servers to any LLM (Python SDK + AI Gateway)</summary>
|
||||||
|
|
||||||
|
### Python SDK - MCP Bridge
|
||||||
|
|
||||||
|
```python
|
||||||
|
from mcp import ClientSession, StdioServerParameters
|
||||||
|
from mcp.client.stdio import stdio_client
|
||||||
|
from litellm import experimental_mcp_client
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
server_params = StdioServerParameters(command="python", args=["mcp_server.py"])
|
||||||
|
|
||||||
|
async with stdio_client(server_params) as (read, write):
|
||||||
|
async with ClientSession(read, write) as session:
|
||||||
|
await session.initialize()
|
||||||
|
|
||||||
|
# Load MCP tools in OpenAI format
|
||||||
|
tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai")
|
||||||
|
|
||||||
|
# Use with any LiteLLM model
|
||||||
|
response = await litellm.acompletion(
|
||||||
|
model="gpt-4o",
|
||||||
|
messages=[{"role": "user", "content": "What's 3 + 5?"}],
|
||||||
|
tools=tools
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### AI Gateway - MCP Gateway
|
||||||
|
|
||||||
|
**Step 1.** [Add your MCP Server to the AI Gateway](https://docs.litellm.ai/docs/mcp#adding-your-mcp)
|
||||||
|
|
||||||
|
**Step 2.** Call MCP tools via `/chat/completions`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"messages": [{"role": "user", "content": "Summarize the latest open PR"}],
|
||||||
|
"tools": [{
|
||||||
|
"type": "mcp",
|
||||||
|
"server_url": "litellm_proxy/mcp/github",
|
||||||
|
"server_label": "github_mcp",
|
||||||
|
"require_approval": "never"
|
||||||
|
}]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use with Cursor IDE
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"LiteLLM": {
|
||||||
|
"url": "http://localhost:4000/mcp/",
|
||||||
|
"headers": {
|
||||||
|
"x-litellm-api-key": "Bearer sk-1234"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
[**Docs: MCP Gateway**](https://docs.litellm.ai/docs/mcp)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to use LiteLLM
|
||||||
|
|
||||||
|
You can use LiteLLM through either the Proxy Server or Python SDK. Both gives you a unified interface to access multiple LLMs (100+ LLMs). Choose the option that best fits your needs:
|
||||||
|
|
||||||
|
<table style={{width: '100%', tableLayout: 'fixed'}}>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style={{width: '14%'}}></th>
|
||||||
|
<th style={{width: '43%'}}><strong><a href="https://docs.litellm.ai/docs/simple_proxy">LiteLLM AI Gateway</a></strong></th>
|
||||||
|
<th style={{width: '43%'}}><strong><a href="https://docs.litellm.ai/docs/">LiteLLM Python SDK</a></strong></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style={{width: '14%'}}><strong>Use Case</strong></td>
|
||||||
|
<td style={{width: '43%'}}>Central service (LLM Gateway) to access multiple LLMs</td>
|
||||||
|
<td style={{width: '43%'}}>Use LiteLLM directly in your Python code</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style={{width: '14%'}}><strong>Who Uses It?</strong></td>
|
||||||
|
<td style={{width: '43%'}}>Gen AI Enablement / ML Platform Teams</td>
|
||||||
|
<td style={{width: '43%'}}>Developers building LLM projects</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style={{width: '14%'}}><strong>Key Features</strong></td>
|
||||||
|
<td style={{width: '43%'}}>Centralized API gateway with authentication and authorization, multi-tenant cost tracking and spend management per project/user, per-project customization (logging, guardrails, caching), virtual keys for secure access control, admin dashboard UI for monitoring and management</td>
|
||||||
|
<td style={{width: '43%'}}>Direct Python library integration in your codebase, Router with retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - <a href="https://docs.litellm.ai/docs/routing">Router</a>, application-level load balancing and cost tracking, exception handling with OpenAI-compatible errors, observability callbacks (Lunary, MLflow, Langfuse, etc.)</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
LiteLLM Performance: **8ms P95 latency** at 1k RPS (See benchmarks [here](https://docs.litellm.ai/docs/benchmarks))
|
||||||
|
|
||||||
|
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://docs.litellm.ai/docs/simple_proxy) <br>
|
||||||
|
[**Jump to Supported LLM Providers**](https://docs.litellm.ai/docs/providers)
|
||||||
|
|
||||||
|
**Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. [More information about the release cycle here](https://docs.litellm.ai/docs/proxy/release_cycle)
|
||||||
|
|
||||||
|
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
|
||||||
|
|
||||||
|
## OSS Adopters
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td><img height="60" alt="Stripe" src="https://github.com/user-attachments/assets/f7296d4f-9fbd-460d-9d05-e4df31697c4b" /></td>
|
||||||
|
<td><img height="60" alt="Google ADK" src="https://github.com/user-attachments/assets/caf270a2-5aee-45c4-8222-41a2070c4f19" /></td>
|
||||||
|
<td><img height="60" alt="Greptile" src="https://github.com/user-attachments/assets/0be4bd8a-7cfa-48d3-9090-f415fe948280" /></td>
|
||||||
|
<td><img height="60" alt="OpenHands" src="https://github.com/user-attachments/assets/a6150c4c-149e-4cae-888b-8b92be6e003f" /></td>
|
||||||
|
<td><h2>Netflix</h2></td>
|
||||||
|
<td><img height="60" alt="OpenAI Agents SDK" src="https://github.com/user-attachments/assets/c02f7be0-8c2e-4d27-aea7-7c024bfaebc0" /></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
## Supported Providers ([Website Supported Models](https://models.litellm.ai/) | [Docs](https://docs.litellm.ai/docs/providers))
|
||||||
|
|
||||||
|
| Provider | `/chat/completions` | `/messages` | `/responses` | `/embeddings` | `/image/generations` | `/audio/transcriptions` | `/audio/speech` | `/moderations` | `/batches` | `/rerank` |
|
||||||
|
|-------------------------------------------------------------------------------------|---------------------|-------------|--------------|---------------|----------------------|-------------------------|-----------------|----------------|-----------|-----------|
|
||||||
|
| [Abliteration (`abliteration`)](https://docs.litellm.ai/docs/providers/abliteration) | ✅ | | | | | | | | | |
|
||||||
|
| [AI/ML API (`aiml`)](https://docs.litellm.ai/docs/providers/aiml) | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
|
||||||
|
| [AI21 (`ai21`)](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [AI21 Chat (`ai21_chat`)](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Aleph Alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Amazon Nova](https://docs.litellm.ai/docs/providers/amazon_nova) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Anthropic (`anthropic`)](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | | | | | | ✅ | |
|
||||||
|
| [Anthropic Text (`anthropic_text`)](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | | | | | | ✅ | |
|
||||||
|
| [Anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [AssemblyAI (`assemblyai`)](https://docs.litellm.ai/docs/pass_through/assembly_ai) | ✅ | ✅ | ✅ | | | ✅ | | | | |
|
||||||
|
| [Auto Router (`auto_router`)](https://docs.litellm.ai/docs/proxy/auto_routing) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [AWS - Bedrock (`bedrock`)](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | | | | | | ✅ |
|
||||||
|
| [AWS - Sagemaker (`sagemaker`)](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [Azure (`azure`)](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [Azure AI (`azure_ai`)](https://docs.litellm.ai/docs/providers/azure_ai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [Azure Text (`azure_text`)](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [Baseten (`baseten`)](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Bytez (`bytez`)](https://docs.litellm.ai/docs/providers/bytez) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Cerebras (`cerebras`)](https://docs.litellm.ai/docs/providers/cerebras) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Clarifai (`clarifai`)](https://docs.litellm.ai/docs/providers/clarifai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Cloudflare AI Workers (`cloudflare`)](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Codestral (`codestral`)](https://docs.litellm.ai/docs/providers/codestral) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Cohere (`cohere`)](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | | | | | | ✅ |
|
||||||
|
| [Cohere Chat (`cohere_chat`)](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [CometAPI (`cometapi`)](https://docs.litellm.ai/docs/providers/cometapi) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [CompactifAI (`compactifai`)](https://docs.litellm.ai/docs/providers/compactifai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Custom (`custom`)](https://docs.litellm.ai/docs/providers/custom_llm_server) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Custom OpenAI (`custom_openai`)](https://docs.litellm.ai/docs/providers/openai_compatible) | ✅ | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [Dashscope (`dashscope`)](https://docs.litellm.ai/docs/providers/dashscope) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Databricks (`databricks`)](https://docs.litellm.ai/docs/providers/databricks) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [DataRobot (`datarobot`)](https://docs.litellm.ai/docs/providers/datarobot) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Deepgram (`deepgram`)](https://docs.litellm.ai/docs/providers/deepgram) | ✅ | ✅ | ✅ | | | ✅ | | | | |
|
||||||
|
| [DeepInfra (`deepinfra`)](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Deepseek (`deepseek`)](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [ElevenLabs (`elevenlabs`)](https://docs.litellm.ai/docs/providers/elevenlabs) | ✅ | ✅ | ✅ | | | ✅ | ✅ | | | |
|
||||||
|
| [Empower (`empower`)](https://docs.litellm.ai/docs/providers/empower) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Fal AI (`fal_ai`)](https://docs.litellm.ai/docs/providers/fal_ai) | ✅ | ✅ | ✅ | | ✅ | | | | | |
|
||||||
|
| [Featherless AI (`featherless_ai`)](https://docs.litellm.ai/docs/providers/featherless_ai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Fireworks AI (`fireworks_ai`)](https://docs.litellm.ai/docs/providers/fireworks_ai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [FriendliAI (`friendliai`)](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Galadriel (`galadriel`)](https://docs.litellm.ai/docs/providers/galadriel) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [GitHub Copilot (`github_copilot`)](https://docs.litellm.ai/docs/providers/github_copilot) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [GitHub Models (`github`)](https://docs.litellm.ai/docs/providers/github) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Google - PaLM](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Google - Vertex AI (`vertex_ai`)](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
|
||||||
|
| [Google AI Studio - Gemini (`gemini`)](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [GradientAI (`gradient_ai`)](https://docs.litellm.ai/docs/providers/gradient_ai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Groq AI (`groq`)](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Heroku (`heroku`)](https://docs.litellm.ai/docs/providers/heroku) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Hosted VLLM (`hosted_vllm`)](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Huggingface (`huggingface`)](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | | | | | | ✅ |
|
||||||
|
| [Hyperbolic (`hyperbolic`)](https://docs.litellm.ai/docs/providers/hyperbolic) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [IBM - Watsonx.ai (`watsonx`)](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [Infinity (`infinity`)](https://docs.litellm.ai/docs/providers/infinity) | | | | ✅ | | | | | | |
|
||||||
|
| [Jina AI (`jina_ai`)](https://docs.litellm.ai/docs/providers/jina_ai) | | | | ✅ | | | | | | |
|
||||||
|
| [Lambda AI (`lambda_ai`)](https://docs.litellm.ai/docs/providers/lambda_ai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Lemonade (`lemonade`)](https://docs.litellm.ai/docs/providers/lemonade) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [LiteLLM Proxy (`litellm_proxy`)](https://docs.litellm.ai/docs/providers/litellm_proxy) | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
|
||||||
|
| [Llamafile (`llamafile`)](https://docs.litellm.ai/docs/providers/llamafile) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [LM Studio (`lm_studio`)](https://docs.litellm.ai/docs/providers/lm_studio) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Maritalk (`maritalk`)](https://docs.litellm.ai/docs/providers/maritalk) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Meta - Llama API (`meta_llama`)](https://docs.litellm.ai/docs/providers/meta_llama) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Mistral AI API (`mistral`)](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [Moonshot (`moonshot`)](https://docs.litellm.ai/docs/providers/moonshot) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Morph (`morph`)](https://docs.litellm.ai/docs/providers/morph) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Nebius AI Studio (`nebius`)](https://docs.litellm.ai/docs/providers/nebius) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [NLP Cloud (`nlp_cloud`)](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Novita AI (`novita`)](https://novita.ai/models/llm?utm_source=github_litellm&utm_medium=github_readme&utm_campaign=github_link) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Nscale (`nscale`)](https://docs.litellm.ai/docs/providers/nscale) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Nvidia NIM (`nvidia_nim`)](https://docs.litellm.ai/docs/providers/nvidia_nim) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [OCI (`oci`)](https://docs.litellm.ai/docs/providers/oci) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Ollama (`ollama`)](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | | | | | | |
|
||||||
|
| [Ollama Chat (`ollama_chat`)](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Oobabooga (`oobabooga`)](https://docs.litellm.ai/docs/providers/openai_compatible) | ✅ | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [OpenAI (`openai`)](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [OpenAI-like (`openai_like`)](https://docs.litellm.ai/docs/providers/openai_compatible) | | | | ✅ | | | | | | |
|
||||||
|
| [OpenRouter (`openrouter`)](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [OVHCloud AI Endpoints (`ovhcloud`)](https://docs.litellm.ai/docs/providers/ovhcloud) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Perplexity AI (`perplexity`)](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Petals (`petals`)](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Predibase (`predibase`)](https://docs.litellm.ai/docs/providers/predibase) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Recraft (`recraft`)](https://docs.litellm.ai/docs/providers/recraft) | | | | | ✅ | | | | | |
|
||||||
|
| [Replicate (`replicate`)](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Sagemaker Chat (`sagemaker_chat`)](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Sambanova (`sambanova`)](https://docs.litellm.ai/docs/providers/sambanova) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Snowflake (`snowflake`)](https://docs.litellm.ai/docs/providers/snowflake) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Text Completion Codestral (`text-completion-codestral`)](https://docs.litellm.ai/docs/providers/codestral) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Text Completion OpenAI (`text-completion-openai`)](https://docs.litellm.ai/docs/providers/text_completion_openai) | ✅ | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | |
|
||||||
|
| [Together AI (`together_ai`)](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Topaz (`topaz`)](https://docs.litellm.ai/docs/providers/topaz) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Triton (`triton`)](https://docs.litellm.ai/docs/providers/triton-inference-server) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [V0 (`v0`)](https://docs.litellm.ai/docs/providers/v0) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Vercel AI Gateway (`vercel_ai_gateway`)](https://docs.litellm.ai/docs/providers/vercel_ai_gateway) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [VLLM (`vllm`)](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Volcengine (`volcengine`)](https://docs.litellm.ai/docs/providers/volcano) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Voyage AI (`voyage`)](https://docs.litellm.ai/docs/providers/voyage) | | | | ✅ | | | | | | |
|
||||||
|
| [WandB Inference (`wandb`)](https://docs.litellm.ai/docs/providers/wandb_inference) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Watsonx Text (`watsonx_text`)](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [xAI (`xai`)](https://docs.litellm.ai/docs/providers/xai) | ✅ | ✅ | ✅ | | | | | | | |
|
||||||
|
| [Xinference (`xinference`)](https://docs.litellm.ai/docs/providers/xinference) | | | | ✅ | | | | | | |
|
||||||
|
|
||||||
|
[**Read the Docs**](https://docs.litellm.ai/docs/)
|
||||||
|
|
||||||
|
## Run in Developer mode
|
||||||
|
### Services
|
||||||
|
1. Setup .env file in root
|
||||||
|
2. Run dependant services `docker-compose up db prometheus`
|
||||||
|
|
||||||
|
### Backend
|
||||||
|
1. (In root) create virtual environment `python -m venv .venv`
|
||||||
|
2. Activate virtual environment `source .venv/bin/activate`
|
||||||
|
3. Install dependencies `pip install -e ".[all]"`
|
||||||
|
4. `pip install prisma`
|
||||||
|
5. `prisma generate`
|
||||||
|
6. Start proxy backend `python litellm/proxy/proxy_cli.py`
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
1. Navigate to `ui/litellm-dashboard`
|
||||||
|
2. Install dependencies `npm install`
|
||||||
|
3. Run `npm run dev` to start the dashboard
|
||||||
|
|
||||||
|
# Enterprise
|
||||||
|
For companies that need better security, user management and professional support
|
||||||
|
|
||||||
|
[Talk to founders](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
|
||||||
|
|
||||||
|
This covers:
|
||||||
|
- ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
|
||||||
|
- ✅ **Feature Prioritization**
|
||||||
|
- ✅ **Custom Integrations**
|
||||||
|
- ✅ **Professional Support - Dedicated discord + slack**
|
||||||
|
- ✅ **Custom SLAs**
|
||||||
|
- ✅ **Secure access with Single Sign-On**
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
We welcome contributions to LiteLLM! Whether you're fixing bugs, adding features, or improving documentation, we appreciate your help.
|
||||||
|
|
||||||
|
## Quick Start for Contributors
|
||||||
|
|
||||||
|
This requires poetry to be installed.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/BerriAI/litellm.git
|
||||||
|
cd litellm
|
||||||
|
make install-dev # Install development dependencies
|
||||||
|
make format # Format your code
|
||||||
|
make lint # Run all linting checks
|
||||||
|
make test-unit # Run unit tests
|
||||||
|
make format-check # Check formatting only
|
||||||
|
```
|
||||||
|
|
||||||
|
For detailed contributing guidelines, see [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||||
|
|
||||||
|
## Code Quality / Linting
|
||||||
|
|
||||||
|
LiteLLM follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html).
|
||||||
|
|
||||||
|
Our automated checks include:
|
||||||
|
- **Black** for code formatting
|
||||||
|
- **Ruff** for linting and code quality
|
||||||
|
- **MyPy** for type checking
|
||||||
|
- **Circular import detection**
|
||||||
|
- **Import safety checks**
|
||||||
|
|
||||||
|
|
||||||
|
All these checks must pass before your PR can be merged.
|
||||||
|
|
||||||
|
|
||||||
|
# Support / talk with founders
|
||||||
|
|
||||||
|
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
|
||||||
|
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
|
||||||
|
- [Community Slack 💭](https://www.litellm.ai/support)
|
||||||
|
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
|
||||||
|
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
|
||||||
|
|
||||||
|
# Why did we build this
|
||||||
|
|
||||||
|
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.
|
||||||
|
|
||||||
|
# Contributors
|
||||||
|
|
||||||
|
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
|
||||||
|
<!-- prettier-ignore-start -->
|
||||||
|
<!-- markdownlint-disable -->
|
||||||
|
|
||||||
|
<!-- markdownlint-restore -->
|
||||||
|
<!-- prettier-ignore-end -->
|
||||||
|
|
||||||
|
<!-- ALL-CONTRIBUTORS-LIST:END -->
|
||||||
|
|
||||||
|
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
|
||||||
|
<img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
|
||||||
|
</a>
|
||||||
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,4 @@
|
|||||||
|
Wheel-Version: 1.0
|
||||||
|
Generator: poetry-core 1.9.1
|
||||||
|
Root-Is-Purelib: true
|
||||||
|
Tag: py3-none-any
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
[console_scripts]
|
||||||
|
litellm=litellm:run_server
|
||||||
|
litellm-proxy=litellm.proxy.client.cli:cli
|
||||||
|
|
||||||
2170
llm-gateway-competitors/litellm-wheel-src/litellm/__init__.py
Normal file
2170
llm-gateway-competitors/litellm-wheel-src/litellm/__init__.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,449 @@
|
|||||||
|
"""
|
||||||
|
Lazy Import System
|
||||||
|
|
||||||
|
This module implements lazy loading for LiteLLM attributes. Instead of importing
|
||||||
|
everything when the module loads, we only import things when they're actually used.
|
||||||
|
|
||||||
|
How it works:
|
||||||
|
1. When someone accesses `litellm.some_attribute`, Python calls __getattr__ in __init__.py
|
||||||
|
2. __getattr__ looks up the attribute name in a registry
|
||||||
|
3. The registry points to a handler function (like _lazy_import_utils)
|
||||||
|
4. The handler function imports the module and returns the attribute
|
||||||
|
5. The result is cached so we don't import it again
|
||||||
|
|
||||||
|
This makes importing litellm much faster because we don't load heavy dependencies
|
||||||
|
until they're actually needed.
|
||||||
|
"""
|
||||||
|
import importlib
|
||||||
|
import sys
|
||||||
|
from typing import Any, Optional, cast, Callable
|
||||||
|
|
||||||
|
# Import all the data structures that define what can be lazy-loaded
|
||||||
|
# These are just lists of names and maps of where to find them
|
||||||
|
from ._lazy_imports_registry import (
|
||||||
|
# Name tuples
|
||||||
|
COST_CALCULATOR_NAMES,
|
||||||
|
LITELLM_LOGGING_NAMES,
|
||||||
|
UTILS_NAMES,
|
||||||
|
TOKEN_COUNTER_NAMES,
|
||||||
|
LLM_CLIENT_CACHE_NAMES,
|
||||||
|
BEDROCK_TYPES_NAMES,
|
||||||
|
TYPES_UTILS_NAMES,
|
||||||
|
CACHING_NAMES,
|
||||||
|
HTTP_HANDLER_NAMES,
|
||||||
|
DOTPROMPT_NAMES,
|
||||||
|
LLM_CONFIG_NAMES,
|
||||||
|
TYPES_NAMES,
|
||||||
|
LLM_PROVIDER_LOGIC_NAMES,
|
||||||
|
UTILS_MODULE_NAMES,
|
||||||
|
# Import maps
|
||||||
|
_UTILS_IMPORT_MAP,
|
||||||
|
_COST_CALCULATOR_IMPORT_MAP,
|
||||||
|
_TYPES_UTILS_IMPORT_MAP,
|
||||||
|
_TOKEN_COUNTER_IMPORT_MAP,
|
||||||
|
_BEDROCK_TYPES_IMPORT_MAP,
|
||||||
|
_CACHING_IMPORT_MAP,
|
||||||
|
_LITELLM_LOGGING_IMPORT_MAP,
|
||||||
|
_DOTPROMPT_IMPORT_MAP,
|
||||||
|
_TYPES_IMPORT_MAP,
|
||||||
|
_LLM_CONFIGS_IMPORT_MAP,
|
||||||
|
_LLM_PROVIDER_LOGIC_IMPORT_MAP,
|
||||||
|
_UTILS_MODULE_IMPORT_MAP,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_litellm_globals() -> dict:
|
||||||
|
"""
|
||||||
|
Get the globals dictionary of the litellm module.
|
||||||
|
|
||||||
|
This is where we cache imported attributes so we don't import them twice.
|
||||||
|
When you do `litellm.some_function`, it gets stored in this dictionary.
|
||||||
|
"""
|
||||||
|
return sys.modules["litellm"].__dict__
|
||||||
|
|
||||||
|
|
||||||
|
def _get_utils_globals() -> dict:
|
||||||
|
"""
|
||||||
|
Get the globals dictionary of the utils module.
|
||||||
|
|
||||||
|
This is where we cache imported attributes so we don't import them twice.
|
||||||
|
When you do `litellm.utils.some_function`, it gets stored in this dictionary.
|
||||||
|
"""
|
||||||
|
return sys.modules["litellm.utils"].__dict__
|
||||||
|
|
||||||
|
|
||||||
|
# These are special lazy loaders for internal singletons. They sit outside
# the main lazy import system because each one caches a single object.

# Lazy loader for the default encoding - avoids importing the heavy tiktoken
# dependency at litellm import time.
_default_encoding: Optional[Any] = None


def _get_default_encoding() -> Any:
    """
    Return the default OpenAI encoding, importing it on first use.

    Defers the `litellm.litellm_core_utils.default_encoding` (and therefore
    tiktoken) import until actually needed; the object is cached module-wide.
    """
    global _default_encoding
    if _default_encoding is None:
        from litellm.litellm_core_utils.default_encoding import encoding

        _default_encoding = encoding
    return _default_encoding


# Lazy loader for get_modified_max_tokens, deferring the token_counter import.
_get_modified_max_tokens_func: Optional[Any] = None


def _get_modified_max_tokens() -> Any:
    """
    Return ``litellm_core_utils.token_counter.get_modified_max_tokens`` lazily.

    The import happens on the first call only; afterwards the cached function
    object is returned directly.
    """
    global _get_modified_max_tokens_func
    if _get_modified_max_tokens_func is None:
        from litellm.litellm_core_utils.token_counter import (
            get_modified_max_tokens as _imported_fn,
        )

        _get_modified_max_tokens_func = _imported_fn
    return _get_modified_max_tokens_func


# Lazy loader for token_counter (exposed internally as token_counter_new).
_token_counter_new_func: Optional[Any] = None


def _get_token_counter_new() -> Any:
    """
    Return ``litellm_core_utils.token_counter.token_counter`` lazily.

    The import happens on the first call only; afterwards the cached function
    object is returned directly.
    """
    global _token_counter_new_func
    if _token_counter_new_func is None:
        from litellm.litellm_core_utils.token_counter import (
            token_counter as _imported_fn,
        )

        _token_counter_new_func = _imported_fn
    return _token_counter_new_func
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# MAIN LAZY IMPORT SYSTEM
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Maps attribute names (like "ModelResponse") to handler functions.
# Built once, on the first access of any lazy-loaded attribute.
# Example: {"ModelResponse": _lazy_import_utils, "Cache": _lazy_import_caching}
_LAZY_IMPORT_REGISTRY: Optional[dict[str, Callable[[str], Any]]] = None


def _get_lazy_import_registry() -> dict[str, Callable[[str], Any]]:
    """
    Build (once) and return the mapping from lazy attribute names to handlers.

    Each category of names shares a single handler; if a name appears in more
    than one category, the later category wins (same precedence as the
    original sequential registration order).

    Returns:
        Dictionary like {"ModelResponse": _lazy_import_utils, ...}
    """
    global _LAZY_IMPORT_REGISTRY
    if _LAZY_IMPORT_REGISTRY is None:
        # (names, handler) pairs, in registration-precedence order.
        category_handlers = [
            (COST_CALCULATOR_NAMES, _lazy_import_cost_calculator),
            (LITELLM_LOGGING_NAMES, _lazy_import_litellm_logging),
            (UTILS_NAMES, _lazy_import_utils),
            (TOKEN_COUNTER_NAMES, _lazy_import_token_counter),
            (LLM_CLIENT_CACHE_NAMES, _lazy_import_llm_client_cache),
            (BEDROCK_TYPES_NAMES, _lazy_import_bedrock_types),
            (TYPES_UTILS_NAMES, _lazy_import_types_utils),
            (CACHING_NAMES, _lazy_import_caching),
            (HTTP_HANDLER_NAMES, _lazy_import_http_handlers),
            (DOTPROMPT_NAMES, _lazy_import_dotprompt),
            (LLM_CONFIG_NAMES, _lazy_import_llm_configs),
            (TYPES_NAMES, _lazy_import_types),
            (LLM_PROVIDER_LOGIC_NAMES, _lazy_import_llm_provider_logic),
            (UTILS_MODULE_NAMES, _lazy_import_utils_module),
        ]
        registry: dict[str, Callable[[str], Any]] = {}
        for names, handler in category_handlers:
            for attr_name in names:
                registry[attr_name] = handler
        _LAZY_IMPORT_REGISTRY = registry

    return _LAZY_IMPORT_REGISTRY
|
||||||
|
|
||||||
|
|
||||||
|
def _generic_lazy_import(
    name: str, import_map: dict[str, tuple[str, str]], category: str
) -> Any:
    """
    Import and cache a single lazily-loaded attribute.

    Looks up ``name`` in ``import_map`` (attribute name ->
    (module_path, attr_name)), imports the module, pulls the attribute off
    it, caches the result in the litellm module globals, and returns it.

    Args:
        name: The attribute being accessed (e.g. "ModelResponse").
        import_map: Lookup table; a module_path starting with "." is
            resolved relative to the "litellm" package.
        category: Label used only in the AttributeError message.

    Raises:
        AttributeError: if ``name`` is not present in ``import_map``.
    """
    # Unknown names fail fast with a category-specific message.
    try:
        module_path, attr_name = import_map[name]
    except KeyError:
        raise AttributeError(
            f"{category} lazy import: unknown attribute {name!r}"
        ) from None

    # Cached from a previous access? Return it without importing anything.
    cache = _get_litellm_globals()
    if name in cache:
        return cache[name]

    # importlib caches modules in sys.modules, so repeat imports are cheap.
    if module_path.startswith("."):
        module = importlib.import_module(module_path, package="litellm")
    else:
        module = importlib.import_module(module_path)

    value = getattr(module, attr_name)
    cache[name] = value  # memoize: next access is a plain dict hit
    return value
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# HANDLER FUNCTIONS
|
||||||
|
# ============================================================================
|
||||||
|
# These functions are called when someone accesses a lazy-loaded attribute.
|
||||||
|
# Most of them just call _generic_lazy_import with their specific import map.
|
||||||
|
# The registry (above) maps attribute names to these handler functions.
|
||||||
|
|
||||||
|
|
||||||
|
# Thin adapters: each one binds _generic_lazy_import to a single import map.
# The registry above dispatches attribute names to one of these handlers.


def _lazy_import_utils(name: str) -> Any:
    """Handler for utils module attributes (ModelResponse, token_counter, etc.)"""
    return _generic_lazy_import(name, _UTILS_IMPORT_MAP, "Utils")


def _lazy_import_cost_calculator(name: str) -> Any:
    """Handler for cost calculator functions (completion_cost, cost_per_token, etc.)"""
    return _generic_lazy_import(name, _COST_CALCULATOR_IMPORT_MAP, "Cost calculator")


def _lazy_import_token_counter(name: str) -> Any:
    """Handler for token counter utilities"""
    return _generic_lazy_import(name, _TOKEN_COUNTER_IMPORT_MAP, "Token counter")


def _lazy_import_bedrock_types(name: str) -> Any:
    """Handler for Bedrock type aliases"""
    return _generic_lazy_import(name, _BEDROCK_TYPES_IMPORT_MAP, "Bedrock types")


def _lazy_import_types_utils(name: str) -> Any:
    """Handler for types from litellm.types.utils (BudgetConfig, ImageObject, etc.)"""
    return _generic_lazy_import(name, _TYPES_UTILS_IMPORT_MAP, "Types utils")


def _lazy_import_caching(name: str) -> Any:
    """Handler for caching classes (Cache, DualCache, RedisCache, etc.)"""
    return _generic_lazy_import(name, _CACHING_IMPORT_MAP, "Caching")


def _lazy_import_dotprompt(name: str) -> Any:
    """Handler for dotprompt integration globals"""
    return _generic_lazy_import(name, _DOTPROMPT_IMPORT_MAP, "Dotprompt")


def _lazy_import_types(name: str) -> Any:
    """Handler for type classes (GuardrailItem, etc.)"""
    return _generic_lazy_import(name, _TYPES_IMPORT_MAP, "Types")


def _lazy_import_llm_configs(name: str) -> Any:
    """Handler for LLM config classes (AnthropicConfig, OpenAILikeChatConfig, etc.)"""
    return _generic_lazy_import(name, _LLM_CONFIGS_IMPORT_MAP, "LLM config")


def _lazy_import_litellm_logging(name: str) -> Any:
    """Handler for litellm_logging module (Logging, modify_integration)"""
    return _generic_lazy_import(name, _LITELLM_LOGGING_IMPORT_MAP, "Litellm logging")


def _lazy_import_llm_provider_logic(name: str) -> Any:
    """Handler for LLM provider logic functions (get_llm_provider, etc.)"""
    return _generic_lazy_import(
        name, _LLM_PROVIDER_LOGIC_IMPORT_MAP, "LLM provider logic"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _lazy_import_utils_module(name: str) -> Any:
    """
    Resolve a lazily-imported attribute of ``litellm.utils``.

    Same flow as _generic_lazy_import, but results are cached in the globals
    of ``litellm.utils`` (via _get_utils_globals) rather than the litellm
    package globals.

    Raises:
        AttributeError: if ``name`` is not in _UTILS_MODULE_IMPORT_MAP.
    """
    try:
        module_path, attr_name = _UTILS_MODULE_IMPORT_MAP[name]
    except KeyError:
        raise AttributeError(
            f"Utils module lazy import: unknown attribute {name!r}"
        ) from None

    # Cached from a previous access?
    utils_cache = _get_utils_globals()
    if name in utils_cache:
        return utils_cache[name]

    # Relative module paths are resolved against the litellm package.
    if module_path.startswith("."):
        module = importlib.import_module(module_path, package="litellm")
    else:
        module = importlib.import_module(module_path)

    value = getattr(module, attr_name)
    utils_cache[name] = value  # memoize for subsequent lookups
    return value
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# SPECIAL HANDLERS
|
||||||
|
# ============================================================================
|
||||||
|
# These handlers have custom logic that doesn't fit the generic pattern
|
||||||
|
|
||||||
|
|
||||||
|
def _lazy_import_llm_client_cache(name: str) -> Any:
    """
    Lazy-load the LLM client cache class or its singleton instance.

    Custom handler because two names share one module:
    - "LLMClientCache" resolves to the class itself
    - "in_memory_llm_clients_cache" resolves to a lazily-created singleton
      instance of that class (constructed at most once)

    Both results are cached in the litellm module globals.

    Raises:
        AttributeError: for any other name.
    """
    cache = _get_litellm_globals()
    if name in cache:
        return cache[name]

    module = importlib.import_module("litellm.caching.llm_caching_handler")
    llm_client_cache_cls = module.LLMClientCache

    if name == "LLMClientCache":
        cache["LLMClientCache"] = llm_client_cache_cls
        return llm_client_cache_cls

    if name == "in_memory_llm_clients_cache":
        # Construct the singleton once; later accesses hit the cache above.
        singleton = llm_client_cache_cls()
        cache["in_memory_llm_clients_cache"] = singleton
        return singleton

    raise AttributeError(f"LLM client cache lazy import: unknown attribute {name!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def _lazy_import_http_handlers(name: str) -> Any:
    """
    Handler for HTTP clients - has special logic for creating client instances.

    This one is different because:
    - These aren't just imports, they're actual client instances that need to be created
    - They need configuration (timeout, etc.) from the module globals
    - They use factory functions instead of direct instantiation

    Supported names:
    - "module_level_aclient": shared async client (get_async_httpx_client factory)
    - "module_level_client": shared sync client (HTTPHandler)

    Raises:
        AttributeError: for any other name.
    """
    _globals = _get_litellm_globals()

    if name == "module_level_aclient":
        # Create an async HTTP client using the factory function
        from litellm.llms.custom_httpx.http_handler import get_async_httpx_client

        # Get timeout from module config (if set); None if litellm.request_timeout
        # has not been resolved yet.
        timeout = _globals.get("request_timeout")
        params = {"timeout": timeout, "client_alias": "module level aclient"}

        # Create the client instance.
        # cast() silences the type checker: the factory expects a provider enum,
        # and this sentinel string identifies the module-level client.
        provider_id = cast(Any, "litellm_module_level_client")
        async_client = get_async_httpx_client(
            llm_provider=provider_id,
            params=params,
        )

        # Cache it so we don't create it again
        _globals["module_level_aclient"] = async_client
        return async_client

    if name == "module_level_client":
        # Create a sync HTTP client
        from litellm.llms.custom_httpx.http_handler import HTTPHandler

        timeout = _globals.get("request_timeout")
        sync_client = HTTPHandler(timeout=timeout)

        # Cache it
        _globals["module_level_client"] = sync_client
        return sync_client

    raise AttributeError(f"HTTP handlers lazy import: unknown attribute {name!r}")
|
||||||
File diff suppressed because it is too large
Load Diff
352
llm-gateway-competitors/litellm-wheel-src/litellm/_logging.py
Normal file
352
llm-gateway-competitors/litellm-wheel-src/litellm/_logging.py
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
import ast
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from logging import Formatter
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
||||||
|
from litellm.litellm_core_utils.safe_json_loads import safe_json_loads
|
||||||
|
|
||||||
|
# Deprecated debug flag; prefer os.environ["LITELLM_LOG"] = "DEBUG".
set_verbose = False

if set_verbose is True:
    logging.warning(
        "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
    )
# NOTE: any non-empty JSON_LOGS value (including "false" or "0") enables JSON
# logs, because bool() of a non-empty string is always True.
json_logs = bool(os.getenv("JSON_LOGS", False))
# Create a handler for the logger (you may need to adapt this based on your needs)
log_level = os.getenv("LITELLM_LOG", "DEBUG")
# getattr returns the numeric level constant (e.g. logging.DEBUG == 10), so the
# annotation is int, not str. An unrecognized LITELLM_LOG value raises
# AttributeError at import time.
numeric_level: int = getattr(logging, log_level.upper())
handler = logging.StreamHandler()
handler.setLevel(numeric_level)
|
||||||
|
|
||||||
|
|
||||||
|
def _try_parse_json_message(message: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Try to parse a log message as JSON. Returns parsed dict if valid, else None.
|
||||||
|
Handles messages that are entirely valid JSON (e.g. json.dumps output).
|
||||||
|
Uses shared safe_json_loads for consistent error handling.
|
||||||
|
"""
|
||||||
|
if not message or not isinstance(message, str):
|
||||||
|
return None
|
||||||
|
msg_stripped = message.strip()
|
||||||
|
if not (msg_stripped.startswith("{") or msg_stripped.startswith("[")):
|
||||||
|
return None
|
||||||
|
parsed = safe_json_loads(message, default=None)
|
||||||
|
if parsed is None or not isinstance(parsed, dict):
|
||||||
|
return None
|
||||||
|
return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _try_parse_embedded_python_dict(message: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Try to find and parse a Python dict repr (e.g. str(d) or repr(d)) embedded in
|
||||||
|
the message. Handles patterns like:
|
||||||
|
"get_available_deployment for model: X, Selected deployment: {'model_name': '...', ...} for model: X"
|
||||||
|
Uses ast.literal_eval for safe parsing. Returns the parsed dict or None.
|
||||||
|
"""
|
||||||
|
if not message or not isinstance(message, str) or "{" not in message:
|
||||||
|
return None
|
||||||
|
i = 0
|
||||||
|
while i < len(message):
|
||||||
|
start = message.find("{", i)
|
||||||
|
if start == -1:
|
||||||
|
break
|
||||||
|
depth = 0
|
||||||
|
for j in range(start, len(message)):
|
||||||
|
c = message[j]
|
||||||
|
if c == "{":
|
||||||
|
depth += 1
|
||||||
|
elif c == "}":
|
||||||
|
depth -= 1
|
||||||
|
if depth == 0:
|
||||||
|
substr = message[start : j + 1]
|
||||||
|
try:
|
||||||
|
result = ast.literal_eval(substr)
|
||||||
|
if isinstance(result, dict) and len(result) > 0:
|
||||||
|
return result
|
||||||
|
except (ValueError, SyntaxError, TypeError):
|
||||||
|
pass
|
||||||
|
break
|
||||||
|
i = start + 1
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Standard LogRecord attribute names - used to identify 'extra' fields.
|
||||||
|
# Derived at runtime so we automatically include version-specific attrs (e.g. taskName).
|
||||||
|
def _get_standard_record_attrs() -> frozenset:
|
||||||
|
"""Standard LogRecord attribute names - excludes extra keys from logger.debug(..., extra={...})."""
|
||||||
|
return frozenset(logging.LogRecord("", 0, "", 0, "", (), None).__dict__.keys())
|
||||||
|
|
||||||
|
|
||||||
|
_STANDARD_RECORD_ATTRS = _get_standard_record_attrs()
|
||||||
|
|
||||||
|
|
||||||
|
class JsonFormatter(Formatter):
    """Formats LogRecords as JSON strings (via the project's safe dumper),
    promoting embedded dict payloads and logger ``extra`` fields to
    top-level JSON properties."""

    def __init__(self):
        super(JsonFormatter, self).__init__()

    def formatTime(self, record, datefmt=None):
        # Use datetime to format the timestamp in ISO 8601 format
        # NOTE(review): fromtimestamp() uses local time, not UTC - confirm intended.
        dt = datetime.fromtimestamp(record.created)
        return dt.isoformat()

    def format(self, record):
        message_str = record.getMessage()
        # Base payload; these keys win over parsed/extra fields with the same name.
        json_record: Dict[str, Any] = {
            "message": message_str,
            "level": record.levelname,
            "timestamp": self.formatTime(record),
        }

        # Parse embedded JSON or Python dict repr in message so sub-fields become
        # first-class properties. JSON is tried first, dict-repr as fallback.
        parsed = _try_parse_json_message(message_str)
        if parsed is None:
            parsed = _try_parse_embedded_python_dict(message_str)
        if parsed is not None:
            for key, value in parsed.items():
                if key not in json_record:
                    json_record[key] = value

        # Include extra attributes passed via logger.debug("msg", extra={...});
        # anything not on a vanilla LogRecord counts as extra.
        for key, value in record.__dict__.items():
            if key not in _STANDARD_RECORD_ATTRS and key not in json_record:
                json_record[key] = value

        if record.exc_info:
            json_record["stacktrace"] = self.formatException(record.exc_info)

        # Serialize via the shared safe JSON dumper.
        return safe_dumps(json_record)
|
||||||
|
|
||||||
|
|
||||||
|
# Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter):
    """
    Route uncaught exceptions (sync and asyncio) through ``formatter``.

    Installs sys.excepthook and, best-effort, an asyncio loop exception
    handler, so crashes are emitted as JSON log records instead of plain
    tracebacks. The asyncio wiring is wrapped in a broad try/except and
    silently skipped if it fails.
    """
    # Create a handler with JSON formatting for exceptions
    error_handler = logging.StreamHandler()
    error_handler.setFormatter(formatter)

    # Setup excepthook for uncaught exceptions
    def json_excepthook(exc_type, exc_value, exc_traceback):
        # Build a synthetic ERROR record carrying the exception triple so the
        # formatter can attach a "stacktrace" field.
        record = logging.LogRecord(
            name="LiteLLM",
            level=logging.ERROR,
            pathname="",
            lineno=0,
            msg=str(exc_value),
            args=(),
            exc_info=(exc_type, exc_value, exc_traceback),
        )
        error_handler.handle(record)

    sys.excepthook = json_excepthook

    # Configure asyncio exception handler if possible
    try:
        import asyncio

        def async_json_exception_handler(loop, context):
            exception = context.get("exception")
            if exception:
                record = logging.LogRecord(
                    name="LiteLLM",
                    level=logging.ERROR,
                    pathname="",
                    lineno=0,
                    msg=str(exception),
                    args=(),
                    exc_info=None,
                )
                error_handler.handle(record)
            else:
                # No exception object: fall back to asyncio's default handling.
                loop.default_exception_handler(context)

        # NOTE(review): asyncio.get_event_loop() outside a running loop is
        # deprecated on newer Pythons; failures here are deliberately swallowed.
        asyncio.get_event_loop().set_exception_handler(async_json_exception_handler)
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Create a formatter and set it for the handler.
if json_logs:
    # JSON mode: structured records plus JSON-formatted uncaught exceptions.
    handler.setFormatter(JsonFormatter())
    _setup_json_exception_handlers(JsonFormatter())
else:
    # Human-readable mode: ANSI-green "HH:MM:SS - name:LEVEL" prefix followed
    # by file:line and the message.
    formatter = logging.Formatter(
        "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
        datefmt="%H:%M:%S",
    )

    handler.setFormatter(formatter)

# Package loggers: proxy, router, and the general LiteLLM logger.
verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
verbose_router_logger = logging.getLogger("LiteLLM Router")
verbose_logger = logging.getLogger("LiteLLM")

# Add the (shared) handler to each logger.
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
verbose_logger.addHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
|
def _suppress_loggers():
|
||||||
|
"""Suppress noisy loggers at INFO level"""
|
||||||
|
# Suppress httpx request logging at INFO level
|
||||||
|
httpx_logger = logging.getLogger("httpx")
|
||||||
|
httpx_logger.setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
# Suppress APScheduler logging at INFO level
|
||||||
|
apscheduler_executors_logger = logging.getLogger("apscheduler.executors.default")
|
||||||
|
apscheduler_executors_logger.setLevel(logging.WARNING)
|
||||||
|
apscheduler_scheduler_logger = logging.getLogger("apscheduler.scheduler")
|
||||||
|
apscheduler_scheduler_logger.setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
|
# Call the suppression function
|
||||||
|
_suppress_loggers()
|
||||||
|
|
||||||
|
# Loggers that receive LiteLLM-wide handler configuration: the root logger
# plus the three package loggers defined above.
ALL_LOGGERS = [
    logging.getLogger(),
    verbose_logger,
    verbose_router_logger,
    verbose_proxy_logger,
]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_loggers_to_initialize():
    """
    Return all loggers that should be initialized with the JSON handler.

    Starts from ALL_LOGGERS and appends the third-party "langfuse" logger
    when a langfuse callback is configured on the litellm module.
    """
    # Imported lazily to avoid a circular import at module load.
    import litellm

    target_loggers = list(ALL_LOGGERS)

    # success_callback / failure_callback are lists of callback names.
    configured_callbacks = set(litellm.success_callback + litellm.failure_callback)
    if configured_callbacks & {"langfuse", "langfuse_otel"}:
        target_loggers.append(logging.getLogger("langfuse"))

    return target_loggers
|
||||||
|
|
||||||
|
|
||||||
|
def _initialize_loggers_with_handler(handler: logging.Handler):
    """
    Attach ``handler`` as the sole handler on every relevant logger.

    Clears pre-existing handlers and disables propagation so each record is
    emitted exactly once (prevents duplicate JSON lines via the root logger).
    """
    for target in _get_loggers_to_initialize():
        target.handlers.clear()  # drop any previously attached handlers
        target.addHandler(handler)
        target.propagate = False  # stop bubbling to parent/root
|
||||||
|
|
||||||
|
|
||||||
|
def _get_uvicorn_json_log_config():
    """
    Build a uvicorn ``log_config`` dict that JSON-formats every logger.

    Ensures uvicorn's access logs, error logs, and application logs all use
    JsonFormatter when json_logs is enabled. The level mirrors the
    module-level LITELLM_LOG setting.
    """
    json_formatter_class = "litellm._logging.JsonFormatter"

    # Use the module-level log_level variable for consistency.
    uvicorn_log_level = log_level.upper()

    # All three formatter aliases resolve to the same JSON formatter class.
    formatters = {
        alias: {"()": json_formatter_class}
        for alias in ("json", "default", "access")
    }
    handlers = {
        handler_name: {
            "formatter": formatter_name,
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stdout",
        }
        for handler_name, formatter_name in (("default", "json"), ("access", "access"))
    }
    loggers = {
        logger_name: {
            "handlers": [handler_name],
            "level": uvicorn_log_level,
            "propagate": False,
        }
        for logger_name, handler_name in (
            ("uvicorn", "default"),
            ("uvicorn.error", "default"),
            ("uvicorn.access", "access"),
        )
    }

    return {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": formatters,
        "handlers": handlers,
        "loggers": loggers,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _turn_on_json():
    """
    Switch litellm logging to JSON output.

    Installs a JSON-formatted stream handler on all litellm loggers and
    wires up JSON-formatted exception handlers.
    """
    json_handler = logging.StreamHandler()
    json_handler.setFormatter(JsonFormatter())
    _initialize_loggers_with_handler(json_handler)
    # Uncaught exceptions should also be emitted as JSON.
    _setup_json_exception_handlers(JsonFormatter())
|
||||||
|
|
||||||
|
|
||||||
|
def _turn_on_debug():
    """Set the package, router, and proxy loggers to DEBUG level."""
    for debug_logger in (verbose_logger, verbose_router_logger, verbose_proxy_logger):
        debug_logger.setLevel(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
def _disable_debugging():
    """Disable the package, router, and proxy loggers entirely."""
    for lg in (verbose_logger, verbose_router_logger, verbose_proxy_logger):
        lg.disabled = True
|
||||||
|
|
||||||
|
|
||||||
|
def _enable_debugging():
    """Re-enable the package, router, and proxy loggers."""
    for lg in (verbose_logger, verbose_router_logger, verbose_proxy_logger):
        lg.disabled = False
|
||||||
|
|
||||||
|
|
||||||
|
def print_verbose(print_statement):
    """Print *print_statement* when the module-level ``set_verbose`` flag is on.

    Best-effort: any exception (including a failed write to stdout) is
    deliberately swallowed so logging can never break the caller.
    """
    try:
        if not set_verbose:
            return
        print(print_statement)  # noqa
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def _is_debugging_on() -> bool:
    """
    Return True when debug output is active: either the ``set_verbose``
    flag is on, or ``verbose_logger`` is enabled for DEBUG.
    """
    if set_verbose is True:
        return True
    return verbose_logger.isEnabledFor(logging.DEBUG)
|
||||||
598
llm-gateway-competitors/litellm-wheel-src/litellm/_redis.py
Normal file
598
llm-gateway-competitors/litellm-wheel-src/litellm/_redis.py
Normal file
@@ -0,0 +1,598 @@
|
|||||||
|
# +-----------------------------------------------+
|
||||||
|
# | |
|
||||||
|
# | Give Feedback / Get Help |
|
||||||
|
# | https://github.com/BerriAI/litellm/issues/new |
|
||||||
|
# | |
|
||||||
|
# +-----------------------------------------------+
|
||||||
|
#
|
||||||
|
# Thank you users! We ❤️ you! - Krrish & Ishaan
|
||||||
|
|
||||||
|
import inspect
|
||||||
|
import json
|
||||||
|
|
||||||
|
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
|
||||||
|
import os
|
||||||
|
from typing import Callable, List, Optional, Union
|
||||||
|
|
||||||
|
import redis # type: ignore
|
||||||
|
import redis.asyncio as async_redis # type: ignore
|
||||||
|
|
||||||
|
from litellm import get_secret, get_secret_str
|
||||||
|
from litellm.constants import REDIS_CONNECTION_POOL_TIMEOUT, REDIS_SOCKET_TIMEOUT
|
||||||
|
from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker
|
||||||
|
|
||||||
|
from ._logging import verbose_logger
|
||||||
|
|
||||||
|
|
||||||
|
def _get_redis_kwargs():
    """
    Return the argument names litellm accepts for ``redis.Redis``.

    Takes the primitive arguments of the ``redis.Redis`` signature and adds
    a few litellm-specific extras that are handled before construction.
    """
    spec = inspect.getfullargspec(redis.Redis)

    # Non-primitive / internally-managed arguments are filtered out.
    blocked = {"self", "connection_pool", "retry"}

    # Extra knobs litellm supports on top of the redis.Redis signature.
    extras = [
        "url",
        "redis_connect_func",
        "gcp_service_account",
        "gcp_ssl_ca_certs",
    ]

    return [arg for arg in spec.args if arg not in blocked] + extras
|
||||||
|
|
||||||
|
|
||||||
|
def _get_redis_url_kwargs(client=None):
    """
    Return the allowed kwarg names for ``from_url``-style redis clients.

    Args:
        client: the ``from_url`` callable to inspect; defaults to
            ``redis.Redis.from_url``.

    Returns:
        List of accepted argument names (primitive args plus ``"url"``).
    """
    if client is None:
        client = redis.Redis.from_url
    # Bug fix: inspect the provided client instead of hard-coding
    # redis.Redis.from_url — previously the `client` argument was accepted
    # but ignored, so async clients never got their own signature inspected.
    arg_spec = inspect.getfullargspec(client)

    # Only allow primitive arguments
    exclude_args = {
        "self",
        "connection_pool",
        "retry",
    }

    include_args = ["url"]

    available_args = [x for x in arg_spec.args if x not in exclude_args] + include_args

    return available_args
|
||||||
|
|
||||||
|
|
||||||
|
def _get_redis_cluster_kwargs(client=None):
    """
    Return the kwarg names litellm forwards to ``redis.RedisCluster``.

    Args:
        client: unused; kept for interface compatibility with the other
            ``_get_redis_*_kwargs`` helpers. (The old body assigned a
            default to it but never read it — dead code, now removed.)

    Returns:
        List of accepted argument names.
    """
    arg_spec = inspect.getfullargspec(redis.RedisCluster)

    # Only allow primitive arguments; node/host settings are handled separately.
    exclude_args = {"self", "connection_pool", "retry", "host", "port", "startup_nodes"}

    available_args = [x for x in arg_spec.args if x not in exclude_args]
    # Extra args litellm supports on top of the RedisCluster signature.
    available_args += [
        "password",
        "username",
        "ssl",
        "ssl_cert_reqs",
        "ssl_check_hostname",
        "ssl_ca_certs",
        "redis_connect_func",  # Needed for sync clusters and IAM detection
        "gcp_service_account",
        "gcp_ssl_ca_certs",
        "max_connections",
    ]

    return available_args
|
||||||
|
|
||||||
|
|
||||||
|
def _get_redis_env_kwarg_mapping():
    """Map ``REDIS_*`` environment variable names to redis client kwarg names."""
    prefix = "REDIS_"
    mapping = {}
    for kwarg_name in _get_redis_kwargs():
        mapping[f"{prefix}{kwarg_name.upper()}"] = kwarg_name
    return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_kwargs_from_environment():
    """Collect redis kwargs whose ``REDIS_*`` env vars / secrets are set."""
    env_to_kwarg = _get_redis_env_kwarg_mapping()
    # Only kwargs with a resolvable secret value make it into the result.
    return {
        kwarg: value
        for env_name, kwarg in env_to_kwarg.items()
        if (value := get_secret(env_name, default_value=None)) is not None  # type: ignore
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_gcp_iam_access_token(service_account: str) -> str:
    """
    Mint a GCP IAM access token used as the Redis AUTH credential.

    Args:
        service_account: GCP service account in format
            'projects/-/serviceAccounts/name@project.iam.gserviceaccount.com'

    Returns:
        Access token string for GCP IAM authentication.

    Raises:
        ImportError: if google-cloud-iam is not installed.
    """
    # Imported lazily so the dependency is only required when IAM auth is used.
    try:
        from google.cloud import iam_credentials_v1
    except ImportError:
        raise ImportError(
            "google-cloud-iam is required for GCP IAM Redis authentication. "
            "Install it with: pip install google-cloud-iam"
        )

    iam_client = iam_credentials_v1.IAMCredentialsClient()
    token_request = iam_credentials_v1.GenerateAccessTokenRequest(
        name=service_account,
        scope=["https://www.googleapis.com/auth/cloud-platform"],
    )
    token_response = iam_client.generate_access_token(request=token_request)
    return str(token_response.access_token)
|
||||||
|
|
||||||
|
|
||||||
|
def create_gcp_iam_redis_connect_func(
    service_account: str,
    ssl_ca_certs: Optional[str] = None,
) -> Callable:
    """
    Creates a custom Redis connection function for GCP IAM authentication.

    Args:
        service_account: GCP service account in format 'projects/-/serviceAccounts/name@project.iam.gserviceaccount.com'
        ssl_ca_certs: Path to SSL CA certificate file for secure connections.
            NOTE(review): this parameter is captured but not referenced inside
            ``iam_connect`` — it appears unused here; SSL setup happens in the
            caller. Confirm before relying on it.

    Returns:
        A connection function that can be used with Redis clients
    """

    def iam_connect(self):
        """Initialize the connection and authenticate using GCP IAM"""
        # Imported lazily: this closure runs inside redis-py's connection
        # setup, where these modules are guaranteed to be importable.
        from redis.exceptions import (
            AuthenticationError,
            AuthenticationWrongNumberOfArgsError,
        )
        from redis.utils import str_if_bytes

        # Let the response parser attach to this connection first.
        self._parser.on_connect(self)

        # AUTH with a freshly minted IAM token (token-only, no username).
        auth_args = (_generate_gcp_iam_access_token(service_account),)
        self.send_command("AUTH", *auth_args, check_health=False)

        try:
            auth_response = self.read_response()
        except AuthenticationWrongNumberOfArgsError:
            # Fallback to password auth if IAM fails
            if hasattr(self, "password") and self.password:
                self.send_command("AUTH", self.password, check_health=False)
                auth_response = self.read_response()
            else:
                raise

        # Redis replies "OK" on successful AUTH; anything else is a failure.
        if str_if_bytes(auth_response) != "OK":
            raise AuthenticationError("GCP IAM authentication failed")

    return iam_connect
|
||||||
|
|
||||||
|
|
||||||
|
def get_redis_url_from_environment():
    """
    Build a redis connection URL from environment variables.

    ``REDIS_URL`` is returned verbatim when present; otherwise the URL is
    assembled as ``redis(s)://[user[:password]@]host:port`` from
    ``REDIS_HOST``/``REDIS_PORT``, with optional ``REDIS_SSL``,
    ``REDIS_USERNAME`` and ``REDIS_PASSWORD``.

    Raises:
        ValueError: when neither REDIS_URL nor both REDIS_HOST and
            REDIS_PORT are set.
    """
    env = os.environ
    if "REDIS_URL" in env:
        return env["REDIS_URL"]

    if "REDIS_HOST" not in env or "REDIS_PORT" not in env:
        raise ValueError(
            "Either 'REDIS_URL' or both 'REDIS_HOST' and 'REDIS_PORT' must be specified for Redis."
        )

    # rediss:// (TLS) only when REDIS_SSL is explicitly "true".
    scheme = "rediss" if env.get("REDIS_SSL", "").lower() == "true" else "redis"

    # Credentials prefix: "user:pass@", "pass@", or empty.
    if "REDIS_USERNAME" in env and "REDIS_PASSWORD" in env:
        credentials = f"{env['REDIS_USERNAME']}:{env['REDIS_PASSWORD']}@"
    elif "REDIS_PASSWORD" in env:
        credentials = f"{env['REDIS_PASSWORD']}@"
    else:
        credentials = ""

    return f"{scheme}://{credentials}{env['REDIS_HOST']}:{env['REDIS_PORT']}"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_redis_client_logic(**env_overrides):
    """
    Common functionality across sync + async redis client implementations.

    Merges environment-derived kwargs with explicit overrides, resolves
    "os.environ/<key>" indirections, parses cluster/sentinel node lists,
    wires up GCP IAM auth when configured, and validates that at least one
    of url/startup_nodes/sentinel_nodes/host is present.

    Returns:
        dict of kwargs ready to hand to a redis client constructor.

    Raises:
        ValueError: when no host/url/cluster/sentinel configuration is found.
    """
    ### check if "os.environ/<key-name>" passed in
    for k, v in env_overrides.items():
        if isinstance(v, str) and v.startswith("os.environ/"):
            v = v.replace("os.environ/", "")
            value = get_secret(v)  # type: ignore
            env_overrides[k] = value

    # Explicit overrides win over environment-derived values.
    redis_kwargs = {
        **_redis_kwargs_from_environment(),
        **env_overrides,
    }

    _startup_nodes: Optional[Union[str, list]] = redis_kwargs.get("startup_nodes", None) or get_secret(  # type: ignore
        "REDIS_CLUSTER_NODES"
    )

    # Node lists supplied via env vars arrive as JSON strings.
    if _startup_nodes is not None and isinstance(_startup_nodes, str):
        redis_kwargs["startup_nodes"] = json.loads(_startup_nodes)

    _sentinel_nodes: Optional[Union[str, list]] = redis_kwargs.get("sentinel_nodes", None) or get_secret(  # type: ignore
        "REDIS_SENTINEL_NODES"
    )

    if _sentinel_nodes is not None and isinstance(_sentinel_nodes, str):
        redis_kwargs["sentinel_nodes"] = json.loads(_sentinel_nodes)

    _sentinel_password: Optional[str] = redis_kwargs.get(
        "sentinel_password", None
    ) or get_secret_str("REDIS_SENTINEL_PASSWORD")

    if _sentinel_password is not None:
        redis_kwargs["sentinel_password"] = _sentinel_password

    _service_name: Optional[str] = redis_kwargs.get("service_name", None) or get_secret(  # type: ignore
        "REDIS_SERVICE_NAME"
    )

    if _service_name is not None:
        redis_kwargs["service_name"] = _service_name

    # Handle GCP IAM authentication
    _gcp_service_account = redis_kwargs.get("gcp_service_account") or get_secret_str(
        "REDIS_GCP_SERVICE_ACCOUNT"
    )
    _gcp_ssl_ca_certs = redis_kwargs.get("gcp_ssl_ca_certs") or get_secret_str(
        "REDIS_GCP_SSL_CA_CERTS"
    )

    if _gcp_service_account is not None:
        verbose_logger.debug(
            "Setting up GCP IAM authentication for Redis with service account."
        )
        redis_kwargs["redis_connect_func"] = create_gcp_iam_redis_connect_func(
            service_account=_gcp_service_account, ssl_ca_certs=_gcp_ssl_ca_certs
        )
        # Store GCP service account in redis_connect_func for async cluster access
        redis_kwargs["redis_connect_func"]._gcp_service_account = _gcp_service_account

    # Remove GCP-specific kwargs that shouldn't be passed to Redis client
    redis_kwargs.pop("gcp_service_account", None)
    redis_kwargs.pop("gcp_ssl_ca_certs", None)

    # Only enable SSL if explicitly requested AND SSL CA certs are provided
    if _gcp_ssl_ca_certs and redis_kwargs.get("ssl", False):
        redis_kwargs["ssl_ca_certs"] = _gcp_ssl_ca_certs

    if "url" in redis_kwargs and redis_kwargs["url"] is not None:
        # URL wins: drop the discrete connection fields it supersedes.
        redis_kwargs.pop("host", None)
        redis_kwargs.pop("port", None)
        redis_kwargs.pop("db", None)
        redis_kwargs.pop("password", None)
    elif "startup_nodes" in redis_kwargs and redis_kwargs["startup_nodes"] is not None:
        pass
    elif (
        "sentinel_nodes" in redis_kwargs and redis_kwargs["sentinel_nodes"] is not None
    ):
        pass
    elif "host" not in redis_kwargs or redis_kwargs["host"] is None:
        raise ValueError("Either 'host' or 'url' must be specified for redis.")

    # litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
    return redis_kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def init_redis_cluster(redis_kwargs) -> redis.RedisCluster:
    """
    Build a synchronous ``redis.RedisCluster`` from litellm redis kwargs.

    Startup nodes come from ``redis_kwargs["startup_nodes"]`` or the
    REDIS_CLUSTER_NODES secret (a JSON list of {"host": ..., "port": ...}).

    Raises:
        ValueError: when REDIS_CLUSTER_NODES is set but not valid JSON.
    """
    cluster_nodes_env: Optional[str] = get_secret("REDIS_CLUSTER_NODES")  # type: ignore
    if cluster_nodes_env is not None:
        try:
            redis_kwargs["startup_nodes"] = json.loads(cluster_nodes_env)
        except json.JSONDecodeError:
            raise ValueError(
                "REDIS_CLUSTER_NODES environment variable is not valid JSON. Please ensure it's properly formatted."
            )

    verbose_logger.debug("init_redis_cluster: startup nodes are being initialized.")
    from redis.cluster import ClusterNode

    # Keep only kwargs the RedisCluster constructor accepts.
    allowed = _get_redis_cluster_kwargs()
    cluster_kwargs = {k: v for k, v in redis_kwargs.items() if k in allowed}

    node_objects: List[ClusterNode] = [
        ClusterNode(**node) for node in redis_kwargs["startup_nodes"]
    ]

    cluster_kwargs.pop("startup_nodes", None)
    return redis.RedisCluster(startup_nodes=node_objects, **cluster_kwargs)  # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
    """
    Connect to Redis via Sentinel and return a client for the master node.

    Requires ``sentinel_nodes`` and ``service_name`` in ``redis_kwargs``;
    ``sentinel_password`` is optional.

    Raises:
        ValueError: when sentinel_nodes or service_name is missing.
    """
    sentinel_nodes = redis_kwargs.get("sentinel_nodes")
    sentinel_password = redis_kwargs.get("sentinel_password")
    service_name = redis_kwargs.get("service_name")

    if not sentinel_nodes or not service_name:
        raise ValueError(
            "Both 'sentinel_nodes' and 'service_name' are required for Redis Sentinel."
        )

    verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.")

    sentinel_client = redis.Sentinel(
        sentinel_nodes,
        socket_timeout=REDIS_SOCKET_TIMEOUT,
        password=sentinel_password,
    )

    # Hand back the current master for the requested service.
    return sentinel_client.master_for(service_name)
|
||||||
|
|
||||||
|
|
||||||
|
def _init_async_redis_sentinel(redis_kwargs) -> async_redis.Redis:
    """
    Async counterpart of ``_init_redis_sentinel``: connect via Sentinel and
    return an async client for the master node.

    Raises:
        ValueError: when sentinel_nodes or service_name is missing.
    """
    sentinel_nodes = redis_kwargs.get("sentinel_nodes")
    sentinel_password = redis_kwargs.get("sentinel_password")
    service_name = redis_kwargs.get("service_name")

    if not sentinel_nodes or not service_name:
        raise ValueError(
            "Both 'sentinel_nodes' and 'service_name' are required for Redis Sentinel."
        )

    verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.")

    sentinel_client = async_redis.Sentinel(
        sentinel_nodes,
        socket_timeout=REDIS_SOCKET_TIMEOUT,
        password=sentinel_password,
    )

    # Hand back the current master for the requested service.
    return sentinel_client.master_for(service_name)
|
||||||
|
|
||||||
|
|
||||||
|
def get_redis_client(**env_overrides):
    """
    Construct a synchronous redis client.

    Dispatches, in order of precedence: URL-based connection, cluster,
    sentinel, then a plain host/port ``redis.Redis``.
    """
    redis_kwargs = _get_redis_client_logic(**env_overrides)

    # URL-based connection takes precedence over everything else.
    if redis_kwargs.get("url") is not None:
        allowed = _get_redis_url_kwargs()
        url_kwargs = {k: v for k, v in redis_kwargs.items() if k in allowed}
        return redis.Redis.from_url(**url_kwargs)

    if "startup_nodes" in redis_kwargs or get_secret("REDIS_CLUSTER_NODES") is not None:  # type: ignore
        return init_redis_cluster(redis_kwargs)

    # Check for Redis Sentinel
    if "sentinel_nodes" in redis_kwargs and "service_name" in redis_kwargs:
        return _init_redis_sentinel(redis_kwargs)

    return redis.Redis(**redis_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_redis_async_client(
    connection_pool: Optional[async_redis.BlockingConnectionPool] = None,
    **env_overrides,
) -> Union[async_redis.Redis, async_redis.RedisCluster]:
    """
    Construct an asynchronous redis client.

    Dispatches, in order of precedence: URL-based connection (optionally on
    top of a supplied pool), cluster (with optional GCP IAM token auth),
    sentinel, then a plain host/port ``async_redis.Redis``.

    Args:
        connection_pool: optional pre-built pool; used directly when a
            URL-based config is in effect, otherwise passed as a kwarg.
        **env_overrides: redis kwargs and "os.environ/<key>" indirections.
    """
    redis_kwargs = _get_redis_client_logic(**env_overrides)
    if "url" in redis_kwargs and redis_kwargs["url"] is not None:
        if connection_pool is not None:
            # A supplied pool already encodes the connection details.
            return async_redis.Redis(connection_pool=connection_pool)
        args = _get_redis_url_kwargs(client=async_redis.Redis.from_url)
        url_kwargs = {}
        for arg in redis_kwargs:
            if arg in args:
                url_kwargs[arg] = redis_kwargs[arg]
            else:
                verbose_logger.debug(
                    "REDIS: ignoring argument: {}. Not an allowed async_redis.Redis.from_url arg.".format(
                        arg
                    )
                )
        return async_redis.Redis.from_url(**url_kwargs)

    if "startup_nodes" in redis_kwargs:
        from redis.cluster import ClusterNode

        # Keep only kwargs the RedisCluster constructor accepts.
        args = _get_redis_cluster_kwargs()
        cluster_kwargs = {}
        for arg in redis_kwargs:
            if arg in args:
                cluster_kwargs[arg] = redis_kwargs[arg]

        # Handle GCP IAM authentication for async clusters
        redis_connect_func = cluster_kwargs.pop("redis_connect_func", None)
        from litellm import get_secret_str

        # Get GCP service account - first try from redis_connect_func, then from environment
        gcp_service_account = None
        if redis_connect_func and hasattr(redis_connect_func, "_gcp_service_account"):
            gcp_service_account = redis_connect_func._gcp_service_account
        else:
            gcp_service_account = redis_kwargs.get(
                "gcp_service_account"
            ) or get_secret_str("REDIS_GCP_SERVICE_ACCOUNT")

        verbose_logger.debug(
            f"DEBUG: Redis cluster kwargs: redis_connect_func={redis_connect_func is not None}, gcp_service_account_provided={gcp_service_account is not None}"
        )

        # If GCP IAM is configured (indicated by redis_connect_func), generate access token and use as password
        if redis_connect_func and gcp_service_account:
            verbose_logger.debug(
                "DEBUG: Generating IAM token for service account (value not logged for security reasons)"
            )
            try:
                # Generate IAM access token using the helper function
                access_token = _generate_gcp_iam_access_token(gcp_service_account)
                cluster_kwargs["password"] = access_token
                verbose_logger.debug(
                    "DEBUG: Successfully generated GCP IAM access token for async Redis cluster"
                )
            except Exception as e:
                verbose_logger.error(f"Failed to generate GCP IAM access token: {e}")
                from redis.exceptions import AuthenticationError

                raise AuthenticationError("Failed to generate GCP IAM access token")
        else:
            verbose_logger.debug(
                f"DEBUG: Not using GCP IAM auth - redis_connect_func={redis_connect_func is not None}, gcp_service_account_provided={gcp_service_account is not None}"
            )

        new_startup_nodes: List[ClusterNode] = []

        for item in redis_kwargs["startup_nodes"]:
            new_startup_nodes.append(ClusterNode(**item))
        cluster_kwargs.pop("startup_nodes", None)

        # Create async RedisCluster with IAM token as password if available
        cluster_client = async_redis.RedisCluster(
            startup_nodes=new_startup_nodes, **cluster_kwargs  # type: ignore
        )

        return cluster_client

    # Check for Redis Sentinel
    if "sentinel_nodes" in redis_kwargs and "service_name" in redis_kwargs:
        return _init_async_redis_sentinel(redis_kwargs)
    # Only the plain-client path logs the (masked) config; debug-level no-op otherwise.
    _pretty_print_redis_config(redis_kwargs=redis_kwargs)

    if connection_pool is not None:
        redis_kwargs["connection_pool"] = connection_pool

    return async_redis.Redis(
        **redis_kwargs,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_redis_connection_pool(**env_overrides):
    """
    Build an async ``BlockingConnectionPool`` from env/kwargs.

    URL-based configs use ``from_url`` (optionally bounded by an integer
    ``max_connections``); otherwise a pool is created from the raw kwargs,
    upgrading to ``SSLConnection`` when ``ssl`` is set.
    """
    redis_kwargs = _get_redis_client_logic(**env_overrides)
    # Bug fix: the message previously had no %s placeholder, so redis_kwargs
    # was passed as a stray lazy-formatting argument and never rendered.
    verbose_logger.debug("get_redis_connection_pool: redis_kwargs %s", redis_kwargs)
    if "url" in redis_kwargs and redis_kwargs["url"] is not None:
        pool_kwargs = {
            "timeout": REDIS_CONNECTION_POOL_TIMEOUT,
            "url": redis_kwargs["url"],
        }
        if "max_connections" in redis_kwargs:
            try:
                pool_kwargs["max_connections"] = int(redis_kwargs["max_connections"])
            except (TypeError, ValueError):
                # Best-effort: a bad value is logged and the default limit used.
                verbose_logger.warning(
                    "REDIS: invalid max_connections value %r, ignoring",
                    redis_kwargs["max_connections"],
                )
        return async_redis.BlockingConnectionPool.from_url(**pool_kwargs)
    connection_class = async_redis.Connection
    if "ssl" in redis_kwargs:
        # "ssl" is not a pool kwarg; it is expressed via the connection class.
        connection_class = async_redis.SSLConnection
        redis_kwargs.pop("ssl", None)
        redis_kwargs["connection_class"] = connection_class
    redis_kwargs.pop("startup_nodes", None)
    return async_redis.BlockingConnectionPool(
        timeout=REDIS_CONNECTION_POOL_TIMEOUT, **redis_kwargs
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _pretty_print_redis_config(redis_kwargs: dict) -> None:
    """Pretty print the Redis configuration using rich with sensitive data masking.

    No-op unless ``verbose_logger`` is at DEBUG. Falls back to a plain
    masked log line when rich is not installed; never raises.
    """
    try:
        import logging

        from rich.console import Console
        from rich.panel import Panel
        from rich.table import Table
        from rich.text import Text

        # Rendering is debug-only; skip all work otherwise.
        if not verbose_logger.isEnabledFor(logging.DEBUG):
            return

        console = Console()

        # Initialize the sensitive data masker
        masker = SensitiveDataMasker()

        # Mask sensitive data in redis_kwargs
        masked_redis_kwargs = masker.mask_dict(redis_kwargs)

        # Create main panel title
        title = Text("Redis Configuration", style="bold blue")

        # Create configuration table
        config_table = Table(
            title="🔧 Redis Connection Parameters",
            show_header=True,
            header_style="bold magenta",
            title_justify="left",
        )
        config_table.add_column("Parameter", style="cyan", no_wrap=True)
        config_table.add_column("Value", style="yellow")

        # Add rows for each configuration parameter (None values are omitted)
        for key, value in masked_redis_kwargs.items():
            if value is not None:
                # Special handling for complex objects
                if isinstance(value, list):
                    if key == "startup_nodes" and value:
                        # Special handling for cluster nodes
                        value_str = f"[{len(value)} cluster nodes]"
                    elif key == "sentinel_nodes" and value:
                        # Special handling for sentinel nodes
                        value_str = f"[{len(value)} sentinel nodes]"
                    else:
                        value_str = str(value)
                else:
                    value_str = str(value)

                config_table.add_row(key, value_str)

        # Determine connection type from which kwargs are populated
        connection_type = "Standard Redis"
        if masked_redis_kwargs.get("startup_nodes"):
            connection_type = "Redis Cluster"
        elif masked_redis_kwargs.get("sentinel_nodes"):
            connection_type = "Redis Sentinel"
        elif masked_redis_kwargs.get("url"):
            connection_type = "Redis (URL-based)"

        # Create connection type info
        info_table = Table(
            title="📊 Connection Info",
            show_header=True,
            header_style="bold green",
            title_justify="left",
        )
        info_table.add_column("Property", style="cyan", no_wrap=True)
        info_table.add_column("Value", style="yellow")
        info_table.add_row("Connection Type", connection_type)

        # Print everything in a nice panel
        console.print("\n")
        console.print(Panel(title, border_style="blue"))
        console.print(info_table)
        console.print(config_table)
        console.print("\n")

    except ImportError:
        # Fallback to simple logging if rich is not available
        masker = SensitiveDataMasker()
        masked_redis_kwargs = masker.mask_dict(redis_kwargs)
        verbose_logger.info(f"Redis configuration: {masked_redis_kwargs}")
    except Exception as e:
        # Diagnostics must never break client construction.
        verbose_logger.error(f"Error pretty printing Redis configuration: {e}")
|
||||||
@@ -0,0 +1,323 @@
|
|||||||
|
import asyncio
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
|
||||||
|
from .integrations.custom_logger import CustomLogger
|
||||||
|
from .integrations.datadog.datadog import DataDogLogger
|
||||||
|
from .integrations.opentelemetry import OpenTelemetry
|
||||||
|
from .integrations.prometheus_services import PrometheusServicesLogger
|
||||||
|
from .types.services import ServiceLoggerPayload, ServiceTypes
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
    # Real types are only needed by static type checkers; at runtime they
    # fall back to Any so importing this module does not require the
    # opentelemetry / proxy extras.
    from opentelemetry.trace import Span as _Span

    from litellm.proxy._types import UserAPIKeyAuth

    Span = Union[_Span, Any]
    OTELClass = OpenTelemetry
else:
    Span = Any
    OTELClass = Any
    UserAPIKeyAuth = Any
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceLogging(CustomLogger):
|
||||||
|
"""
|
||||||
|
Separate class used for monitoring health of litellm-adjacent services (redis/postgres).
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__(self, mock_testing: bool = False) -> None:
        """Initialize hook counters and eagerly configured service loggers.

        Args:
            mock_testing: when True, hook invocations increment counters so
                tests can assert they fired.
        """
        self.mock_testing = mock_testing
        # Per-hook invocation counters, used only under mock_testing.
        self.mock_testing_sync_success_hook = 0
        self.mock_testing_async_success_hook = 0
        self.mock_testing_sync_failure_hook = 0
        self.mock_testing_async_failure_hook = 0
        # Eagerly create the prometheus system logger only when configured;
        # other callbacks are initialized lazily by init_*_if_none helpers.
        if "prometheus_system" in litellm.service_callback:
            self.prometheusServicesLogger = PrometheusServicesLogger()
|
||||||
|
|
||||||
|
def service_success_hook(
|
||||||
|
self,
|
||||||
|
service: ServiceTypes,
|
||||||
|
duration: float,
|
||||||
|
call_type: str,
|
||||||
|
parent_otel_span: Optional[Span] = None,
|
||||||
|
start_time: Optional[Union[datetime, float]] = None,
|
||||||
|
end_time: Optional[Union[float, datetime]] = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Handles both sync and async monitoring by checking for existing event loop.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if self.mock_testing:
|
||||||
|
self.mock_testing_sync_success_hook += 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Try to get the current event loop
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
# Check if the loop is running
|
||||||
|
if loop.is_running():
|
||||||
|
# If we're in a running loop, create a task
|
||||||
|
loop.create_task(
|
||||||
|
self.async_service_success_hook(
|
||||||
|
service=service,
|
||||||
|
duration=duration,
|
||||||
|
call_type=call_type,
|
||||||
|
parent_otel_span=parent_otel_span,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Loop exists but not running, we can use run_until_complete
|
||||||
|
loop.run_until_complete(
|
||||||
|
self.async_service_success_hook(
|
||||||
|
service=service,
|
||||||
|
duration=duration,
|
||||||
|
call_type=call_type,
|
||||||
|
parent_otel_span=parent_otel_span,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except RuntimeError:
|
||||||
|
# No event loop exists, create a new one and run
|
||||||
|
asyncio.run(
|
||||||
|
self.async_service_success_hook(
|
||||||
|
service=service,
|
||||||
|
duration=duration,
|
||||||
|
call_type=call_type,
|
||||||
|
parent_otel_span=parent_otel_span,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def service_failure_hook(
|
||||||
|
self, service: ServiceTypes, duration: float, error: Exception, call_type: str
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
[TODO] Not implemented for sync calls yet. V0 is focused on async monitoring (used by proxy).
|
||||||
|
"""
|
||||||
|
if self.mock_testing:
|
||||||
|
self.mock_testing_sync_failure_hook += 1
|
||||||
|
|
||||||
|
    async def async_service_success_hook(
        self,
        service: ServiceTypes,
        call_type: str,
        duration: float,
        parent_otel_span: Optional[Span] = None,
        start_time: Optional[Union[datetime, float]] = None,
        end_time: Optional[Union[datetime, float]] = None,
        event_metadata: Optional[dict] = None,
    ):
        """
        - For counting if the redis, postgres call is successful

        Builds a ServiceLoggerPayload and fans it out to every configured
        service callback (prometheus_system, datadog, otel).
        """
        if self.mock_testing:
            self.mock_testing_async_success_hook += 1

        payload = ServiceLoggerPayload(
            is_error=False,
            error=None,
            service=service,
            duration=duration,
            call_type=call_type,
            event_metadata=event_metadata,
        )

        for callback in litellm.service_callback:
            if callback == "prometheus_system":
                # Lazily create the logger if __init__ didn't.
                await self.init_prometheus_services_logger_if_none()
                await self.prometheusServicesLogger.async_service_success_hook(
                    payload=payload
                )
            elif callback == "datadog" or isinstance(callback, DataDogLogger):
                await self.init_datadog_logger_if_none()
                await self.dd_logger.async_service_success_hook(
                    payload=payload,
                    parent_otel_span=parent_otel_span,
                    start_time=start_time,
                    end_time=end_time,
                    event_metadata=event_metadata,
                )
            elif callback == "otel" or isinstance(callback, OpenTelemetry):
                # Prefer a logger instance passed directly as the callback;
                # otherwise fall back to the proxy's global otel logger.
                _otel_logger_to_use: Optional[OpenTelemetry] = None
                if isinstance(callback, OpenTelemetry):
                    _otel_logger_to_use = callback
                else:
                    from litellm.proxy.proxy_server import open_telemetry_logger

                    if open_telemetry_logger is not None and isinstance(
                        open_telemetry_logger, OpenTelemetry
                    ):
                        _otel_logger_to_use = open_telemetry_logger

                # OTEL emission requires a parent span to attach to.
                if _otel_logger_to_use is not None and parent_otel_span is not None:
                    await _otel_logger_to_use.async_service_success_hook(
                        payload=payload,
                        parent_otel_span=parent_otel_span,
                        start_time=start_time,
                        end_time=end_time,
                        event_metadata=event_metadata,
                    )
|
||||||
|
|
||||||
|
async def init_prometheus_services_logger_if_none(self):
|
||||||
|
"""
|
||||||
|
initializes prometheusServicesLogger if it is None or no attribute exists on ServiceLogging Object
|
||||||
|
|
||||||
|
"""
|
||||||
|
if not hasattr(self, "prometheusServicesLogger"):
|
||||||
|
self.prometheusServicesLogger = PrometheusServicesLogger()
|
||||||
|
elif self.prometheusServicesLogger is None:
|
||||||
|
self.prometheusServicesLogger = self.prometheusServicesLogger()
|
||||||
|
return
|
||||||
|
|
||||||
|
async def init_datadog_logger_if_none(self):
|
||||||
|
"""
|
||||||
|
initializes dd_logger if it is None or no attribute exists on ServiceLogging Object
|
||||||
|
|
||||||
|
"""
|
||||||
|
from litellm.integrations.datadog.datadog import DataDogLogger
|
||||||
|
|
||||||
|
if not hasattr(self, "dd_logger"):
|
||||||
|
self.dd_logger: DataDogLogger = DataDogLogger()
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
async def init_otel_logger_if_none(self):
|
||||||
|
"""
|
||||||
|
initializes otel_logger if it is None or no attribute exists on ServiceLogging Object
|
||||||
|
|
||||||
|
"""
|
||||||
|
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||||
|
|
||||||
|
if not hasattr(self, "otel_logger"):
|
||||||
|
if open_telemetry_logger is not None and isinstance(
|
||||||
|
open_telemetry_logger, OpenTelemetry
|
||||||
|
):
|
||||||
|
self.otel_logger: OpenTelemetry = open_telemetry_logger
|
||||||
|
else:
|
||||||
|
verbose_logger.warning(
|
||||||
|
"ServiceLogger: open_telemetry_logger is None or not an instance of OpenTelemetry"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
async def async_service_failure_hook(
|
||||||
|
self,
|
||||||
|
service: ServiceTypes,
|
||||||
|
duration: float,
|
||||||
|
error: Union[str, Exception],
|
||||||
|
call_type: str,
|
||||||
|
parent_otel_span: Optional[Span] = None,
|
||||||
|
start_time: Optional[Union[datetime, float]] = None,
|
||||||
|
end_time: Optional[Union[float, datetime]] = None,
|
||||||
|
event_metadata: Optional[dict] = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
- For counting if the redis, postgres call is unsuccessful
|
||||||
|
"""
|
||||||
|
if self.mock_testing:
|
||||||
|
self.mock_testing_async_failure_hook += 1
|
||||||
|
|
||||||
|
error_message = ""
|
||||||
|
if isinstance(error, Exception):
|
||||||
|
error_message = str(error)
|
||||||
|
elif isinstance(error, str):
|
||||||
|
error_message = error
|
||||||
|
|
||||||
|
payload = ServiceLoggerPayload(
|
||||||
|
is_error=True,
|
||||||
|
error=error_message,
|
||||||
|
service=service,
|
||||||
|
duration=duration,
|
||||||
|
call_type=call_type,
|
||||||
|
event_metadata=event_metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
for callback in litellm.service_callback:
|
||||||
|
if callback == "prometheus_system":
|
||||||
|
await self.init_prometheus_services_logger_if_none()
|
||||||
|
await self.prometheusServicesLogger.async_service_failure_hook(
|
||||||
|
payload=payload,
|
||||||
|
error=error,
|
||||||
|
)
|
||||||
|
elif callback == "datadog" or isinstance(callback, DataDogLogger):
|
||||||
|
await self.init_datadog_logger_if_none()
|
||||||
|
await self.dd_logger.async_service_failure_hook(
|
||||||
|
payload=payload,
|
||||||
|
error=error_message,
|
||||||
|
parent_otel_span=parent_otel_span,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
event_metadata=event_metadata,
|
||||||
|
)
|
||||||
|
elif callback == "otel" or isinstance(callback, OpenTelemetry):
|
||||||
|
_otel_logger_to_use: Optional[OpenTelemetry] = None
|
||||||
|
if isinstance(callback, OpenTelemetry):
|
||||||
|
_otel_logger_to_use = callback
|
||||||
|
else:
|
||||||
|
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||||
|
|
||||||
|
if open_telemetry_logger is not None and isinstance(
|
||||||
|
open_telemetry_logger, OpenTelemetry
|
||||||
|
):
|
||||||
|
_otel_logger_to_use = open_telemetry_logger
|
||||||
|
|
||||||
|
if not isinstance(error, str):
|
||||||
|
error = str(error)
|
||||||
|
|
||||||
|
if _otel_logger_to_use is not None and parent_otel_span is not None:
|
||||||
|
await _otel_logger_to_use.async_service_failure_hook(
|
||||||
|
payload=payload,
|
||||||
|
error=error,
|
||||||
|
parent_otel_span=parent_otel_span,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
event_metadata=event_metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def async_post_call_failure_hook(
|
||||||
|
self,
|
||||||
|
request_data: dict,
|
||||||
|
original_exception: Exception,
|
||||||
|
user_api_key_dict: UserAPIKeyAuth,
|
||||||
|
traceback_str: Optional[str] = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Hook to track failed litellm-service calls
|
||||||
|
"""
|
||||||
|
return await super().async_post_call_failure_hook(
|
||||||
|
request_data,
|
||||||
|
original_exception,
|
||||||
|
user_api_key_dict,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
"""
|
||||||
|
Hook to track latency for litellm proxy llm api calls
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
_duration = end_time - start_time
|
||||||
|
if isinstance(_duration, timedelta):
|
||||||
|
_duration = _duration.total_seconds()
|
||||||
|
elif isinstance(_duration, float):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
"Duration={} is not a float or timedelta object. type={}".format(
|
||||||
|
_duration, type(_duration)
|
||||||
|
)
|
||||||
|
) # invalid _duration value
|
||||||
|
# Batch polling callbacks (check_batch_cost) don't include call_type in kwargs.
|
||||||
|
# Use .get() to avoid KeyError.
|
||||||
|
await self.async_service_success_hook(
|
||||||
|
service=ServiceTypes.LITELLM,
|
||||||
|
duration=_duration,
|
||||||
|
call_type=kwargs.get("call_type", "unknown"),
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
16
llm-gateway-competitors/litellm-wheel-src/litellm/_uuid.py
Normal file
16
llm-gateway-competitors/litellm-wheel-src/litellm/_uuid.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
"""
|
||||||
|
Internal unified UUID helper.
|
||||||
|
|
||||||
|
Always uses fastuuid for performance.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import fastuuid as _uuid # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
# Expose a module-like alias so callers can use: uuid.uuid4()
|
||||||
|
uuid = _uuid
|
||||||
|
|
||||||
|
|
||||||
|
def uuid4():
|
||||||
|
"""Return a UUID4 using the selected backend."""
|
||||||
|
return uuid.uuid4()
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
import importlib_metadata
|
||||||
|
|
||||||
|
try:
|
||||||
|
version = importlib_metadata.version("litellm")
|
||||||
|
except Exception:
|
||||||
|
version = "unknown"
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""
|
||||||
|
LiteLLM A2A - Wrapper for invoking A2A protocol agents.
|
||||||
|
|
||||||
|
This module provides a thin wrapper around the official `a2a` SDK that:
|
||||||
|
- Handles httpx client creation and agent card resolution
|
||||||
|
- Adds LiteLLM logging via @client decorator
|
||||||
|
- Matches the A2A SDK interface (SendMessageRequest, SendMessageResponse, etc.)
|
||||||
|
|
||||||
|
Example usage (standalone functions with @client decorator):
|
||||||
|
```python
|
||||||
|
from litellm.a2a_protocol import asend_message
|
||||||
|
from a2a.types import SendMessageRequest, MessageSendParams
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
request = SendMessageRequest(
|
||||||
|
id=str(uuid4()),
|
||||||
|
params=MessageSendParams(
|
||||||
|
message={
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{"kind": "text", "text": "Hello!"}],
|
||||||
|
"messageId": uuid4().hex,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await asend_message(
|
||||||
|
base_url="http://localhost:10001",
|
||||||
|
request=request,
|
||||||
|
)
|
||||||
|
print(response.model_dump(mode='json', exclude_none=True))
|
||||||
|
```
|
||||||
|
|
||||||
|
Example usage (class-based):
|
||||||
|
```python
|
||||||
|
from litellm.a2a_protocol import A2AClient
|
||||||
|
|
||||||
|
client = A2AClient(base_url="http://localhost:10001")
|
||||||
|
response = await client.send_message(request)
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
from litellm.a2a_protocol.client import A2AClient
|
||||||
|
from litellm.a2a_protocol.exceptions import (
|
||||||
|
A2AAgentCardError,
|
||||||
|
A2AConnectionError,
|
||||||
|
A2AError,
|
||||||
|
A2ALocalhostURLError,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.main import (
|
||||||
|
aget_agent_card,
|
||||||
|
asend_message,
|
||||||
|
asend_message_streaming,
|
||||||
|
create_a2a_client,
|
||||||
|
send_message,
|
||||||
|
)
|
||||||
|
from litellm.types.agents import LiteLLMSendMessageResponse
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# Client
|
||||||
|
"A2AClient",
|
||||||
|
# Functions
|
||||||
|
"asend_message",
|
||||||
|
"send_message",
|
||||||
|
"asend_message_streaming",
|
||||||
|
"aget_agent_card",
|
||||||
|
"create_a2a_client",
|
||||||
|
# Response types
|
||||||
|
"LiteLLMSendMessageResponse",
|
||||||
|
# Exceptions
|
||||||
|
"A2AError",
|
||||||
|
"A2AConnectionError",
|
||||||
|
"A2AAgentCardError",
|
||||||
|
"A2ALocalhostURLError",
|
||||||
|
]
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
"""
|
||||||
|
Custom A2A Card Resolver for LiteLLM.
|
||||||
|
|
||||||
|
Extends the A2A SDK's card resolver to support multiple well-known paths.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||||
|
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.constants import LOCALHOST_URL_PATTERNS
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from a2a.types import AgentCard
|
||||||
|
|
||||||
|
# Runtime imports with availability check
|
||||||
|
_A2ACardResolver: Any = None
|
||||||
|
AGENT_CARD_WELL_KNOWN_PATH: str = "/.well-known/agent-card.json"
|
||||||
|
PREV_AGENT_CARD_WELL_KNOWN_PATH: str = "/.well-known/agent.json"
|
||||||
|
|
||||||
|
try:
|
||||||
|
from a2a.client import A2ACardResolver as _A2ACardResolver # type: ignore[no-redef]
|
||||||
|
from a2a.utils.constants import ( # type: ignore[no-redef]
|
||||||
|
AGENT_CARD_WELL_KNOWN_PATH,
|
||||||
|
PREV_AGENT_CARD_WELL_KNOWN_PATH,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def is_localhost_or_internal_url(url: Optional[str]) -> bool:
|
||||||
|
"""
|
||||||
|
Check if a URL is a localhost or internal URL.
|
||||||
|
|
||||||
|
This detects common development URLs that are accidentally left in
|
||||||
|
agent cards when deploying to production.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the URL is localhost/internal
|
||||||
|
"""
|
||||||
|
if not url:
|
||||||
|
return False
|
||||||
|
|
||||||
|
url_lower = url.lower()
|
||||||
|
|
||||||
|
return any(pattern in url_lower for pattern in LOCALHOST_URL_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
|
def fix_agent_card_url(agent_card: "AgentCard", base_url: str) -> "AgentCard":
|
||||||
|
"""
|
||||||
|
Fix the agent card URL if it contains a localhost/internal address.
|
||||||
|
|
||||||
|
Many A2A agents are deployed with agent cards that contain internal URLs
|
||||||
|
like "http://0.0.0.0:8001/" or "http://localhost:8000/". This function
|
||||||
|
replaces such URLs with the provided base_url.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
agent_card: The agent card to fix
|
||||||
|
base_url: The base URL to use as replacement
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The agent card with the URL fixed if necessary
|
||||||
|
"""
|
||||||
|
card_url = getattr(agent_card, "url", None)
|
||||||
|
|
||||||
|
if card_url and is_localhost_or_internal_url(card_url):
|
||||||
|
# Normalize base_url to ensure it ends with /
|
||||||
|
fixed_url = base_url.rstrip("/") + "/"
|
||||||
|
agent_card.url = fixed_url
|
||||||
|
|
||||||
|
return agent_card
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMA2ACardResolver(_A2ACardResolver): # type: ignore[misc]
|
||||||
|
"""
|
||||||
|
Custom A2A card resolver that supports multiple well-known paths.
|
||||||
|
|
||||||
|
Extends the base A2ACardResolver to try both:
|
||||||
|
- /.well-known/agent-card.json (standard)
|
||||||
|
- /.well-known/agent.json (previous/alternative)
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def get_agent_card(
|
||||||
|
self,
|
||||||
|
relative_card_path: Optional[str] = None,
|
||||||
|
http_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
|
) -> "AgentCard":
|
||||||
|
"""
|
||||||
|
Fetch the agent card, trying multiple well-known paths.
|
||||||
|
|
||||||
|
First tries the standard path, then falls back to the previous path.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
relative_card_path: Optional path to the agent card endpoint.
|
||||||
|
If None, tries both well-known paths.
|
||||||
|
http_kwargs: Optional dictionary of keyword arguments to pass to httpx.get
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
AgentCard from the A2A agent
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
A2AClientHTTPError or A2AClientJSONError if both paths fail
|
||||||
|
"""
|
||||||
|
# If a specific path is provided, use the parent implementation
|
||||||
|
if relative_card_path is not None:
|
||||||
|
return await super().get_agent_card(
|
||||||
|
relative_card_path=relative_card_path,
|
||||||
|
http_kwargs=http_kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Try both well-known paths
|
||||||
|
paths = [
|
||||||
|
AGENT_CARD_WELL_KNOWN_PATH,
|
||||||
|
PREV_AGENT_CARD_WELL_KNOWN_PATH,
|
||||||
|
]
|
||||||
|
|
||||||
|
last_error = None
|
||||||
|
for path in paths:
|
||||||
|
try:
|
||||||
|
verbose_logger.debug(
|
||||||
|
f"Attempting to fetch agent card from {self.base_url}{path}"
|
||||||
|
)
|
||||||
|
return await super().get_agent_card(
|
||||||
|
relative_card_path=path,
|
||||||
|
http_kwargs=http_kwargs,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_logger.debug(
|
||||||
|
f"Failed to fetch agent card from {self.base_url}{path}: {e}"
|
||||||
|
)
|
||||||
|
last_error = e
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If we get here, all paths failed - re-raise the last error
|
||||||
|
if last_error is not None:
|
||||||
|
raise last_error
|
||||||
|
|
||||||
|
# This shouldn't happen, but just in case
|
||||||
|
raise Exception(
|
||||||
|
f"Failed to fetch agent card from {self.base_url}. "
|
||||||
|
f"Tried paths: {', '.join(paths)}"
|
||||||
|
)
|
||||||
@@ -0,0 +1,109 @@
|
|||||||
|
"""
|
||||||
|
LiteLLM A2A Client class.
|
||||||
|
|
||||||
|
Provides a class-based interface for A2A agent invocation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, AsyncIterator, Dict, Optional
|
||||||
|
|
||||||
|
from litellm.types.agents import LiteLLMSendMessageResponse
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from a2a.client import A2AClient as A2AClientType
|
||||||
|
from a2a.types import (
|
||||||
|
AgentCard,
|
||||||
|
SendMessageRequest,
|
||||||
|
SendStreamingMessageRequest,
|
||||||
|
SendStreamingMessageResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class A2AClient:
|
||||||
|
"""
|
||||||
|
LiteLLM wrapper for A2A agent invocation.
|
||||||
|
|
||||||
|
Creates the underlying A2A client once on first use and reuses it.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```python
|
||||||
|
from litellm.a2a_protocol import A2AClient
|
||||||
|
from a2a.types import SendMessageRequest, MessageSendParams
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
client = A2AClient(base_url="http://localhost:10001")
|
||||||
|
|
||||||
|
request = SendMessageRequest(
|
||||||
|
id=str(uuid4()),
|
||||||
|
params=MessageSendParams(
|
||||||
|
message={
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{"kind": "text", "text": "Hello!"}],
|
||||||
|
"messageId": uuid4().hex,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await client.send_message(request)
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
base_url: str,
|
||||||
|
timeout: float = 60.0,
|
||||||
|
extra_headers: Optional[Dict[str, str]] = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initialize the A2A client wrapper.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
base_url: The base URL of the A2A agent (e.g., "http://localhost:10001")
|
||||||
|
timeout: Request timeout in seconds (default: 60.0)
|
||||||
|
extra_headers: Optional additional headers to include in requests
|
||||||
|
"""
|
||||||
|
self.base_url = base_url
|
||||||
|
self.timeout = timeout
|
||||||
|
self.extra_headers = extra_headers
|
||||||
|
self._a2a_client: Optional["A2AClientType"] = None
|
||||||
|
|
||||||
|
async def _get_client(self) -> "A2AClientType":
|
||||||
|
"""Get or create the underlying A2A client."""
|
||||||
|
if self._a2a_client is None:
|
||||||
|
from litellm.a2a_protocol.main import create_a2a_client
|
||||||
|
|
||||||
|
self._a2a_client = await create_a2a_client(
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=self.timeout,
|
||||||
|
extra_headers=self.extra_headers,
|
||||||
|
)
|
||||||
|
return self._a2a_client
|
||||||
|
|
||||||
|
async def get_agent_card(self) -> "AgentCard":
|
||||||
|
"""Fetch the agent card from the server."""
|
||||||
|
from litellm.a2a_protocol.main import aget_agent_card
|
||||||
|
|
||||||
|
return await aget_agent_card(
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=self.timeout,
|
||||||
|
extra_headers=self.extra_headers,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def send_message(
|
||||||
|
self, request: "SendMessageRequest"
|
||||||
|
) -> LiteLLMSendMessageResponse:
|
||||||
|
"""Send a message to the A2A agent."""
|
||||||
|
from litellm.a2a_protocol.main import asend_message
|
||||||
|
|
||||||
|
a2a_client = await self._get_client()
|
||||||
|
return await asend_message(a2a_client=a2a_client, request=request)
|
||||||
|
|
||||||
|
async def send_message_streaming(
|
||||||
|
self, request: "SendStreamingMessageRequest"
|
||||||
|
) -> AsyncIterator["SendStreamingMessageResponse"]:
|
||||||
|
"""Send a streaming message to the A2A agent."""
|
||||||
|
from litellm.a2a_protocol.main import asend_message_streaming
|
||||||
|
|
||||||
|
a2a_client = await self._get_client()
|
||||||
|
async for chunk in asend_message_streaming(
|
||||||
|
a2a_client=a2a_client, request=request
|
||||||
|
):
|
||||||
|
yield chunk
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
"""
|
||||||
|
Cost calculator for A2A (Agent-to-Agent) calls.
|
||||||
|
|
||||||
|
Supports dynamic cost parameters that allow platform owners
|
||||||
|
to define custom costs per agent query or per token.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import (
|
||||||
|
Logging as LitellmLoggingObject,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
LitellmLoggingObject = Any
|
||||||
|
|
||||||
|
|
||||||
|
class A2ACostCalculator:
|
||||||
|
@staticmethod
|
||||||
|
def calculate_a2a_cost(
|
||||||
|
litellm_logging_obj: Optional[LitellmLoggingObject],
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Calculate the cost of an A2A send_message call.
|
||||||
|
|
||||||
|
Supports multiple cost parameters for platform owners:
|
||||||
|
- cost_per_query: Fixed cost per query
|
||||||
|
- input_cost_per_token + output_cost_per_token: Token-based pricing
|
||||||
|
|
||||||
|
Priority order:
|
||||||
|
1. response_cost - if set directly (backward compatibility)
|
||||||
|
2. cost_per_query - fixed cost per query
|
||||||
|
3. input_cost_per_token + output_cost_per_token - token-based cost
|
||||||
|
4. Default to 0.0
|
||||||
|
|
||||||
|
Args:
|
||||||
|
litellm_logging_obj: The LiteLLM logging object containing call details
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: The cost of the A2A call
|
||||||
|
"""
|
||||||
|
if litellm_logging_obj is None:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
model_call_details = litellm_logging_obj.model_call_details
|
||||||
|
|
||||||
|
# Check if user set a custom response cost (backward compatibility)
|
||||||
|
response_cost = model_call_details.get("response_cost", None)
|
||||||
|
if response_cost is not None:
|
||||||
|
return float(response_cost)
|
||||||
|
|
||||||
|
# Get litellm_params for cost parameters
|
||||||
|
litellm_params = model_call_details.get("litellm_params", {}) or {}
|
||||||
|
|
||||||
|
# Check for cost_per_query (fixed cost per query)
|
||||||
|
if litellm_params.get("cost_per_query") is not None:
|
||||||
|
return float(litellm_params["cost_per_query"])
|
||||||
|
|
||||||
|
# Check for token-based pricing
|
||||||
|
input_cost_per_token = litellm_params.get("input_cost_per_token")
|
||||||
|
output_cost_per_token = litellm_params.get("output_cost_per_token")
|
||||||
|
|
||||||
|
if input_cost_per_token is not None or output_cost_per_token is not None:
|
||||||
|
return A2ACostCalculator._calculate_token_based_cost(
|
||||||
|
model_call_details=model_call_details,
|
||||||
|
input_cost_per_token=input_cost_per_token,
|
||||||
|
output_cost_per_token=output_cost_per_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Default to 0.0 for A2A calls
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _calculate_token_based_cost(
|
||||||
|
model_call_details: dict,
|
||||||
|
input_cost_per_token: Optional[float],
|
||||||
|
output_cost_per_token: Optional[float],
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Calculate cost based on token usage and per-token pricing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model_call_details: The model call details containing usage
|
||||||
|
input_cost_per_token: Cost per input token (can be None, defaults to 0)
|
||||||
|
output_cost_per_token: Cost per output token (can be None, defaults to 0)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: The calculated cost
|
||||||
|
"""
|
||||||
|
# Get usage from model_call_details
|
||||||
|
usage = model_call_details.get("usage")
|
||||||
|
if usage is None:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# Get token counts
|
||||||
|
prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
|
||||||
|
completion_tokens = getattr(usage, "completion_tokens", 0) or 0
|
||||||
|
|
||||||
|
# Calculate costs
|
||||||
|
input_cost = prompt_tokens * (
|
||||||
|
float(input_cost_per_token) if input_cost_per_token else 0.0
|
||||||
|
)
|
||||||
|
output_cost = completion_tokens * (
|
||||||
|
float(output_cost_per_token) if output_cost_per_token else 0.0
|
||||||
|
)
|
||||||
|
|
||||||
|
return input_cost + output_cost
|
||||||
@@ -0,0 +1,203 @@
|
|||||||
|
"""
|
||||||
|
A2A Protocol Exception Mapping Utils.
|
||||||
|
|
||||||
|
Maps A2A SDK exceptions to LiteLLM A2A exception types.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.a2a_protocol.card_resolver import (
|
||||||
|
fix_agent_card_url,
|
||||||
|
is_localhost_or_internal_url,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.exceptions import (
|
||||||
|
A2AAgentCardError,
|
||||||
|
A2AConnectionError,
|
||||||
|
A2AError,
|
||||||
|
A2ALocalhostURLError,
|
||||||
|
)
|
||||||
|
from litellm.constants import CONNECTION_ERROR_PATTERNS
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from a2a.client import A2AClient as A2AClientType
|
||||||
|
|
||||||
|
|
||||||
|
# Runtime import
|
||||||
|
A2A_SDK_AVAILABLE = False
|
||||||
|
try:
|
||||||
|
from a2a.client import A2AClient as _A2AClient # type: ignore[no-redef]
|
||||||
|
|
||||||
|
A2A_SDK_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
_A2AClient = None # type: ignore[assignment, misc]
|
||||||
|
|
||||||
|
|
||||||
|
class A2AExceptionCheckers:
|
||||||
|
"""
|
||||||
|
Helper class for checking various A2A error conditions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_connection_error(error_str: str) -> bool:
|
||||||
|
"""
|
||||||
|
Check if an error string indicates a connection error.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
error_str: The error string to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the error indicates a connection issue
|
||||||
|
"""
|
||||||
|
if not isinstance(error_str, str):
|
||||||
|
return False
|
||||||
|
|
||||||
|
error_str_lower = error_str.lower()
|
||||||
|
return any(pattern in error_str_lower for pattern in CONNECTION_ERROR_PATTERNS)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_localhost_url(url: Optional[str]) -> bool:
|
||||||
|
"""
|
||||||
|
Check if a URL is a localhost/internal URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the URL is localhost/internal
|
||||||
|
"""
|
||||||
|
return is_localhost_or_internal_url(url)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_agent_card_error(error_str: str) -> bool:
|
||||||
|
"""
|
||||||
|
Check if an error string indicates an agent card error.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
error_str: The error string to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the error is related to agent card fetching/parsing
|
||||||
|
"""
|
||||||
|
if not isinstance(error_str, str):
|
||||||
|
return False
|
||||||
|
|
||||||
|
error_str_lower = error_str.lower()
|
||||||
|
agent_card_patterns = [
|
||||||
|
"agent card",
|
||||||
|
"agent-card",
|
||||||
|
".well-known",
|
||||||
|
"card not found",
|
||||||
|
"invalid agent",
|
||||||
|
]
|
||||||
|
return any(pattern in error_str_lower for pattern in agent_card_patterns)
|
||||||
|
|
||||||
|
|
||||||
|
def map_a2a_exception(
|
||||||
|
original_exception: Exception,
|
||||||
|
card_url: Optional[str] = None,
|
||||||
|
api_base: Optional[str] = None,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
) -> Exception:
|
||||||
|
"""
|
||||||
|
Map an A2A SDK exception to a LiteLLM A2A exception type.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
original_exception: The original exception from the A2A SDK
|
||||||
|
card_url: The URL from the agent card (if available)
|
||||||
|
api_base: The original API base URL
|
||||||
|
model: The model/agent name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A mapped LiteLLM A2A exception
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
A2ALocalhostURLError: If the error is a connection error to a localhost URL
|
||||||
|
A2AConnectionError: If the error is a general connection error
|
||||||
|
A2AAgentCardError: If the error is related to agent card issues
|
||||||
|
A2AError: For other A2A-related errors
|
||||||
|
"""
|
||||||
|
error_str = str(original_exception)
|
||||||
|
|
||||||
|
# Check for localhost URL connection error (special case - retryable)
|
||||||
|
if (
|
||||||
|
card_url
|
||||||
|
and api_base
|
||||||
|
and A2AExceptionCheckers.is_localhost_url(card_url)
|
||||||
|
and A2AExceptionCheckers.is_connection_error(error_str)
|
||||||
|
):
|
||||||
|
raise A2ALocalhostURLError(
|
||||||
|
localhost_url=card_url,
|
||||||
|
base_url=api_base,
|
||||||
|
original_error=original_exception,
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check for agent card errors
|
||||||
|
if A2AExceptionCheckers.is_agent_card_error(error_str):
|
||||||
|
raise A2AAgentCardError(
|
||||||
|
message=error_str,
|
||||||
|
url=api_base,
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check for general connection errors
|
||||||
|
if A2AExceptionCheckers.is_connection_error(error_str):
|
||||||
|
raise A2AConnectionError(
|
||||||
|
message=error_str,
|
||||||
|
url=card_url or api_base,
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Default: wrap in generic A2AError
|
||||||
|
raise A2AError(
|
||||||
|
message=error_str,
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_a2a_localhost_retry(
|
||||||
|
error: A2ALocalhostURLError,
|
||||||
|
agent_card: Any,
|
||||||
|
a2a_client: "A2AClientType",
|
||||||
|
is_streaming: bool = False,
|
||||||
|
) -> "A2AClientType":
|
||||||
|
"""
|
||||||
|
Handle A2ALocalhostURLError by fixing the URL and creating a new client.
|
||||||
|
|
||||||
|
This is called when we catch an A2ALocalhostURLError and want to retry
|
||||||
|
with the corrected URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
error: The localhost URL error
|
||||||
|
agent_card: The agent card object to fix
|
||||||
|
a2a_client: The current A2A client
|
||||||
|
is_streaming: Whether this is a streaming request (for logging)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A new A2A client with the fixed URL
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ImportError: If the A2A SDK is not installed
|
||||||
|
"""
|
||||||
|
if not A2A_SDK_AVAILABLE or _A2AClient is None:
|
||||||
|
raise ImportError(
|
||||||
|
"A2A SDK is required for localhost retry handling. "
|
||||||
|
"Install it with: pip install a2a"
|
||||||
|
)
|
||||||
|
|
||||||
|
request_type = "streaming " if is_streaming else ""
|
||||||
|
verbose_logger.warning(
|
||||||
|
f"A2A {request_type}request to '{error.localhost_url}' failed: {error.original_error}. "
|
||||||
|
f"Agent card contains localhost/internal URL. "
|
||||||
|
f"Retrying with base_url '{error.base_url}'."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Fix the agent card URL
|
||||||
|
fix_agent_card_url(agent_card, error.base_url)
|
||||||
|
|
||||||
|
# Create a new client with the fixed agent card (transport caches URL)
|
||||||
|
return _A2AClient(
|
||||||
|
httpx_client=a2a_client._transport.httpx_client, # type: ignore[union-attr]
|
||||||
|
agent_card=agent_card,
|
||||||
|
)
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""
|
||||||
|
A2A Protocol Exceptions.
|
||||||
|
|
||||||
|
Custom exception types for A2A protocol operations, following LiteLLM's exception pattern.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
class A2AError(Exception):
    """
    Base exception for A2A protocol errors.

    Mirrors the attribute layout of LiteLLM's main exception types so that
    existing error-handling / retry code can treat A2A failures uniformly.
    """

    def __init__(
        self,
        message: str,
        status_code: int = 500,
        llm_provider: str = "a2a_agent",
        model: Optional[str] = None,
        response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
        max_retries: Optional[int] = None,
        num_retries: Optional[int] = None,
    ):
        # Prefix the message the same way LiteLLM's core exceptions do.
        self.message = f"litellm.A2AError: {message}"
        self.status_code = status_code
        self.llm_provider = llm_provider
        self.model = model
        self.litellm_debug_info = litellm_debug_info
        self.max_retries = max_retries
        self.num_retries = num_retries
        # Synthesize a placeholder httpx.Response when none was captured,
        # so downstream consumers can always inspect `.response`.
        self.response = (
            response
            if response
            else httpx.Response(
                status_code=self.status_code,
                request=httpx.Request(method="POST", url="https://litellm.ai"),
            )
        )
        super().__init__(self.message)

    def __str__(self) -> str:
        # Append retry bookkeeping, when present, to the base message.
        text = self.message
        if self.num_retries:
            text = f"{text} LiteLLM Retried: {self.num_retries} times"
        if self.max_retries:
            text = f"{text}, LiteLLM Max Retries: {self.max_retries}"
        return text

    def __repr__(self) -> str:
        return self.__str__()
|
||||||
|
|
||||||
|
|
||||||
|
class A2AConnectionError(A2AError):
    """
    Raised when connection to an A2A agent fails.

    Typical causes:
    - The agent is unreachable
    - The agent card contains a localhost/internal URL
    - Network issues prevent connection
    """

    def __init__(
        self,
        message: str,
        url: Optional[str] = None,
        model: Optional[str] = None,
        response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
        max_retries: Optional[int] = None,
        num_retries: Optional[int] = None,
    ):
        # Remember which endpoint failed before delegating to the base class.
        self.url = url
        # 503 Service Unavailable: connectivity failures are retryable.
        super().__init__(
            message=message,
            llm_provider="a2a_agent",
            status_code=503,
            model=model,
            litellm_debug_info=litellm_debug_info,
            response=response,
            num_retries=num_retries,
            max_retries=max_retries,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class A2AAgentCardError(A2AError):
    """
    Raised when there's an issue with the agent card.

    Covers:
    - Failed to fetch agent card
    - Invalid agent card format
    - Missing required fields
    """

    def __init__(
        self,
        message: str,
        url: Optional[str] = None,
        model: Optional[str] = None,
        response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
    ):
        # URL of the card that could not be fetched/parsed (if known).
        self.url = url
        # 404: the card is missing or malformed — not a transient failure.
        super().__init__(
            message=message,
            llm_provider="a2a_agent",
            status_code=404,
            model=model,
            litellm_debug_info=litellm_debug_info,
            response=response,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class A2ALocalhostURLError(A2AConnectionError):
    """
    Raised when an agent card contains a localhost/internal URL.

    Many A2A agents are deployed with agent cards that advertise internal
    URLs like "http://0.0.0.0:8001/" or "http://localhost:8000/". This error
    signals that the URL should be rewritten to the public base URL and the
    request retried.

    Attributes:
        localhost_url: The localhost/internal URL found in the agent card
        base_url: The public base URL that should be used instead
        original_error: The original connection error that was raised
    """

    def __init__(
        self,
        localhost_url: str,
        base_url: str,
        original_error: Optional[Exception] = None,
        model: Optional[str] = None,
    ):
        self.localhost_url = localhost_url
        self.base_url = base_url
        self.original_error = original_error

        # Human-readable summary pointing at the corrective action.
        summary = (
            f"Agent card contains localhost/internal URL '{localhost_url}'. "
            f"Retrying with base URL '{base_url}'."
        )
        super().__init__(
            message=summary,
            url=localhost_url,
            model=model,
        )
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
# A2A to LiteLLM Completion Bridge
|
||||||
|
|
||||||
|
Routes A2A protocol requests through `litellm.acompletion`, enabling any LiteLLM-supported provider to be invoked via A2A.
|
||||||
|
|
||||||
|
## Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
A2A Request → Transform → litellm.acompletion → Transform → A2A Response
|
||||||
|
```
|
||||||
|
|
||||||
|
## SDK Usage
|
||||||
|
|
||||||
|
Use the existing `asend_message` and `asend_message_streaming` functions with `litellm_params`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm.a2a_protocol import asend_message, asend_message_streaming
|
||||||
|
from a2a.types import SendMessageRequest, SendStreamingMessageRequest, MessageSendParams
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
# Non-streaming
|
||||||
|
request = SendMessageRequest(
|
||||||
|
id=str(uuid4()),
|
||||||
|
params=MessageSendParams(
|
||||||
|
message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await asend_message(
|
||||||
|
request=request,
|
||||||
|
api_base="http://localhost:2024",
|
||||||
|
litellm_params={"custom_llm_provider": "langgraph", "model": "agent"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming
|
||||||
|
stream_request = SendStreamingMessageRequest(
|
||||||
|
id=str(uuid4()),
|
||||||
|
params=MessageSendParams(
|
||||||
|
message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
async for chunk in asend_message_streaming(
|
||||||
|
request=stream_request,
|
||||||
|
api_base="http://localhost:2024",
|
||||||
|
litellm_params={"custom_llm_provider": "langgraph", "model": "agent"},
|
||||||
|
):
|
||||||
|
print(chunk)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Proxy Usage
|
||||||
|
|
||||||
|
Configure an agent with `custom_llm_provider` in `litellm_params`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
agents:
|
||||||
|
- agent_name: my-langgraph-agent
|
||||||
|
agent_card_params:
|
||||||
|
name: "LangGraph Agent"
|
||||||
|
url: "http://localhost:2024" # Used as api_base
|
||||||
|
litellm_params:
|
||||||
|
custom_llm_provider: langgraph
|
||||||
|
model: agent
|
||||||
|
```
|
||||||
|
|
||||||
|
When an A2A request hits `/a2a/{agent_id}/message/send`, the bridge:
|
||||||
|
|
||||||
|
1. Detects `custom_llm_provider` in agent's `litellm_params`
|
||||||
|
2. Transforms A2A message → OpenAI messages
|
||||||
|
3. Calls `litellm.acompletion(model="langgraph/agent", api_base="http://localhost:2024")`
|
||||||
|
4. Transforms response → A2A format
|
||||||
|
|
||||||
|
## Classes
|
||||||
|
|
||||||
|
- `A2ACompletionBridgeTransformation` - Static methods for message format conversion
|
||||||
|
- `A2ACompletionBridgeHandler` - Static methods for handling requests (streaming/non-streaming)
|
||||||
|
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
"""
|
||||||
|
A2A to LiteLLM Completion Bridge.
|
||||||
|
|
||||||
|
This module provides transformation between A2A protocol messages and
|
||||||
|
LiteLLM completion API, enabling any LiteLLM-supported provider to be
|
||||||
|
invoked via the A2A protocol.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from litellm.a2a_protocol.litellm_completion_bridge.handler import (
|
||||||
|
A2ACompletionBridgeHandler,
|
||||||
|
handle_a2a_completion,
|
||||||
|
handle_a2a_completion_streaming,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.litellm_completion_bridge.transformation import (
|
||||||
|
A2ACompletionBridgeTransformation,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"A2ACompletionBridgeTransformation",
|
||||||
|
"A2ACompletionBridgeHandler",
|
||||||
|
"handle_a2a_completion",
|
||||||
|
"handle_a2a_completion_streaming",
|
||||||
|
]
|
||||||
@@ -0,0 +1,299 @@
|
|||||||
|
"""
|
||||||
|
Handler for A2A to LiteLLM completion bridge.
|
||||||
|
|
||||||
|
Routes A2A requests through litellm.acompletion based on custom_llm_provider.
|
||||||
|
|
||||||
|
A2A Streaming Events (in order):
|
||||||
|
1. Task event (kind: "task") - Initial task creation with status "submitted"
|
||||||
|
2. Status update (kind: "status-update") - Status change to "working"
|
||||||
|
3. Artifact update (kind: "artifact-update") - Content/artifact delivery
|
||||||
|
4. Status update (kind: "status-update") - Final status "completed" with final=true
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, AsyncIterator, Dict, Optional
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.a2a_protocol.litellm_completion_bridge.transformation import (
|
||||||
|
A2ACompletionBridgeTransformation,
|
||||||
|
A2AStreamingContext,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.providers.config_manager import A2AProviderConfigManager
|
||||||
|
|
||||||
|
|
||||||
|
class A2ACompletionBridgeHandler:
    """
    Static methods for handling A2A requests via LiteLLM completion.

    Dispatch order (both methods): first try a registered A2A provider config
    for the agent's ``custom_llm_provider``; only if none exists, fall back to
    transforming the A2A message into OpenAI chat format and calling
    ``litellm.acompletion`` directly.
    """

    @staticmethod
    async def handle_non_streaming(
        request_id: str,
        params: Dict[str, Any],
        litellm_params: Dict[str, Any],
        api_base: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Handle non-streaming A2A request via litellm.acompletion.

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            litellm_params: Agent's litellm_params (custom_llm_provider, model, etc.)
            api_base: API base URL from agent_card_params

        Returns:
            A2A SendMessageResponse dict

        Raises:
            ValueError: if a provider config exists but no api_base was given.
        """
        # Get provider config for custom_llm_provider
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        a2a_provider_config = A2AProviderConfigManager.get_provider_config(
            custom_llm_provider=custom_llm_provider
        )

        # If provider config exists, delegate entirely to it (it owns both
        # the transport and the A2A response shaping).
        if a2a_provider_config is not None:
            if api_base is None:
                raise ValueError(f"api_base is required for {custom_llm_provider}")

            verbose_logger.info(f"A2A: Using provider config for {custom_llm_provider}")

            response_data = await a2a_provider_config.handle_non_streaming(
                request_id=request_id,
                params=params,
                api_base=api_base,
            )

            return response_data

        # Fallback path: bridge through litellm.acompletion.
        # Extract message from params
        message = params.get("message", {})

        # Transform A2A message to OpenAI format
        openai_messages = (
            A2ACompletionBridgeTransformation.a2a_message_to_openai_messages(message)
        )

        # Get completion params
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        model = litellm_params.get("model", "agent")

        # Build full model string if provider specified
        # Skip prepending if model already starts with the provider prefix
        if custom_llm_provider and not model.startswith(f"{custom_llm_provider}/"):
            full_model = f"{custom_llm_provider}/{model}"
        else:
            full_model = model

        verbose_logger.info(
            f"A2A completion bridge: model={full_model}, api_base={api_base}"
        )

        # Build completion params dict
        completion_params = {
            "model": full_model,
            "messages": openai_messages,
            "api_base": api_base,
            "stream": False,
        }
        # Add litellm_params (contains api_key, client_id, client_secret, tenant_id, etc.)
        # NOTE(review): keys other than model/custom_llm_provider can override
        # "messages"/"api_base"/"stream" above — confirm that is intended.
        litellm_params_to_add = {
            k: v
            for k, v in litellm_params.items()
            if k not in ("model", "custom_llm_provider")
        }
        completion_params.update(litellm_params_to_add)

        # Call litellm.acompletion
        response = await litellm.acompletion(**completion_params)

        # Transform response to A2A format
        a2a_response = (
            A2ACompletionBridgeTransformation.openai_response_to_a2a_response(
                response=response,
                request_id=request_id,
            )
        )

        verbose_logger.info(f"A2A completion bridge completed: request_id={request_id}")

        return a2a_response

    @staticmethod
    async def handle_streaming(
        request_id: str,
        params: Dict[str, Any],
        litellm_params: Dict[str, Any],
        api_base: Optional[str] = None,
    ) -> AsyncIterator[Dict[str, Any]]:
        """
        Handle streaming A2A request via litellm.acompletion with stream=True.

        Emits proper A2A streaming events:
        1. Task event (kind: "task") - Initial task with status "submitted"
        2. Status update (kind: "status-update") - Status "working"
        3. Artifact update (kind: "artifact-update") - Content delivery
        4. Status update (kind: "status-update") - Final "completed" status

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            litellm_params: Agent's litellm_params (custom_llm_provider, model, etc.)
            api_base: API base URL from agent_card_params

        Yields:
            A2A streaming response events

        Raises:
            ValueError: if a provider config exists but no api_base was given.
        """
        # Get provider config for custom_llm_provider
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        a2a_provider_config = A2AProviderConfigManager.get_provider_config(
            custom_llm_provider=custom_llm_provider
        )

        # If provider config exists, stream straight from it and stop.
        if a2a_provider_config is not None:
            if api_base is None:
                raise ValueError(f"api_base is required for {custom_llm_provider}")

            verbose_logger.info(
                f"A2A: Using provider config for {custom_llm_provider} (streaming)"
            )

            async for chunk in a2a_provider_config.handle_streaming(
                request_id=request_id,
                params=params,
                api_base=api_base,
            ):
                yield chunk

            return

        # Fallback path: bridge through litellm.acompletion(stream=True).
        # Extract message from params
        message = params.get("message", {})

        # Create streaming context (allocates task_id / context_id shared by
        # all events below).
        ctx = A2AStreamingContext(
            request_id=request_id,
            input_message=message,
        )

        # Transform A2A message to OpenAI format
        openai_messages = (
            A2ACompletionBridgeTransformation.a2a_message_to_openai_messages(message)
        )

        # Get completion params
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        model = litellm_params.get("model", "agent")

        # Build full model string if provider specified
        # Skip prepending if model already starts with the provider prefix
        if custom_llm_provider and not model.startswith(f"{custom_llm_provider}/"):
            full_model = f"{custom_llm_provider}/{model}"
        else:
            full_model = model

        verbose_logger.info(
            f"A2A completion bridge streaming: model={full_model}, api_base={api_base}"
        )

        # Build completion params dict
        completion_params = {
            "model": full_model,
            "messages": openai_messages,
            "api_base": api_base,
            "stream": True,
        }
        # Add litellm_params (contains api_key, client_id, client_secret, tenant_id, etc.)
        litellm_params_to_add = {
            k: v
            for k, v in litellm_params.items()
            if k not in ("model", "custom_llm_provider")
        }
        completion_params.update(litellm_params_to_add)

        # 1. Emit initial task event (kind: "task", status: "submitted")
        task_event = A2ACompletionBridgeTransformation.create_task_event(ctx)
        yield task_event

        # 2. Emit status update (kind: "status-update", status: "working")
        working_event = A2ACompletionBridgeTransformation.create_status_update_event(
            ctx=ctx,
            state="working",
            final=False,
            message_text="Processing request...",
        )
        yield working_event

        # Call litellm.acompletion with streaming
        response = await litellm.acompletion(**completion_params)

        # 3. Accumulate content and emit artifact update.
        # NOTE(review): the whole stream is drained before a single artifact
        # event is emitted — the client does not see incremental deltas.
        accumulated_text = ""
        chunk_count = 0
        async for chunk in response:  # type: ignore[union-attr]
            chunk_count += 1

            # Extract delta content
            content = ""
            if chunk is not None and hasattr(chunk, "choices") and chunk.choices:
                choice = chunk.choices[0]
                if hasattr(choice, "delta") and choice.delta:
                    content = choice.delta.content or ""

            if content:
                accumulated_text += content

        # Emit artifact update with accumulated content (skipped when the
        # model produced no text at all).
        if accumulated_text:
            artifact_event = (
                A2ACompletionBridgeTransformation.create_artifact_update_event(
                    ctx=ctx,
                    text=accumulated_text,
                )
            )
            yield artifact_event

        # 4. Emit final status update (kind: "status-update", status: "completed", final: true)
        completed_event = A2ACompletionBridgeTransformation.create_status_update_event(
            ctx=ctx,
            state="completed",
            final=True,
        )
        yield completed_event

        verbose_logger.info(
            f"A2A completion bridge streaming completed: request_id={request_id}, chunks={chunk_count}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience functions that delegate to the class methods
async def handle_a2a_completion(
    request_id: str,
    params: Dict[str, Any],
    litellm_params: Dict[str, Any],
    api_base: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Convenience function for non-streaming A2A completion.

    Thin module-level wrapper around
    ``A2ACompletionBridgeHandler.handle_non_streaming`` — same arguments,
    same return value, same exceptions.
    """
    return await A2ACompletionBridgeHandler.handle_non_streaming(
        request_id=request_id,
        params=params,
        litellm_params=litellm_params,
        api_base=api_base,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_a2a_completion_streaming(
    request_id: str,
    params: Dict[str, Any],
    litellm_params: Dict[str, Any],
    api_base: Optional[str] = None,
) -> AsyncIterator[Dict[str, Any]]:
    """
    Convenience function for streaming A2A completion.

    Thin module-level wrapper that re-yields every event produced by
    ``A2ACompletionBridgeHandler.handle_streaming`` unchanged.
    """
    async for chunk in A2ACompletionBridgeHandler.handle_streaming(
        request_id=request_id,
        params=params,
        litellm_params=litellm_params,
        api_base=api_base,
    ):
        yield chunk
|
||||||
@@ -0,0 +1,284 @@
|
|||||||
|
"""
|
||||||
|
Transformation utilities for A2A <-> OpenAI message format conversion.
|
||||||
|
|
||||||
|
A2A Message Format:
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{"kind": "text", "text": "Hello!"}],
|
||||||
|
"messageId": "abc123"
|
||||||
|
}
|
||||||
|
|
||||||
|
OpenAI Message Format:
|
||||||
|
{"role": "user", "content": "Hello!"}
|
||||||
|
|
||||||
|
A2A Streaming Events:
|
||||||
|
- Task event (kind: "task") - Initial task creation with status "submitted"
|
||||||
|
- Status update (kind: "status-update") - Status changes (working, completed)
|
||||||
|
- Artifact update (kind: "artifact-update") - Content/artifact delivery
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
|
||||||
|
|
||||||
|
class A2AStreamingContext:
    """
    Mutable per-request state for an A2A streaming response.

    Holds the freshly generated task/context identifiers shared by every
    event in the stream, the original inbound message, and accumulation /
    emission bookkeeping.
    """

    def __init__(self, request_id: str, input_message: Dict[str, Any]):
        # JSON-RPC id of the originating request (echoed back in events).
        self.request_id = request_id
        # The raw A2A message that started this task.
        self.input_message = input_message
        # Fresh identifiers for this task and its conversation context.
        self.task_id = str(uuid4())
        self.context_id = str(uuid4())
        # Text gathered from streamed deltas so far.
        self.accumulated_text = ""
        # Flags recording which lifecycle events have already been emitted.
        self.has_emitted_task = False
        self.has_emitted_working = False
|
||||||
|
|
||||||
|
|
||||||
|
class A2ACompletionBridgeTransformation:
    """
    Static methods for transforming between A2A and OpenAI message formats.

    Also builds the three A2A streaming event shapes (task, status-update,
    artifact-update) from an ``A2AStreamingContext``.
    """

    @staticmethod
    def a2a_message_to_openai_messages(
        a2a_message: Dict[str, Any],
    ) -> List[Dict[str, str]]:
        """
        Transform an A2A message to OpenAI message format.

        Only ``kind == "text"`` parts are used; other part kinds are dropped.
        Multiple text parts are joined with a newline.

        Args:
            a2a_message: A2A message with role, parts, and messageId

        Returns:
            List of OpenAI-format messages (always a single-element list)
        """
        role = a2a_message.get("role", "user")
        parts = a2a_message.get("parts", [])

        # Map A2A roles to OpenAI roles.
        # NOTE(review): this chain only maps roles to themselves, and an A2A
        # "agent" role falls through unmapped — OpenAI chat expects
        # "assistant"; confirm callers never send role="agent" here.
        openai_role = role
        if role == "user":
            openai_role = "user"
        elif role == "assistant":
            openai_role = "assistant"
        elif role == "system":
            openai_role = "system"

        # Extract text content from parts
        content_parts = []
        for part in parts:
            kind = part.get("kind", "")
            if kind == "text":
                text = part.get("text", "")
                content_parts.append(text)

        content = "\n".join(content_parts) if content_parts else ""

        verbose_logger.debug(
            f"A2A -> OpenAI transform: role={role} -> {openai_role}, content_length={len(content)}"
        )

        return [{"role": openai_role, "content": content}]

    @staticmethod
    def openai_response_to_a2a_response(
        response: Any,
        request_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform a LiteLLM ModelResponse to A2A SendMessageResponse format.

        Args:
            response: LiteLLM ModelResponse object
            request_id: Original A2A request ID (echoed into the JSON-RPC id)

        Returns:
            A2A SendMessageResponse dict (JSON-RPC 2.0 envelope wrapping an
            agent message with a single text part; empty string if the
            response had no content)
        """
        # Extract content from response (defensively — attributes may be
        # absent or None on error-shaped responses).
        content = ""
        if hasattr(response, "choices") and response.choices:
            choice = response.choices[0]
            if hasattr(choice, "message") and choice.message:
                content = choice.message.content or ""

        # Build A2A message with a freshly generated messageId.
        a2a_message = {
            "role": "agent",
            "parts": [{"kind": "text", "text": content}],
            "messageId": uuid4().hex,
        }

        # Build A2A response
        a2a_response = {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "message": a2a_message,
            },
        }

        verbose_logger.debug(f"OpenAI -> A2A transform: content_length={len(content)}")

        return a2a_response

    @staticmethod
    def _get_timestamp() -> str:
        """Get current timestamp in ISO format with timezone (UTC)."""
        return datetime.now(timezone.utc).isoformat()

    @staticmethod
    def create_task_event(
        ctx: A2AStreamingContext,
    ) -> Dict[str, Any]:
        """
        Create the initial task event with status 'submitted'.

        This is the first event emitted in an A2A streaming response.  The
        inbound user message is replayed inside ``history`` so the client can
        correlate it with the task.
        """
        return {
            "id": ctx.request_id,
            "jsonrpc": "2.0",
            "result": {
                "contextId": ctx.context_id,
                "history": [
                    {
                        "contextId": ctx.context_id,
                        "kind": "message",
                        # Preserve the caller's messageId when present.
                        "messageId": ctx.input_message.get("messageId", uuid4().hex),
                        "parts": ctx.input_message.get("parts", []),
                        "role": ctx.input_message.get("role", "user"),
                        "taskId": ctx.task_id,
                    }
                ],
                "id": ctx.task_id,
                "kind": "task",
                "status": {
                    "state": "submitted",
                },
            },
        }

    @staticmethod
    def create_status_update_event(
        ctx: A2AStreamingContext,
        state: str,
        final: bool = False,
        message_text: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Create a status update event.

        Args:
            ctx: Streaming context
            state: Status state ('working', 'completed')
            final: Whether this is the final event of the stream
            message_text: Optional message text, attached only for the
                'working' state
        """
        status: Dict[str, Any] = {
            "state": state,
            "timestamp": A2ACompletionBridgeTransformation._get_timestamp(),
        }

        # Add message for 'working' status
        if state == "working" and message_text:
            status["message"] = {
                "contextId": ctx.context_id,
                "kind": "message",
                "messageId": str(uuid4()),
                "parts": [{"kind": "text", "text": message_text}],
                "role": "agent",
                "taskId": ctx.task_id,
            }

        return {
            "id": ctx.request_id,
            "jsonrpc": "2.0",
            "result": {
                "contextId": ctx.context_id,
                "final": final,
                "kind": "status-update",
                "status": status,
                "taskId": ctx.task_id,
            },
        }

    @staticmethod
    def create_artifact_update_event(
        ctx: A2AStreamingContext,
        text: str,
    ) -> Dict[str, Any]:
        """
        Create an artifact update event with content.

        Args:
            ctx: Streaming context
            text: The text content for the artifact
        """
        return {
            "id": ctx.request_id,
            "jsonrpc": "2.0",
            "result": {
                "artifact": {
                    # Each artifact gets a fresh id; name is always "response".
                    "artifactId": str(uuid4()),
                    "name": "response",
                    "parts": [{"kind": "text", "text": text}],
                },
                "contextId": ctx.context_id,
                "kind": "artifact-update",
                "taskId": ctx.task_id,
            },
        }

    @staticmethod
    def openai_chunk_to_a2a_chunk(
        chunk: Any,
        request_id: Optional[str] = None,
        is_final: bool = False,
    ) -> Optional[Dict[str, Any]]:
        """
        Transform a LiteLLM streaming chunk to A2A streaming format.

        NOTE: This method is deprecated for streaming. Use the event-based
        methods (create_task_event, create_status_update_event,
        create_artifact_update_event) instead for proper A2A streaming.

        Args:
            chunk: LiteLLM ModelResponse chunk
            request_id: Original A2A request ID
            is_final: Whether this is the final chunk

        Returns:
            A2A streaming chunk dict, or None when the chunk carries no
            content and is not the final chunk
        """
        # Extract delta content
        content = ""
        if chunk is not None and hasattr(chunk, "choices") and chunk.choices:
            choice = chunk.choices[0]
            if hasattr(choice, "delta") and choice.delta:
                content = choice.delta.content or ""

        # Content-less intermediate chunks are skipped entirely.
        if not content and not is_final:
            return None

        # Build A2A streaming chunk (legacy format)
        a2a_chunk = {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "message": {
                    "role": "agent",
                    "parts": [{"kind": "text", "text": content}],
                    "messageId": uuid4().hex,
                },
                "final": is_final,
            },
        }

        return a2a_chunk
|
||||||
@@ -0,0 +1,744 @@
|
|||||||
|
"""
|
||||||
|
LiteLLM A2A SDK functions.
|
||||||
|
|
||||||
|
Provides standalone functions with @client decorator for LiteLLM logging integration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import datetime
|
||||||
|
import uuid
|
||||||
|
from typing import TYPE_CHECKING, Any, AsyncIterator, Coroutine, Dict, Optional, Union
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger, verbose_proxy_logger
|
||||||
|
from litellm.a2a_protocol.streaming_iterator import A2AStreamingIterator
|
||||||
|
from litellm.a2a_protocol.utils import A2ARequestUtils
|
||||||
|
from litellm.constants import DEFAULT_A2A_AGENT_TIMEOUT
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||||
|
from litellm.llms.custom_httpx.http_handler import (
|
||||||
|
get_async_httpx_client,
|
||||||
|
httpxSpecialProvider,
|
||||||
|
)
|
||||||
|
from litellm.types.agents import LiteLLMSendMessageResponse
|
||||||
|
from litellm.utils import client
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from a2a.client import A2AClient as A2AClientType
|
||||||
|
from a2a.types import AgentCard, SendMessageRequest, SendStreamingMessageRequest
|
||||||
|
|
||||||
|
# Runtime imports with availability check
|
||||||
|
A2A_SDK_AVAILABLE = False
|
||||||
|
A2ACardResolver: Any = None
|
||||||
|
_A2AClient: Any = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from a2a.client import A2AClient as _A2AClient # type: ignore[no-redef]
|
||||||
|
|
||||||
|
A2A_SDK_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Import our custom card resolver that supports multiple well-known paths
|
||||||
|
from litellm.a2a_protocol.card_resolver import LiteLLMA2ACardResolver
|
||||||
|
from litellm.a2a_protocol.exception_mapping_utils import (
|
||||||
|
handle_a2a_localhost_retry,
|
||||||
|
map_a2a_exception,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.exceptions import A2ALocalhostURLError
|
||||||
|
|
||||||
|
# Use our custom resolver instead of the default A2A SDK resolver
|
||||||
|
A2ACardResolver = LiteLLMA2ACardResolver
|
||||||
|
|
||||||
|
|
||||||
|
def _set_usage_on_logging_obj(
|
||||||
|
kwargs: Dict[str, Any],
|
||||||
|
prompt_tokens: int,
|
||||||
|
completion_tokens: int,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Set usage on litellm_logging_obj for standard logging payload.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
kwargs: The kwargs dict containing litellm_logging_obj
|
||||||
|
prompt_tokens: Number of input tokens
|
||||||
|
completion_tokens: Number of output tokens
|
||||||
|
"""
|
||||||
|
litellm_logging_obj = kwargs.get("litellm_logging_obj")
|
||||||
|
if litellm_logging_obj is not None:
|
||||||
|
usage = litellm.Usage(
|
||||||
|
prompt_tokens=prompt_tokens,
|
||||||
|
completion_tokens=completion_tokens,
|
||||||
|
total_tokens=prompt_tokens + completion_tokens,
|
||||||
|
)
|
||||||
|
litellm_logging_obj.model_call_details["usage"] = usage
|
||||||
|
|
||||||
|
|
||||||
|
def _set_agent_id_on_logging_obj(
|
||||||
|
kwargs: Dict[str, Any],
|
||||||
|
agent_id: Optional[str],
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Set agent_id on litellm_logging_obj for SpendLogs tracking.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
kwargs: The kwargs dict containing litellm_logging_obj
|
||||||
|
agent_id: The A2A agent ID
|
||||||
|
"""
|
||||||
|
if agent_id is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
litellm_logging_obj = kwargs.get("litellm_logging_obj")
|
||||||
|
if litellm_logging_obj is not None:
|
||||||
|
# Set agent_id directly on model_call_details (same pattern as custom_llm_provider)
|
||||||
|
litellm_logging_obj.model_call_details["agent_id"] = agent_id
|
||||||
|
|
||||||
|
|
||||||
|
def _get_a2a_model_info(a2a_client: Any, kwargs: Dict[str, Any]) -> str:
|
||||||
|
"""
|
||||||
|
Extract agent info and set model/custom_llm_provider for cost tracking.
|
||||||
|
|
||||||
|
Sets model info on the litellm_logging_obj if available.
|
||||||
|
Returns the agent name for logging.
|
||||||
|
"""
|
||||||
|
agent_name = "unknown"
|
||||||
|
|
||||||
|
# Try to get agent card from our stored attribute first, then fallback to SDK attribute
|
||||||
|
agent_card = getattr(a2a_client, "_litellm_agent_card", None)
|
||||||
|
if agent_card is None:
|
||||||
|
agent_card = getattr(a2a_client, "agent_card", None)
|
||||||
|
|
||||||
|
if agent_card is not None:
|
||||||
|
agent_name = getattr(agent_card, "name", "unknown") or "unknown"
|
||||||
|
|
||||||
|
# Build model string
|
||||||
|
model = f"a2a_agent/{agent_name}"
|
||||||
|
custom_llm_provider = "a2a_agent"
|
||||||
|
|
||||||
|
# Set on litellm_logging_obj if available (for standard logging payload)
|
||||||
|
litellm_logging_obj = kwargs.get("litellm_logging_obj")
|
||||||
|
if litellm_logging_obj is not None:
|
||||||
|
litellm_logging_obj.model = model
|
||||||
|
litellm_logging_obj.custom_llm_provider = custom_llm_provider
|
||||||
|
litellm_logging_obj.model_call_details["model"] = model
|
||||||
|
litellm_logging_obj.model_call_details[
|
||||||
|
"custom_llm_provider"
|
||||||
|
] = custom_llm_provider
|
||||||
|
|
||||||
|
return agent_name
|
||||||
|
|
||||||
|
|
||||||
|
async def _send_message_via_completion_bridge(
    request: "SendMessageRequest",
    custom_llm_provider: str,
    api_base: Optional[str],
    litellm_params: Dict[str, Any],
) -> LiteLLMSendMessageResponse:
    """
    Route a send_message through the LiteLLM completion bridge (e.g. LangGraph, Bedrock AgentCore).

    Requires request; api_base is optional for providers that derive endpoint from model.
    """
    verbose_logger.info(
        f"A2A using completion bridge: provider={custom_llm_provider}, api_base={api_base}"
    )

    # Deferred import: the bridge handler is only needed on this code path.
    from litellm.a2a_protocol.litellm_completion_bridge.handler import (
        A2ACompletionBridgeHandler,
    )

    if hasattr(request.params, "model_dump"):
        params = request.params.model_dump(mode="json")
    else:
        params = dict(request.params)

    raw_response = await A2ACompletionBridgeHandler.handle_non_streaming(
        request_id=str(request.id),
        params=params,
        litellm_params=litellm_params,
        api_base=api_base,
    )

    return LiteLLMSendMessageResponse.from_dict(raw_response)
|
||||||
|
|
||||||
|
|
||||||
|
async def _execute_a2a_send_with_retry(
    a2a_client: Any,
    request: Any,
    agent_card: Any,
    card_url: Optional[str],
    api_base: Optional[str],
    agent_name: Optional[str],
) -> Any:
    """Send an A2A message, retrying once when a localhost-URL error occurs."""
    response = None
    attempts = 0
    while attempts < 2:  # original attempt plus at most one retry
        attempts += 1
        try:
            response = await a2a_client.send_message(request)
            break  # success
        except A2ALocalhostURLError as localhost_err:
            # Localhost URL rejected directly: swap in a client pointing at
            # the corrected URL and go around the loop again.
            a2a_client = handle_a2a_localhost_retry(
                error=localhost_err,
                agent_card=agent_card,
                a2a_client=a2a_client,
                is_streaming=False,
            )
            card_url = agent_card.url if agent_card else None
        except Exception as raw_err:
            # map_a2a_exception re-raises a mapped error; a localhost mapping
            # is recoverable, anything else propagates to the caller.
            try:
                map_a2a_exception(raw_err, card_url, api_base, model=agent_name)
            except A2ALocalhostURLError as localhost_err:
                a2a_client = handle_a2a_localhost_retry(
                    error=localhost_err,
                    agent_card=agent_card,
                    a2a_client=a2a_client,
                    is_streaming=False,
                )
                card_url = agent_card.url if agent_card else None

    if response is None:
        raise RuntimeError(
            "A2A send_message failed: no response received after retry attempts."
        )
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@client
async def asend_message(
    a2a_client: Optional["A2AClientType"] = None,
    request: Optional["SendMessageRequest"] = None,
    api_base: Optional[str] = None,
    litellm_params: Optional[Dict[str, Any]] = None,
    agent_id: Optional[str] = None,
    agent_extra_headers: Optional[Dict[str, str]] = None,
    **kwargs: Any,
) -> LiteLLMSendMessageResponse:
    """
    Async: Send a message to an A2A agent.

    Uses the @client decorator for LiteLLM logging and tracking.
    If litellm_params contains custom_llm_provider, routes through the completion bridge.

    Args:
        a2a_client: An initialized a2a.client.A2AClient instance (optional if using completion bridge)
        request: SendMessageRequest from a2a.types (optional if using completion bridge with api_base)
        api_base: API base URL (required for completion bridge, optional for standard A2A)
        litellm_params: Optional dict with custom_llm_provider, model, etc. for completion bridge
        agent_id: Optional agent ID for tracking in SpendLogs
        agent_extra_headers: Optional agent-level headers merged over LiteLLM's internal headers
        **kwargs: Additional arguments passed to the client decorator

    Returns:
        LiteLLMSendMessageResponse (wraps a2a SendMessageResponse with _hidden_params)

    Example (standard A2A):
        ```python
        from litellm.a2a_protocol import asend_message, create_a2a_client
        from a2a.types import SendMessageRequest, MessageSendParams
        from uuid import uuid4

        a2a_client = await create_a2a_client(base_url="http://localhost:10001")
        request = SendMessageRequest(
            id=str(uuid4()),
            params=MessageSendParams(
                message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
            )
        )
        response = await asend_message(a2a_client=a2a_client, request=request)
        ```

    Example (completion bridge with LangGraph):
        ```python
        from litellm.a2a_protocol import asend_message
        from a2a.types import SendMessageRequest, MessageSendParams
        from uuid import uuid4

        request = SendMessageRequest(
            id=str(uuid4()),
            params=MessageSendParams(
                message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
            )
        )
        response = await asend_message(
            request=request,
            api_base="http://localhost:2024",
            litellm_params={"custom_llm_provider": "langgraph", "model": "agent"},
        )
        ```
    """
    litellm_params = litellm_params or {}
    logging_obj = kwargs.get("litellm_logging_obj")
    # Trace id assigned by the @client decorator (if any); reused below for
    # the trace header and the A2A context_id.
    trace_id = getattr(logging_obj, "litellm_trace_id", None) if logging_obj else None
    custom_llm_provider = litellm_params.get("custom_llm_provider")

    # Route through completion bridge if custom_llm_provider is set
    if custom_llm_provider:
        if request is None:
            raise ValueError("request is required for completion bridge")
        return await _send_message_via_completion_bridge(
            request=request,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            litellm_params=litellm_params,
        )

    # Standard A2A client flow
    if request is None:
        raise ValueError("request is required")

    # Create A2A client if not provided but api_base is available
    if a2a_client is None:
        if api_base is None:
            raise ValueError(
                "Either a2a_client or api_base is required for standard A2A flow"
            )
        trace_id = trace_id or str(uuid.uuid4())
        extra_headers: Dict[str, str] = {"X-LiteLLM-Trace-Id": trace_id}
        if agent_id:
            extra_headers["X-LiteLLM-Agent-Id"] = agent_id
        # Overlay agent-level headers (agent headers take precedence over LiteLLM internal ones)
        if agent_extra_headers:
            extra_headers.update(agent_extra_headers)
        a2a_client = await create_a2a_client(
            base_url=api_base, extra_headers=extra_headers
        )

    # Type assertion: a2a_client is guaranteed to be non-None here
    assert a2a_client is not None

    agent_name = _get_a2a_model_info(a2a_client, kwargs)

    verbose_logger.info(f"A2A send_message request_id={request.id}, agent={agent_name}")

    # Get agent card URL for localhost retry logic
    agent_card = getattr(a2a_client, "_litellm_agent_card", None) or getattr(
        a2a_client, "agent_card", None
    )
    card_url = getattr(agent_card, "url", None) if agent_card else None

    # Thread the trace id through as the A2A context_id when the outgoing
    # message does not already carry one (dict and object message forms).
    context_id = trace_id or str(uuid.uuid4())
    message = request.params.message
    if isinstance(message, dict):
        if message.get("context_id") is None:
            message["context_id"] = context_id
    else:
        if getattr(message, "context_id", None) is None:
            message.context_id = context_id

    a2a_response = await _execute_a2a_send_with_retry(
        a2a_client=a2a_client,
        request=request,
        agent_card=agent_card,
        card_url=card_url,
        api_base=api_base,
        agent_name=agent_name,
    )

    verbose_logger.info(f"A2A send_message completed, request_id={request.id}")

    # Wrap in LiteLLM response type for _hidden_params support
    response = LiteLLMSendMessageResponse.from_a2a_response(a2a_response)

    # Calculate token usage from request and response
    response_dict = a2a_response.model_dump(mode="json", exclude_none=True)
    (
        prompt_tokens,
        completion_tokens,
        _,
    ) = A2ARequestUtils.calculate_usage_from_request_response(
        request=request,
        response_dict=response_dict,
    )

    # Set usage on logging obj for standard logging payload
    _set_usage_on_logging_obj(
        kwargs=kwargs,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )

    # Set agent_id on logging obj for SpendLogs tracking
    _set_agent_id_on_logging_obj(kwargs=kwargs, agent_id=agent_id)

    return response
|
||||||
|
|
||||||
|
|
||||||
|
@client
def send_message(
    a2a_client: "A2AClientType",
    request: "SendMessageRequest",
    **kwargs: Any,
) -> Union[LiteLLMSendMessageResponse, Coroutine[Any, Any, LiteLLMSendMessageResponse]]:
    """
    Sync: Send a message to an A2A agent.

    Uses the @client decorator for LiteLLM logging and tracking.
    When called from inside a running event loop the coroutine is returned
    for the caller to await; otherwise it is driven with ``asyncio.run``.

    Args:
        a2a_client: An initialized a2a.client.A2AClient instance
        request: SendMessageRequest from a2a.types
        **kwargs: Additional arguments passed to the client decorator

    Returns:
        LiteLLMSendMessageResponse (wraps a2a SendMessageResponse with _hidden_params)
    """
    coro = asend_message(a2a_client=a2a_client, request=request, **kwargs)
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No running loop: execute synchronously to completion.
        return asyncio.run(coro)
    # Already inside an event loop: hand the coroutine back to the caller.
    return coro
|
||||||
|
|
||||||
|
|
||||||
|
def _build_streaming_logging_obj(
    request: "SendStreamingMessageRequest",
    agent_name: str,
    agent_id: Optional[str],
    litellm_params: Optional[Dict[str, Any]],
    metadata: Optional[Dict[str, Any]],
    proxy_server_request: Optional[Dict[str, Any]],
) -> Logging:
    """Construct the Logging object used by streaming A2A completion callbacks."""
    model = f"a2a_agent/{agent_name}"

    logging_obj = Logging(
        model=model,
        messages=[{"role": "user", "content": "streaming-request"}],
        stream=False,
        call_type="asend_message_streaming",
        start_time=datetime.datetime.now(),
        litellm_call_id=str(request.id),
        function_id=str(request.id),
    )

    # Mirror model/provider onto both the object and model_call_details so
    # the standard logging payload picks them up.
    logging_obj.model = model
    logging_obj.custom_llm_provider = "a2a_agent"
    logging_obj.model_call_details["model"] = model
    logging_obj.model_call_details["custom_llm_provider"] = "a2a_agent"
    if agent_id:
        logging_obj.model_call_details["agent_id"] = agent_id

    # Copy so the caller's dict is never mutated.
    merged_params = dict(litellm_params) if litellm_params else {}
    if metadata:
        merged_params["metadata"] = metadata
    if proxy_server_request:
        merged_params["proxy_server_request"] = proxy_server_request

    logging_obj.litellm_params = merged_params
    logging_obj.optional_params = merged_params
    logging_obj.model_call_details["litellm_params"] = merged_params
    logging_obj.model_call_details["metadata"] = metadata or {}

    return logging_obj
|
||||||
|
|
||||||
|
|
||||||
|
async def asend_message_streaming(  # noqa: PLR0915
    a2a_client: Optional["A2AClientType"] = None,
    request: Optional["SendStreamingMessageRequest"] = None,
    api_base: Optional[str] = None,
    litellm_params: Optional[Dict[str, Any]] = None,
    agent_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    proxy_server_request: Optional[Dict[str, Any]] = None,
    agent_extra_headers: Optional[Dict[str, str]] = None,
) -> AsyncIterator[Any]:
    """
    Async: Send a streaming message to an A2A agent.

    If litellm_params contains custom_llm_provider, routes through the completion bridge.

    Args:
        a2a_client: An initialized a2a.client.A2AClient instance (optional if using completion bridge)
        request: SendStreamingMessageRequest from a2a.types
        api_base: API base URL (required for completion bridge)
        litellm_params: Optional dict with custom_llm_provider, model, etc. for completion bridge
        agent_id: Optional agent ID for tracking in SpendLogs
        metadata: Optional metadata dict (contains user_api_key, user_id, team_id, etc.)
        proxy_server_request: Optional proxy server request data
        agent_extra_headers: Optional agent-level headers merged over LiteLLM's internal headers

    Yields:
        SendStreamingMessageResponse chunks from the agent

    Example (completion bridge with LangGraph):
        ```python
        from litellm.a2a_protocol import asend_message_streaming
        from a2a.types import SendStreamingMessageRequest, MessageSendParams
        from uuid import uuid4

        request = SendStreamingMessageRequest(
            id=str(uuid4()),
            params=MessageSendParams(
                message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
            )
        )
        async for chunk in asend_message_streaming(
            request=request,
            api_base="http://localhost:2024",
            litellm_params={"custom_llm_provider": "langgraph", "model": "agent"},
        ):
            print(chunk)
        ```
    """
    litellm_params = litellm_params or {}
    custom_llm_provider = litellm_params.get("custom_llm_provider")

    # Route through completion bridge if custom_llm_provider is set
    if custom_llm_provider:
        if request is None:
            raise ValueError("request is required for completion bridge")
        # api_base is optional for providers that derive endpoint from model (e.g., bedrock/agentcore)

        verbose_logger.info(
            f"A2A streaming using completion bridge: provider={custom_llm_provider}"
        )

        # Deferred import: the bridge handler is only needed on this code path.
        from litellm.a2a_protocol.litellm_completion_bridge.handler import (
            A2ACompletionBridgeHandler,
        )

        # Extract params from request
        params = (
            request.params.model_dump(mode="json")
            if hasattr(request.params, "model_dump")
            else dict(request.params)
        )

        async for chunk in A2ACompletionBridgeHandler.handle_streaming(
            request_id=str(request.id),
            params=params,
            litellm_params=litellm_params,
            api_base=api_base,
        ):
            yield chunk
        return

    # Standard A2A client flow
    if request is None:
        raise ValueError("request is required")

    # Create A2A client if not provided but api_base is available
    if a2a_client is None:
        if api_base is None:
            raise ValueError(
                "Either a2a_client or api_base is required for standard A2A flow"
            )
        # Mirror the non-streaming path: always include trace and agent-id headers
        streaming_extra_headers: Dict[str, str] = {
            "X-LiteLLM-Trace-Id": str(request.id),
        }
        if agent_id:
            streaming_extra_headers["X-LiteLLM-Agent-Id"] = agent_id
        if agent_extra_headers:
            streaming_extra_headers.update(agent_extra_headers)
        a2a_client = await create_a2a_client(
            base_url=api_base, extra_headers=streaming_extra_headers
        )

    # Type assertion: a2a_client is guaranteed to be non-None here
    assert a2a_client is not None

    verbose_logger.info(f"A2A send_message_streaming request_id={request.id}")

    # Build logging object for streaming completion callbacks
    agent_card = getattr(a2a_client, "_litellm_agent_card", None) or getattr(
        a2a_client, "agent_card", None
    )
    card_url = getattr(agent_card, "url", None) if agent_card else None
    # NOTE(review): unlike _get_a2a_model_info on the non-streaming path, a
    # None card name is not coerced to "unknown" here — confirm intended.
    agent_name = getattr(agent_card, "name", "unknown") if agent_card else "unknown"

    logging_obj = _build_streaming_logging_obj(
        request=request,
        agent_name=agent_name,
        agent_id=agent_id,
        litellm_params=litellm_params,
        metadata=metadata,
        proxy_server_request=proxy_server_request,
    )

    # Retry loop: if connection fails due to localhost URL in agent card, retry with fixed URL
    # Connection errors in streaming typically occur on first chunk iteration
    first_chunk = True
    for attempt in range(2):  # max 2 attempts: original + 1 retry
        stream = a2a_client.send_message_streaming(request)
        iterator = A2AStreamingIterator(
            stream=stream,
            request=request,
            logging_obj=logging_obj,
            agent_name=agent_name,
        )

        try:
            first_chunk = True
            async for chunk in iterator:
                if first_chunk:
                    first_chunk = False  # connection succeeded
                yield chunk
            return  # stream completed successfully
        except A2ALocalhostURLError as e:
            # Only retry on first chunk, not mid-stream
            if first_chunk and attempt == 0:
                a2a_client = handle_a2a_localhost_retry(
                    error=e,
                    agent_card=agent_card,
                    a2a_client=a2a_client,
                    is_streaming=True,
                )
                card_url = agent_card.url if agent_card else None
            else:
                raise
        except Exception as e:
            # Only map exception on first chunk
            if first_chunk and attempt == 0:
                try:
                    map_a2a_exception(e, card_url, api_base, model=agent_name)
                except A2ALocalhostURLError as localhost_err:
                    # Localhost URL error - fix and retry
                    a2a_client = handle_a2a_localhost_retry(
                        error=localhost_err,
                        agent_card=agent_card,
                        a2a_client=a2a_client,
                        is_streaming=True,
                    )
                    card_url = agent_card.url if agent_card else None
                    continue
                except Exception:
                    # Re-raise the mapped exception
                    raise
            raise
|
||||||
|
|
||||||
|
|
||||||
|
async def create_a2a_client(
    base_url: str,
    timeout: float = 60.0,
    extra_headers: Optional[Dict[str, str]] = None,
) -> "A2AClientType":
    """
    Create an A2A client for the given agent URL.

    This resolves the agent card and returns a ready-to-use A2A client.
    The client can be reused for multiple requests.

    Args:
        base_url: The base URL of the A2A agent (e.g., "http://localhost:10001")
        timeout: Request timeout in seconds (default: 60.0)
        extra_headers: Optional additional headers to include in requests

    Returns:
        An initialized a2a.client.A2AClient instance

    Example:
        ```python
        from litellm.a2a_protocol import create_a2a_client, asend_message

        # Create client once
        client = await create_a2a_client(base_url="http://localhost:10001")

        # Reuse for multiple requests
        response1 = await asend_message(a2a_client=client, request=request1)
        response2 = await asend_message(a2a_client=client, request=request2)
        ```
    """
    if not A2A_SDK_AVAILABLE:
        raise ImportError(
            "The 'a2a' package is required for A2A agent invocation. "
            "Install it with: pip install a2a-sdk"
        )

    verbose_logger.info(f"Creating A2A client for {base_url}")

    # Use get_async_httpx_client with per-agent params so that different agents
    # (with different extra_headers) get separate cached clients. The params
    # dict is hashed into the cache key, keeping agent auth isolated while
    # still reusing connections within the same agent.
    #
    # Only pass params that AsyncHTTPHandler.__init__ accepts (e.g. timeout).
    # Use "disable_aiohttp_transport" key for cache-key-only data (it's
    # filtered out before reaching the constructor).
    _client_params: dict = {"timeout": timeout}
    if extra_headers:
        # Encode headers into a cache-key-only param so each unique header
        # set produces a distinct cache key.
        # HACK: repurposes the "disable_aiohttp_transport" key purely to vary
        # the cache key; the value is never read as a transport flag here.
        _client_params["disable_aiohttp_transport"] = str(sorted(extra_headers.items()))
    _async_handler = get_async_httpx_client(
        llm_provider=httpxSpecialProvider.A2AProvider,
        params=_client_params,
    )
    httpx_client = _async_handler.client
    if extra_headers:
        # Headers are set on the cached underlying client; safe because the
        # cache key above is unique per header set.
        httpx_client.headers.update(extra_headers)
        verbose_proxy_logger.debug(
            f"A2A client created with extra_headers={list(extra_headers.keys())}"
        )

    # Resolve agent card
    resolver = A2ACardResolver(
        httpx_client=httpx_client,
        base_url=base_url,
    )
    agent_card = await resolver.get_agent_card()

    verbose_logger.debug(
        f"Resolved agent card: {agent_card.name if hasattr(agent_card, 'name') else 'unknown'}"
    )

    # Create A2A client
    a2a_client = _A2AClient(
        httpx_client=httpx_client,
        agent_card=agent_card,
    )

    # Store agent_card on client for later retrieval (SDK doesn't expose it)
    a2a_client._litellm_agent_card = agent_card  # type: ignore[attr-defined]

    verbose_logger.info(f"A2A client created for {base_url}")

    return a2a_client
|
||||||
|
|
||||||
|
|
||||||
|
async def aget_agent_card(
    base_url: str,
    timeout: float = DEFAULT_A2A_AGENT_TIMEOUT,
    extra_headers: Optional[Dict[str, str]] = None,
) -> "AgentCard":
    """
    Fetch the agent card from an A2A agent.

    Args:
        base_url: The base URL of the A2A agent (e.g., "http://localhost:10001")
        timeout: Request timeout in seconds (default: DEFAULT_A2A_AGENT_TIMEOUT)
        extra_headers: Optional additional headers to include in requests
            (e.g. auth headers required by the agent endpoint)

    Returns:
        AgentCard from the A2A agent

    Raises:
        ImportError: If the optional ``a2a-sdk`` dependency is not installed.
    """
    if not A2A_SDK_AVAILABLE:
        raise ImportError(
            "The 'a2a' package is required for A2A agent invocation. "
            "Install it with: pip install a2a-sdk"
        )

    verbose_logger.info(f"Fetching agent card from {base_url}")

    # Use LiteLLM's cached httpx client
    http_handler = get_async_httpx_client(
        llm_provider=httpxSpecialProvider.A2A,
        params={"timeout": timeout},
    )
    httpx_client = http_handler.client
    # BUG FIX: extra_headers was accepted but never applied, so caller-supplied
    # auth/trace headers were silently dropped when fetching the card. Apply
    # them here (same approach as create_a2a_client).
    # NOTE(review): the underlying client is cached; headers set here persist
    # on that cached client — confirm acceptable for multi-agent use.
    if extra_headers:
        httpx_client.headers.update(extra_headers)

    resolver = A2ACardResolver(
        httpx_client=httpx_client,
        base_url=base_url,
    )
    agent_card = await resolver.get_agent_card()

    verbose_logger.info(
        f"Fetched agent card: {agent_card.name if hasattr(agent_card, 'name') else 'unknown'}"
    )
    return agent_card
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
"""
|
||||||
|
A2A Protocol Providers.
|
||||||
|
|
||||||
|
This module contains provider-specific implementations for the A2A protocol.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from litellm.a2a_protocol.providers.base import BaseA2AProviderConfig
|
||||||
|
from litellm.a2a_protocol.providers.config_manager import A2AProviderConfigManager
|
||||||
|
|
||||||
|
__all__ = ["BaseA2AProviderConfig", "A2AProviderConfigManager"]
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
"""
|
||||||
|
Base configuration for A2A protocol providers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Any, AsyncIterator, Dict
|
||||||
|
|
||||||
|
|
||||||
|
class BaseA2AProviderConfig(ABC):
    """
    Abstract base for A2A protocol provider configurations.

    A provider implements this interface to define how A2A requests are
    handled for its specific agent type.
    """

    @abstractmethod
    async def handle_non_streaming(
        self,
        request_id: str,
        params: Dict[str, Any],
        api_base: str,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Handle a non-streaming A2A request.

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            api_base: Base URL of the agent
            **kwargs: Additional provider-specific parameters

        Returns:
            A2A SendMessageResponse dict
        """
        ...

    @abstractmethod
    async def handle_streaming(
        self,
        request_id: str,
        params: Dict[str, Any],
        api_base: str,
        **kwargs,
    ) -> AsyncIterator[Dict[str, Any]]:
        """
        Handle a streaming A2A request.

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            api_base: Base URL of the agent
            **kwargs: Additional provider-specific parameters

        Yields:
            A2A streaming response events
        """
        # Abstract async generator: the unreachable yield makes this a
        # generator function so overrides share its calling convention.
        if False:  # pragma: no cover
            yield {}
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
"""
|
||||||
|
A2A Provider Config Manager.
|
||||||
|
|
||||||
|
Manages provider-specific configurations for A2A protocol.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from litellm.a2a_protocol.providers.base import BaseA2AProviderConfig
|
||||||
|
|
||||||
|
|
||||||
|
class A2AProviderConfigManager:
    """
    Manager for A2A provider configurations.

    Similar to ProviderConfigManager in litellm.utils but specifically for A2A providers.
    """

    @staticmethod
    def get_provider_config(
        custom_llm_provider: Optional[str],
    ) -> Optional[BaseA2AProviderConfig]:
        """
        Look up the provider configuration for a given custom_llm_provider.

        Args:
            custom_llm_provider: The provider identifier (e.g., "pydantic_ai_agents")

        Returns:
            Provider configuration instance, or None when no provider matches
            (including when ``custom_llm_provider`` is None).
        """
        if custom_llm_provider == "pydantic_ai_agents":
            # Imported lazily so the provider package is only loaded on use.
            from litellm.a2a_protocol.providers.pydantic_ai_agents.config import (
                PydanticAIProviderConfig,
            )

            return PydanticAIProviderConfig()

        # Add more providers here as needed
        # elif custom_llm_provider == "another_provider":
        #     from litellm.a2a_protocol.providers.another_provider.config import AnotherProviderConfig
        #     return AnotherProviderConfig()

        return None
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
# A2A to LiteLLM Completion Bridge

Routes A2A protocol requests through `litellm.acompletion`, enabling any LiteLLM-supported provider to be invoked via A2A.

## Flow

```
A2A Request → Transform → litellm.acompletion → Transform → A2A Response
```

## SDK Usage

Use the existing `asend_message` and `asend_message_streaming` functions with `litellm_params`:

```python
from litellm.a2a_protocol import asend_message, asend_message_streaming
from a2a.types import SendMessageRequest, SendStreamingMessageRequest, MessageSendParams
from uuid import uuid4

# Non-streaming
request = SendMessageRequest(
    id=str(uuid4()),
    params=MessageSendParams(
        message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
    )
)
response = await asend_message(
    request=request,
    api_base="http://localhost:2024",
    litellm_params={"custom_llm_provider": "langgraph", "model": "agent"},
)

# Streaming
stream_request = SendStreamingMessageRequest(
    id=str(uuid4()),
    params=MessageSendParams(
        message={"role": "user", "parts": [{"kind": "text", "text": "Hello!"}], "messageId": uuid4().hex}
    )
)
async for chunk in asend_message_streaming(
    request=stream_request,
    api_base="http://localhost:2024",
    litellm_params={"custom_llm_provider": "langgraph", "model": "agent"},
):
    print(chunk)
```

## Proxy Usage

Configure an agent with `custom_llm_provider` in `litellm_params`:

```yaml
agents:
  - agent_name: my-langgraph-agent
    agent_card_params:
      name: "LangGraph Agent"
      url: "http://localhost:2024"  # Used as api_base
    litellm_params:
      custom_llm_provider: langgraph
      model: agent
```

When an A2A request hits `/a2a/{agent_id}/message/send`, the bridge:

1. Detects `custom_llm_provider` in the agent's `litellm_params`
2. Transforms the A2A message → OpenAI messages
3. Calls `litellm.acompletion(model="langgraph/agent", api_base="http://localhost:2024")`
4. Transforms the response → A2A format

## Classes

- `A2ACompletionBridgeTransformation` - Static methods for message format conversion
- `A2ACompletionBridgeHandler` - Static methods for handling requests (streaming/non-streaming)
|
||||||
|
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
"""
|
||||||
|
LiteLLM Completion bridge provider for A2A protocol.
|
||||||
|
|
||||||
|
Routes A2A requests through litellm.acompletion based on custom_llm_provider.
|
||||||
|
"""
|
||||||
@@ -0,0 +1,301 @@
|
|||||||
|
"""
|
||||||
|
Handler for A2A to LiteLLM completion bridge.
|
||||||
|
|
||||||
|
Routes A2A requests through litellm.acompletion based on custom_llm_provider.
|
||||||
|
|
||||||
|
A2A Streaming Events (in order):
|
||||||
|
1. Task event (kind: "task") - Initial task creation with status "submitted"
|
||||||
|
2. Status update (kind: "status-update") - Status change to "working"
|
||||||
|
3. Artifact update (kind: "artifact-update") - Content/artifact delivery
|
||||||
|
4. Status update (kind: "status-update") - Final status "completed" with final=true
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, AsyncIterator, Dict, Optional
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.a2a_protocol.litellm_completion_bridge.pydantic_ai_transformation import (
|
||||||
|
PydanticAITransformation,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.litellm_completion_bridge.transformation import (
|
||||||
|
A2ACompletionBridgeTransformation,
|
||||||
|
A2AStreamingContext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class A2ACompletionBridgeHandler:
    """
    Static methods for handling A2A requests via LiteLLM completion.

    Requests whose ``litellm_params`` declare
    ``custom_llm_provider == "pydantic_ai_agents"`` are sent directly to the
    Pydantic AI agent (it speaks A2A natively); everything else is routed
    through ``litellm.acompletion``.
    """

    @staticmethod
    def _build_completion_params(
        litellm_params: Dict[str, Any],
        openai_messages: Any,
        api_base: Optional[str],
        stream: bool,
    ) -> Dict[str, Any]:
        """
        Build the kwargs dict passed to ``litellm.acompletion``.

        The model string is prefixed with ``custom_llm_provider`` (unless it
        already carries that prefix), and every remaining litellm param
        (api_key, client_id, client_secret, tenant_id, etc.) is forwarded
        verbatim; "model" and "custom_llm_provider" are consumed here.
        """
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        model = litellm_params.get("model", "agent")

        # Skip prepending if model already starts with the provider prefix.
        if custom_llm_provider and not model.startswith(f"{custom_llm_provider}/"):
            full_model = f"{custom_llm_provider}/{model}"
        else:
            full_model = model

        completion_params: Dict[str, Any] = {
            "model": full_model,
            "messages": openai_messages,
            "api_base": api_base,
            "stream": stream,
        }
        completion_params.update(
            {
                k: v
                for k, v in litellm_params.items()
                if k not in ("model", "custom_llm_provider")
            }
        )
        return completion_params

    @staticmethod
    def _delta_text(chunk: Any) -> str:
        """Extract the text delta from a streaming chunk ("" when absent)."""
        if chunk is not None and hasattr(chunk, "choices") and chunk.choices:
            choice = chunk.choices[0]
            if hasattr(choice, "delta") and choice.delta:
                return choice.delta.content or ""
        return ""

    @staticmethod
    async def handle_non_streaming(
        request_id: str,
        params: Dict[str, Any],
        litellm_params: Dict[str, Any],
        api_base: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Handle non-streaming A2A request via litellm.acompletion.

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            litellm_params: Agent's litellm_params (custom_llm_provider, model, etc.)
            api_base: API base URL from agent_card_params

        Returns:
            A2A SendMessageResponse dict

        Raises:
            ValueError: when routing to a Pydantic AI agent without an api_base.
        """
        # Pydantic AI agents already speak A2A: bypass the completion bridge.
        if litellm_params.get("custom_llm_provider") == "pydantic_ai_agents":
            if api_base is None:
                raise ValueError("api_base is required for Pydantic AI agents")

            verbose_logger.info(
                f"Pydantic AI: Routing to Pydantic AI agent at {api_base}"
            )
            return await PydanticAITransformation.send_non_streaming_request(
                api_base=api_base,
                request_id=request_id,
                params=params,
            )

        # Transform the inbound A2A message to OpenAI chat format.
        message = params.get("message", {})
        openai_messages = (
            A2ACompletionBridgeTransformation.a2a_message_to_openai_messages(message)
        )

        completion_params = A2ACompletionBridgeHandler._build_completion_params(
            litellm_params=litellm_params,
            openai_messages=openai_messages,
            api_base=api_base,
            stream=False,
        )
        verbose_logger.info(
            f"A2A completion bridge: model={completion_params['model']}, api_base={api_base}"
        )

        response = await litellm.acompletion(**completion_params)

        # Transform the ModelResponse back into an A2A SendMessageResponse.
        a2a_response = (
            A2ACompletionBridgeTransformation.openai_response_to_a2a_response(
                response=response,
                request_id=request_id,
            )
        )
        verbose_logger.info(f"A2A completion bridge completed: request_id={request_id}")
        return a2a_response

    @staticmethod
    async def handle_streaming(
        request_id: str,
        params: Dict[str, Any],
        litellm_params: Dict[str, Any],
        api_base: Optional[str] = None,
    ) -> AsyncIterator[Dict[str, Any]]:
        """
        Handle streaming A2A request via litellm.acompletion with stream=True.

        Emits proper A2A streaming events, in order:
        1. Task event (kind: "task") - Initial task with status "submitted"
        2. Status update (kind: "status-update") - Status "working"
        3. Artifact update (kind: "artifact-update") - Content delivery
        4. Status update (kind: "status-update") - Final "completed" status

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            litellm_params: Agent's litellm_params (custom_llm_provider, model, etc.)
            api_base: API base URL from agent_card_params

        Yields:
            A2A streaming response events

        Raises:
            ValueError: when routing to a Pydantic AI agent without an api_base.
        """
        # Pydantic AI agents don't stream: fetch once, then fake-stream.
        if litellm_params.get("custom_llm_provider") == "pydantic_ai_agents":
            if api_base is None:
                raise ValueError("api_base is required for Pydantic AI agents")

            verbose_logger.info(
                f"Pydantic AI: Faking streaming for Pydantic AI agent at {api_base}"
            )
            response_data = await PydanticAITransformation.send_non_streaming_request(
                api_base=api_base,
                request_id=request_id,
                params=params,
            )
            async for chunk in PydanticAITransformation.fake_streaming_from_response(
                response_data=response_data,
                request_id=request_id,
            ):
                yield chunk
            return

        message = params.get("message", {})

        # Per-exchange state: task/context ids shared by all emitted events.
        ctx = A2AStreamingContext(
            request_id=request_id,
            input_message=message,
        )

        openai_messages = (
            A2ACompletionBridgeTransformation.a2a_message_to_openai_messages(message)
        )
        completion_params = A2ACompletionBridgeHandler._build_completion_params(
            litellm_params=litellm_params,
            openai_messages=openai_messages,
            api_base=api_base,
            stream=True,
        )
        verbose_logger.info(
            f"A2A completion bridge streaming: model={completion_params['model']}, api_base={api_base}"
        )

        # 1. Emit initial task event (kind: "task", status: "submitted").
        yield A2ACompletionBridgeTransformation.create_task_event(ctx)

        # 2. Emit status update (kind: "status-update", status: "working").
        yield A2ACompletionBridgeTransformation.create_status_update_event(
            ctx=ctx,
            state="working",
            final=False,
            message_text="Processing request...",
        )

        response = await litellm.acompletion(**completion_params)

        # 3. Drain the provider stream, accumulating text deltas.
        accumulated_text = ""
        chunk_count = 0
        async for chunk in response:  # type: ignore[union-attr]
            chunk_count += 1
            accumulated_text += A2ACompletionBridgeHandler._delta_text(chunk)

        # Emit a single artifact update carrying the full accumulated content.
        if accumulated_text:
            yield A2ACompletionBridgeTransformation.create_artifact_update_event(
                ctx=ctx,
                text=accumulated_text,
            )

        # 4. Emit final status update (kind: "status-update", final: true).
        yield A2ACompletionBridgeTransformation.create_status_update_event(
            ctx=ctx,
            state="completed",
            final=True,
        )

        verbose_logger.info(
            f"A2A completion bridge streaming completed: request_id={request_id}, chunks={chunk_count}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience functions that delegate to the class methods
|
||||||
|
async def handle_a2a_completion(
    request_id: str,
    params: Dict[str, Any],
    litellm_params: Dict[str, Any],
    api_base: Optional[str] = None,
) -> Dict[str, Any]:
    """Module-level shorthand for ``A2ACompletionBridgeHandler.handle_non_streaming``."""
    call_kwargs = dict(
        request_id=request_id,
        params=params,
        litellm_params=litellm_params,
        api_base=api_base,
    )
    return await A2ACompletionBridgeHandler.handle_non_streaming(**call_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_a2a_completion_streaming(
    request_id: str,
    params: Dict[str, Any],
    litellm_params: Dict[str, Any],
    api_base: Optional[str] = None,
) -> AsyncIterator[Dict[str, Any]]:
    """Module-level shorthand for ``A2ACompletionBridgeHandler.handle_streaming``."""
    stream = A2ACompletionBridgeHandler.handle_streaming(
        request_id=request_id,
        params=params,
        litellm_params=litellm_params,
        api_base=api_base,
    )
    async for event in stream:
        yield event
|
||||||
@@ -0,0 +1,284 @@
|
|||||||
|
"""
|
||||||
|
Transformation utilities for A2A <-> OpenAI message format conversion.
|
||||||
|
|
||||||
|
A2A Message Format:
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{"kind": "text", "text": "Hello!"}],
|
||||||
|
"messageId": "abc123"
|
||||||
|
}
|
||||||
|
|
||||||
|
OpenAI Message Format:
|
||||||
|
{"role": "user", "content": "Hello!"}
|
||||||
|
|
||||||
|
A2A Streaming Events:
|
||||||
|
- Task event (kind: "task") - Initial task creation with status "submitted"
|
||||||
|
- Status update (kind: "status-update") - Status changes (working, completed)
|
||||||
|
- Artifact update (kind: "artifact-update") - Content/artifact delivery
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
|
||||||
|
|
||||||
|
class A2AStreamingContext:
    """
    Mutable state bag for one A2A streaming exchange.

    Carries the JSON-RPC request id, freshly minted task/context UUIDs,
    the original inbound message, and bookkeeping for text accumulation
    and which lifecycle events have already been emitted.
    """

    def __init__(self, request_id: str, input_message: Dict[str, Any]):
        # JSON-RPC id echoed back in every streamed event.
        self.request_id = request_id
        # The original A2A message this exchange is answering.
        self.input_message = input_message
        # Fresh identifiers minted per exchange (UUID4 strings).
        self.task_id, self.context_id = str(uuid4()), str(uuid4())
        # Running concatenation of streamed content.
        self.accumulated_text = ""
        # Lifecycle guards: which events have gone out already.
        self.has_emitted_task = self.has_emitted_working = False
|
||||||
|
|
||||||
|
|
||||||
|
class A2ACompletionBridgeTransformation:
    """
    Static methods for transforming between A2A and OpenAI message formats
    and for building the A2A streaming event envelopes.
    """

    @staticmethod
    def a2a_message_to_openai_messages(
        a2a_message: Dict[str, Any],
    ) -> List[Dict[str, str]]:
        """
        Transform an A2A message to OpenAI message format.

        Text parts are joined with newlines into a single content string;
        non-text parts are ignored. A2A uses the role "agent" for
        model-authored messages, which OpenAI does not accept — it is
        mapped to "assistant". All other roles pass through unchanged.

        Args:
            a2a_message: A2A message with role, parts, and messageId

        Returns:
            List of OpenAI-format messages (always exactly one message)
        """
        role = a2a_message.get("role", "user")
        parts = a2a_message.get("parts", [])

        # Map A2A's "agent" role to OpenAI's "assistant"; other roles
        # ("user", "system", "assistant") are already valid as-is.
        openai_role = "assistant" if role == "agent" else role

        # Extract text content from parts, skipping non-text kinds.
        content_parts = [
            part.get("text", "") for part in parts if part.get("kind", "") == "text"
        ]
        content = "\n".join(content_parts)

        verbose_logger.debug(
            f"A2A -> OpenAI transform: role={role} -> {openai_role}, content_length={len(content)}"
        )

        return [{"role": openai_role, "content": content}]

    @staticmethod
    def openai_response_to_a2a_response(
        response: Any,
        request_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform a LiteLLM ModelResponse to A2A SendMessageResponse format.

        Args:
            response: LiteLLM ModelResponse object
            request_id: Original A2A request ID

        Returns:
            A2A SendMessageResponse dict (JSON-RPC envelope with an
            agent-role message carrying one text part)
        """
        # Extract the first choice's message content, defaulting to "".
        content = ""
        if hasattr(response, "choices") and response.choices:
            choice = response.choices[0]
            if hasattr(choice, "message") and choice.message:
                content = choice.message.content or ""

        # Build the A2A message (model output uses the "agent" role).
        a2a_message = {
            "role": "agent",
            "parts": [{"kind": "text", "text": content}],
            "messageId": uuid4().hex,
        }

        # Wrap in the JSON-RPC response envelope.
        a2a_response = {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "message": a2a_message,
            },
        }

        verbose_logger.debug(f"OpenAI -> A2A transform: content_length={len(content)}")
        return a2a_response

    @staticmethod
    def _get_timestamp() -> str:
        """Get current timestamp in ISO format with timezone (UTC)."""
        return datetime.now(timezone.utc).isoformat()

    @staticmethod
    def create_task_event(
        ctx: A2AStreamingContext,
    ) -> Dict[str, Any]:
        """
        Create the initial task event with status 'submitted'.

        This is the first event emitted in an A2A streaming response; the
        inbound message is echoed back in the task's history.
        """
        return {
            "id": ctx.request_id,
            "jsonrpc": "2.0",
            "result": {
                "contextId": ctx.context_id,
                "history": [
                    {
                        "contextId": ctx.context_id,
                        "kind": "message",
                        "messageId": ctx.input_message.get("messageId", uuid4().hex),
                        "parts": ctx.input_message.get("parts", []),
                        "role": ctx.input_message.get("role", "user"),
                        "taskId": ctx.task_id,
                    }
                ],
                "id": ctx.task_id,
                "kind": "task",
                "status": {
                    "state": "submitted",
                },
            },
        }

    @staticmethod
    def create_status_update_event(
        ctx: A2AStreamingContext,
        state: str,
        final: bool = False,
        message_text: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Create a status update event (kind: "status-update").

        Args:
            ctx: Streaming context
            state: Status state ('working', 'completed')
            final: Whether this is the final event
            message_text: Optional message text attached only to the
                'working' status
        """
        status: Dict[str, Any] = {
            "state": state,
            "timestamp": A2ACompletionBridgeTransformation._get_timestamp(),
        }

        # Only the 'working' state carries an agent progress message.
        if state == "working" and message_text:
            status["message"] = {
                "contextId": ctx.context_id,
                "kind": "message",
                "messageId": str(uuid4()),
                "parts": [{"kind": "text", "text": message_text}],
                "role": "agent",
                "taskId": ctx.task_id,
            }

        return {
            "id": ctx.request_id,
            "jsonrpc": "2.0",
            "result": {
                "contextId": ctx.context_id,
                "final": final,
                "kind": "status-update",
                "status": status,
                "taskId": ctx.task_id,
            },
        }

    @staticmethod
    def create_artifact_update_event(
        ctx: A2AStreamingContext,
        text: str,
    ) -> Dict[str, Any]:
        """
        Create an artifact update event (kind: "artifact-update") carrying
        the response content as a single text part.

        Args:
            ctx: Streaming context
            text: The text content for the artifact
        """
        return {
            "id": ctx.request_id,
            "jsonrpc": "2.0",
            "result": {
                "artifact": {
                    "artifactId": str(uuid4()),
                    "name": "response",
                    "parts": [{"kind": "text", "text": text}],
                },
                "contextId": ctx.context_id,
                "kind": "artifact-update",
                "taskId": ctx.task_id,
            },
        }

    @staticmethod
    def openai_chunk_to_a2a_chunk(
        chunk: Any,
        request_id: Optional[str] = None,
        is_final: bool = False,
    ) -> Optional[Dict[str, Any]]:
        """
        Transform a LiteLLM streaming chunk to A2A streaming format.

        NOTE: This method is deprecated for streaming. Use the event-based
        methods (create_task_event, create_status_update_event,
        create_artifact_update_event) instead for proper A2A streaming.

        Args:
            chunk: LiteLLM ModelResponse chunk
            request_id: Original A2A request ID
            is_final: Whether this is the final chunk

        Returns:
            A2A streaming chunk dict, or None when the chunk carries no
            content and is not final
        """
        # Extract the delta content, defaulting to "".
        content = ""
        if chunk is not None and hasattr(chunk, "choices") and chunk.choices:
            choice = chunk.choices[0]
            if hasattr(choice, "delta") and choice.delta:
                content = choice.delta.content or ""

        # Empty non-final chunks are suppressed entirely.
        if not content and not is_final:
            return None

        # Build the A2A streaming chunk (legacy format).
        a2a_chunk = {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "message": {
                    "role": "agent",
                    "parts": [{"kind": "text", "text": content}],
                    "messageId": uuid4().hex,
                },
                "final": is_final,
            },
        }
        return a2a_chunk
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
"""
|
||||||
|
Pydantic AI agent provider for A2A protocol.
|
||||||
|
|
||||||
|
Pydantic AI agents follow A2A protocol but don't support streaming natively.
|
||||||
|
This provider handles fake streaming by converting non-streaming responses into streaming chunks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from litellm.a2a_protocol.providers.pydantic_ai_agents.config import (
|
||||||
|
PydanticAIProviderConfig,
|
||||||
|
)
|
||||||
|
from litellm.a2a_protocol.providers.pydantic_ai_agents.handler import PydanticAIHandler
|
||||||
|
from litellm.a2a_protocol.providers.pydantic_ai_agents.transformation import (
|
||||||
|
PydanticAITransformation,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = ["PydanticAIHandler", "PydanticAITransformation", "PydanticAIProviderConfig"]
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
"""
|
||||||
|
Pydantic AI provider configuration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, AsyncIterator, Dict
|
||||||
|
|
||||||
|
from litellm.a2a_protocol.providers.base import BaseA2AProviderConfig
|
||||||
|
from litellm.a2a_protocol.providers.pydantic_ai_agents.handler import PydanticAIHandler
|
||||||
|
|
||||||
|
|
||||||
|
class PydanticAIProviderConfig(BaseA2AProviderConfig):
    """
    Provider configuration for Pydantic AI agents.

    Pydantic AI agents speak the A2A protocol but have no native streaming
    support, so the streaming entry point below fakes it by slicing a
    non-streaming response into chunks.
    """

    async def handle_non_streaming(
        self,
        request_id: str,
        params: Dict[str, Any],
        api_base: str,
        **kwargs,
    ) -> Dict[str, Any]:
        """Forward a non-streaming request to the Pydantic AI agent."""
        timeout = kwargs.get("timeout", 60.0)
        return await PydanticAIHandler.handle_non_streaming(
            request_id=request_id,
            params=params,
            api_base=api_base,
            timeout=timeout,
        )

    async def handle_streaming(
        self,
        request_id: str,
        params: Dict[str, Any],
        api_base: str,
        **kwargs,
    ) -> AsyncIterator[Dict[str, Any]]:
        """Serve a streaming request by fake-streaming the agent's reply."""
        stream = PydanticAIHandler.handle_streaming(
            request_id=request_id,
            params=params,
            api_base=api_base,
            timeout=kwargs.get("timeout", 60.0),
            chunk_size=kwargs.get("chunk_size", 50),
            delay_ms=kwargs.get("delay_ms", 10),
        )
        async for event in stream:
            yield event
|
||||||
@@ -0,0 +1,102 @@
|
|||||||
|
"""
|
||||||
|
Handler for Pydantic AI agents.
|
||||||
|
|
||||||
|
Pydantic AI agents follow A2A protocol but don't support streaming natively.
|
||||||
|
This handler provides fake streaming by converting non-streaming responses into streaming chunks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, AsyncIterator, Dict
|
||||||
|
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.a2a_protocol.providers.pydantic_ai_agents.transformation import (
|
||||||
|
PydanticAITransformation,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PydanticAIHandler:
    """
    Handler for Pydantic AI agent requests.

    Non-streaming calls go straight to the agent; streaming calls are
    emulated by fetching the complete response first and replaying it as
    chunks, since Pydantic AI agents cannot stream natively.
    """

    @staticmethod
    async def handle_non_streaming(
        request_id: str,
        params: Dict[str, Any],
        api_base: str,
        timeout: float = 60.0,
    ) -> Dict[str, Any]:
        """
        Handle a non-streaming request to a Pydantic AI agent.

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            api_base: Base URL of the Pydantic AI agent
            timeout: Request timeout in seconds

        Returns:
            A2A SendMessageResponse dict
        """
        verbose_logger.info(f"Pydantic AI: Routing to Pydantic AI agent at {api_base}")

        # One direct round-trip to the agent; no transformation needed
        # beyond what send_non_streaming_request performs.
        return await PydanticAITransformation.send_non_streaming_request(
            api_base=api_base,
            request_id=request_id,
            params=params,
            timeout=timeout,
        )

    @staticmethod
    async def handle_streaming(
        request_id: str,
        params: Dict[str, Any],
        api_base: str,
        timeout: float = 60.0,
        chunk_size: int = 50,
        delay_ms: int = 10,
    ) -> AsyncIterator[Dict[str, Any]]:
        """
        Handle a streaming request to a Pydantic AI agent via fake streaming.

        Fetches the raw (non-streamed) task response, then replays it as A2A
        streaming events of ``chunk_size`` characters spaced ``delay_ms``
        milliseconds apart.

        Args:
            request_id: A2A JSON-RPC request ID
            params: A2A MessageSendParams containing the message
            api_base: Base URL of the Pydantic AI agent
            timeout: Request timeout in seconds
            chunk_size: Number of characters per chunk
            delay_ms: Delay between chunks in milliseconds

        Yields:
            A2A streaming response events
        """
        verbose_logger.info(
            f"Pydantic AI: Faking streaming for Pydantic AI agent at {api_base}"
        )

        # Fetch the raw task response (not the transformed A2A format).
        task_response = await PydanticAITransformation.send_and_get_raw_response(
            api_base=api_base,
            request_id=request_id,
            params=params,
            timeout=timeout,
        )

        # Replay it as fake streaming chunks.
        async for event in PydanticAITransformation.fake_streaming_from_response(
            response_data=task_response,
            request_id=request_id,
            chunk_size=chunk_size,
            delay_ms=delay_ms,
        ):
            yield event
|
||||||
@@ -0,0 +1,530 @@
|
|||||||
|
"""
|
||||||
|
Transformation layer for Pydantic AI agents.
|
||||||
|
|
||||||
|
Pydantic AI agents follow A2A protocol but don't support streaming.
|
||||||
|
This module provides fake streaming by converting non-streaming responses into streaming chunks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from typing import Any, AsyncIterator, Dict, cast
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.llms.custom_httpx.http_handler import (
|
||||||
|
AsyncHTTPHandler,
|
||||||
|
get_async_httpx_client,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PydanticAITransformation:
|
||||||
|
"""
|
||||||
|
Transformation layer for Pydantic AI agents.
|
||||||
|
|
||||||
|
Handles:
|
||||||
|
- Direct A2A requests to Pydantic AI endpoints
|
||||||
|
- Polling for task completion (since Pydantic AI doesn't support streaming)
|
||||||
|
- Fake streaming by chunking non-streaming responses
|
||||||
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _remove_none_values(obj: Any) -> Any:
|
||||||
|
"""
|
||||||
|
Recursively remove None values from a dict/list structure.
|
||||||
|
|
||||||
|
FastA2A/Pydantic AI servers don't accept None values for optional fields -
|
||||||
|
they expect those fields to be omitted entirely.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
obj: Dict, list, or other value to clean
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Cleaned object with None values removed
|
||||||
|
"""
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return {
|
||||||
|
k: PydanticAITransformation._remove_none_values(v)
|
||||||
|
for k, v in obj.items()
|
||||||
|
if v is not None
|
||||||
|
}
|
||||||
|
elif isinstance(obj, list):
|
||||||
|
return [
|
||||||
|
PydanticAITransformation._remove_none_values(item)
|
||||||
|
for item in obj
|
||||||
|
if item is not None
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
return obj
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _params_to_dict(params: Any) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Convert params to a dict, handling Pydantic models.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
params: Dict or Pydantic model
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict representation of params
|
||||||
|
"""
|
||||||
|
if hasattr(params, "model_dump"):
|
||||||
|
# Pydantic v2 model
|
||||||
|
return params.model_dump(mode="python", exclude_none=True)
|
||||||
|
elif hasattr(params, "dict"):
|
||||||
|
# Pydantic v1 model
|
||||||
|
return params.dict(exclude_none=True)
|
||||||
|
elif isinstance(params, dict):
|
||||||
|
return params
|
||||||
|
else:
|
||||||
|
# Try to convert to dict
|
||||||
|
return dict(params)
|
||||||
|
|
||||||
|
@staticmethod
async def _poll_for_completion(
    client: AsyncHTTPHandler,
    endpoint: str,
    task_id: str,
    request_id: str,
    max_attempts: int = 30,
    poll_interval: float = 0.5,
) -> Dict[str, Any]:
    """
    Poll the agent's ``tasks/get`` method until the task completes.

    Args:
        client: Async HTTP client used for the polling requests.
        endpoint: API endpoint URL.
        task_id: Task ID to poll for.
        request_id: JSON-RPC request ID (poll request IDs derive from it).
        max_attempts: Maximum number of polling attempts.
        poll_interval: Seconds to wait between attempts.

    Returns:
        The completed task response payload.

    Raises:
        Exception: If the task ends in a "failed" or "canceled" state.
        TimeoutError: If the task does not complete within the budget.
    """
    for attempt in range(max_attempts):
        response = await client.post(
            endpoint,
            json={
                "jsonrpc": "2.0",
                "id": f"{request_id}-poll-{attempt}",
                "method": "tasks/get",
                "params": {"id": task_id},
            },
            headers={"Content-Type": "application/json"},
        )
        response.raise_for_status()
        payload = response.json()

        # Pull the task state out of result.status.state (empty when absent).
        state = payload.get("result", {}).get("status", {}).get("state", "")
        verbose_logger.debug(
            f"Pydantic AI: Poll attempt {attempt + 1}/{max_attempts}, state={state}"
        )

        if state == "completed":
            return payload
        if state in ("failed", "canceled"):
            raise Exception(f"Task {task_id} ended with state: {state}")

        await asyncio.sleep(poll_interval)

    raise TimeoutError(
        f"Task {task_id} did not complete within {max_attempts * poll_interval} seconds"
    )
|
||||||
|
|
||||||
|
@staticmethod
async def _send_and_poll_raw(
    api_base: str,
    request_id: str,
    params: Any,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """
    Send a ``message/send`` request to a Pydantic AI agent and return the
    raw task response, polling until the task reaches "completed".

    Shared by both the non-streaming and streaming handlers; the returned
    payload keeps the raw Pydantic AI task shape (history/artifacts).

    Args:
        api_base: Base URL of the Pydantic AI agent.
        request_id: A2A JSON-RPC request ID.
        params: A2A MessageSendParams containing the message.
        timeout: Request timeout in seconds.

    Returns:
        Raw Pydantic AI task response (with history/artifacts).
    """
    # Normalize params: dump Pydantic models, then drop None values,
    # since FastA2A rejects null for optional fields.
    params_dict = PydanticAITransformation._remove_none_values(
        PydanticAITransformation._params_to_dict(params)
    )

    # FastA2A/Pydantic AI requires 'kind': 'message' on the message.
    if "message" in params_dict:
        params_dict["message"]["kind"] = "message"

    # A2A JSON-RPC envelope using message/send for FastA2A compatibility.
    a2a_request = {
        "jsonrpc": "2.0",
        "id": request_id,
        "method": "message/send",
        "params": params_dict,
    }

    # FastA2A serves JSON-RPC at the root endpoint (/), not /messages.
    endpoint = api_base.rstrip("/")
    verbose_logger.info(f"Pydantic AI: Sending non-streaming request to {endpoint}")

    # Shared async HTTP client for this provider.
    client = get_async_httpx_client(
        llm_provider=cast(Any, "pydantic_ai_agent"),
        params={"timeout": timeout},
    )
    response = await client.post(
        endpoint,
        json=a2a_request,
        headers={"Content-Type": "application/json"},
    )
    response.raise_for_status()
    response_data = response.json()

    # If the task was only submitted (not yet completed), poll until done.
    task_result = response_data.get("result", {})
    if task_result.get("status", {}).get("state", "") != "completed":
        task_id = task_result.get("id")
        if task_id:
            verbose_logger.info(
                f"Pydantic AI: Task {task_id} submitted, polling for completion..."
            )
            response_data = await PydanticAITransformation._poll_for_completion(
                client=client,
                endpoint=endpoint,
                task_id=task_id,
                request_id=request_id,
            )

    verbose_logger.info(
        f"Pydantic AI: Received completed response for request_id={request_id}"
    )

    return response_data
|
||||||
|
|
||||||
|
@staticmethod
async def send_non_streaming_request(
    api_base: str,
    request_id: str,
    params: Any,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """
    Send a non-streaming A2A request and wait for task completion.

    Args:
        api_base: Base URL of the Pydantic AI agent (e.g., "http://localhost:9999").
        request_id: A2A JSON-RPC request ID.
        params: A2A MessageSendParams containing the message (dict or Pydantic model).
        timeout: Request timeout in seconds.

    Returns:
        Standard A2A non-streaming response format with message.
    """
    # Fetch the raw task payload (submits + polls until completed) ...
    raw_response = await PydanticAITransformation._send_and_poll_raw(
        api_base=api_base,
        request_id=request_id,
        params=params,
        timeout=timeout,
    )
    # ... then reshape it into the standard A2A non-streaming format.
    return PydanticAITransformation._transform_to_a2a_response(
        response_data=raw_response,
        request_id=request_id,
    )
|
||||||
|
|
||||||
|
@staticmethod
async def send_and_get_raw_response(
    api_base: str,
    request_id: str,
    params: Any,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """
    Send a request to a Pydantic AI agent and return the raw task response.

    Thin public wrapper over ``_send_and_poll_raw``; used by the streaming
    handler, which fake-streams from the raw payload.

    Args:
        api_base: Base URL of the Pydantic AI agent.
        request_id: A2A JSON-RPC request ID.
        params: A2A MessageSendParams containing the message.
        timeout: Request timeout in seconds.

    Returns:
        Raw Pydantic AI task response (with history/artifacts).
    """
    return await PydanticAITransformation._send_and_poll_raw(
        api_base=api_base,
        request_id=request_id,
        params=params,
        timeout=timeout,
    )
|
||||||
|
|
||||||
|
@staticmethod
def _transform_to_a2a_response(
    response_data: Dict[str, Any],
    request_id: str,
) -> Dict[str, Any]:
    """
    Convert a Pydantic AI task response into the standard A2A
    non-streaming format.

    Pydantic AI returns a task with history/artifacts; the standard A2A
    non-streaming format instead expects::

        {
            "jsonrpc": "2.0",
            "id": "...",
            "result": {
                "message": {
                    "role": "agent",
                    "parts": [{"kind": "text", "text": "..."}],
                    "messageId": "..."
                }
            }
        }

    Args:
        response_data: Pydantic AI task response.
        request_id: Original request ID.

    Returns:
        Standard A2A non-streaming response dict.
    """
    # Pull the agent's reply text (and its parts) out of the raw task.
    full_text, message_id, parts = PydanticAITransformation._extract_response_text(
        response_data
    )

    # When no structured parts were found, synthesize a single text part.
    message_parts = parts or [{"kind": "text", "text": full_text}]

    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "result": {
            "message": {
                "role": "agent",
                "parts": message_parts,
                "messageId": message_id,
            },
        },
    }
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_response_text(response_data: Dict[str, Any]) -> tuple[str, str, list]:
|
||||||
|
"""
|
||||||
|
Extract response text from completed task response.
|
||||||
|
|
||||||
|
Pydantic AI returns completed tasks with:
|
||||||
|
- history: list of messages (user and agent)
|
||||||
|
- artifacts: list of result artifacts
|
||||||
|
|
||||||
|
Args:
|
||||||
|
response_data: Completed task response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (full_text, message_id, parts)
|
||||||
|
"""
|
||||||
|
result = response_data.get("result", {})
|
||||||
|
|
||||||
|
# Try to extract from artifacts first (preferred for results)
|
||||||
|
artifacts = result.get("artifacts", [])
|
||||||
|
if artifacts:
|
||||||
|
for artifact in artifacts:
|
||||||
|
parts = artifact.get("parts", [])
|
||||||
|
for part in parts:
|
||||||
|
if part.get("kind") == "text":
|
||||||
|
text = part.get("text", "")
|
||||||
|
if text:
|
||||||
|
return text, str(uuid4()), parts
|
||||||
|
|
||||||
|
# Fall back to history - get the last agent message
|
||||||
|
history = result.get("history", [])
|
||||||
|
for msg in reversed(history):
|
||||||
|
if msg.get("role") == "agent":
|
||||||
|
parts = msg.get("parts", [])
|
||||||
|
message_id = msg.get("messageId", str(uuid4()))
|
||||||
|
full_text = ""
|
||||||
|
for part in parts:
|
||||||
|
if part.get("kind") == "text":
|
||||||
|
full_text += part.get("text", "")
|
||||||
|
if full_text:
|
||||||
|
return full_text, message_id, parts
|
||||||
|
|
||||||
|
# Fall back to message field (original format)
|
||||||
|
message = result.get("message", {})
|
||||||
|
if message:
|
||||||
|
parts = message.get("parts", [])
|
||||||
|
message_id = message.get("messageId", str(uuid4()))
|
||||||
|
full_text = ""
|
||||||
|
for part in parts:
|
||||||
|
if part.get("kind") == "text":
|
||||||
|
full_text += part.get("text", "")
|
||||||
|
return full_text, message_id, parts
|
||||||
|
|
||||||
|
return "", str(uuid4()), []
|
||||||
|
|
||||||
|
@staticmethod
async def fake_streaming_from_response(
    response_data: Dict[str, Any],
    request_id: str,
    chunk_size: int = 50,
    delay_ms: int = 10,
) -> AsyncIterator[Dict[str, Any]]:
    """
    Turn a completed (non-streaming) A2A response into fake streaming.

    Emits the standard A2A streaming event sequence:
      1. Task event (kind "task") with status "submitted"
      2. Status update (kind "status-update") with state "working"
      3. Artifact-update chunks (kind "artifact-update") carrying the text
      4. Final status update (kind "status-update") with state "completed"

    Args:
        response_data: Non-streaming A2A response dict (completed task).
        request_id: A2A JSON-RPC request ID.
        chunk_size: Number of characters per chunk (default: 50).
        delay_ms: Delay between chunks in milliseconds (default: 10).

    Yields:
        A2A streaming response events.
    """
    # Extract the response text from the completed task.
    full_text, message_id, parts = PydanticAITransformation._extract_response_text(
        response_data
    )

    # Recover the original user message (if any) so the task event can
    # echo it back in its history.
    history = response_data.get("result", {}).get("history", [])
    input_message = next(
        (msg for msg in history if msg.get("role") == "user"), {}
    )

    # Fresh identifiers for the streaming events.
    task_id = str(uuid4())
    context_id = str(uuid4())
    artifact_id = str(uuid4())
    input_message_id = input_message.get("messageId", str(uuid4()))

    def _status_event(state: str, final: bool) -> Dict[str, Any]:
        # Shape matches A2ACompletionBridgeTransformation.create_status_update_event
        return {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "contextId": context_id,
                "final": final,
                "kind": "status-update",
                "status": {"state": state},
                "taskId": task_id,
            },
        }

    # 1. Initial task event (status "submitted").
    # Shape matches A2ACompletionBridgeTransformation.create_task_event
    yield {
        "jsonrpc": "2.0",
        "id": request_id,
        "result": {
            "contextId": context_id,
            "history": [
                {
                    "contextId": context_id,
                    "kind": "message",
                    "messageId": input_message_id,
                    "parts": input_message.get(
                        "parts", [{"kind": "text", "text": ""}]
                    ),
                    "role": "user",
                    "taskId": task_id,
                }
            ],
            "id": task_id,
            "kind": "task",
            "status": {"state": "submitted"},
        },
    }

    # 2. "working" status update, then a short pause to simulate processing.
    yield _status_event("working", final=False)
    await asyncio.sleep(delay_ms / 1000.0)

    # 3. Deliver the text as artifact-update chunks.
    # Shape matches A2ACompletionBridgeTransformation.create_artifact_update_event
    if full_text:
        for offset in range(0, len(full_text), chunk_size):
            yield {
                "jsonrpc": "2.0",
                "id": request_id,
                "result": {
                    "contextId": context_id,
                    "kind": "artifact-update",
                    "taskId": task_id,
                    "artifact": {
                        "artifactId": artifact_id,
                        "parts": [
                            {
                                "kind": "text",
                                "text": full_text[offset : offset + chunk_size],
                            }
                        ],
                    },
                },
            }
            # Pause between chunks, but not after the last one.
            if offset + chunk_size < len(full_text):
                await asyncio.sleep(delay_ms / 1000.0)

    # 4. Final "completed" status update.
    yield _status_event("completed", final=True)

    verbose_logger.info(
        f"Pydantic AI: Fake streaming completed for request_id={request_id}"
    )
|
||||||
@@ -0,0 +1,184 @@
|
|||||||
|
"""
|
||||||
|
A2A Streaming Iterator with token tracking and logging support.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.a2a_protocol.cost_calculator import A2ACostCalculator
|
||||||
|
from litellm.a2a_protocol.utils import A2ARequestUtils
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||||
|
from litellm.litellm_core_utils.thread_pool_executor import executor
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from a2a.types import SendStreamingMessageRequest, SendStreamingMessageResponse
|
||||||
|
|
||||||
|
|
||||||
|
class A2AStreamingIterator:
    """
    Async iterator for A2A streaming responses with token tracking.

    Wraps an upstream iterator of ``SendStreamingMessageResponse`` chunks,
    yields each chunk through unchanged, and — once the stream is
    exhausted — computes token usage from the collected text and fires
    LiteLLM's success-logging handlers (async and threaded).

    Collects chunks, extracts text, and logs usage on completion.
    """

    def __init__(
        self,
        stream: AsyncIterator["SendStreamingMessageResponse"],
        request: "SendStreamingMessageRequest",
        logging_obj: LiteLLMLoggingObj,
        agent_name: str = "unknown",
    ):
        # Upstream chunk iterator being wrapped.
        self.stream = stream
        # Original request; used later to count prompt tokens.
        self.request = request
        self.logging_obj = logging_obj
        self.agent_name = agent_name
        # NOTE(review): naive local time (not timezone-aware) — confirm
        # downstream logging expects naive datetimes.
        self.start_time = datetime.now()

        # Collect chunks for token counting
        self.chunks: List[Any] = []
        # Text extracted from each chunk, in arrival order.
        self.collected_text_parts: List[str] = []
        # Chunk whose status.state was "completed" (falls back to the
        # last chunk seen when the stream ends without one).
        self.final_chunk: Optional[Any] = None

    def __aiter__(self) -> "A2AStreamingIterator":
        return self

    async def __anext__(self) -> "SendStreamingMessageResponse":
        """Yield the next upstream chunk, tracking text and completion."""
        try:
            chunk = await self.stream.__anext__()

            # Store chunk
            self.chunks.append(chunk)

            # Extract text from chunk for token counting
            self._collect_text_from_chunk(chunk)

            # Check if this is the final chunk (completed status)
            if self._is_completed_chunk(chunk):
                self.final_chunk = chunk

            return chunk

        except StopAsyncIteration:
            # Stream ended - handle logging
            if self.final_chunk is None and self.chunks:
                self.final_chunk = self.chunks[-1]
            await self._handle_stream_complete()
            raise

    def _collect_text_from_chunk(self, chunk: Any) -> None:
        """Extract text from a streaming chunk and add to collected parts."""
        try:
            # Chunks without model_dump (non-Pydantic) yield no text.
            chunk_dict = (
                chunk.model_dump(mode="json", exclude_none=True)
                if hasattr(chunk, "model_dump")
                else {}
            )
            text = A2ARequestUtils.extract_text_from_response(chunk_dict)
            if text:
                self.collected_text_parts.append(text)
        except Exception:
            # Best-effort: a chunk we cannot parse just contributes no text.
            verbose_logger.debug("Failed to extract text from A2A streaming chunk")

    def _is_completed_chunk(self, chunk: Any) -> bool:
        """Check if chunk indicates stream completion (status.state == "completed")."""
        try:
            chunk_dict = (
                chunk.model_dump(mode="json", exclude_none=True)
                if hasattr(chunk, "model_dump")
                else {}
            )
            result = chunk_dict.get("result", {})
            if isinstance(result, dict):
                status = result.get("status", {})
                if isinstance(status, dict):
                    return status.get("state") == "completed"
        except Exception:
            # Malformed chunks are simply treated as non-final.
            pass
        return False

    async def _handle_stream_complete(self) -> None:
        """Handle logging and token counting when stream completes.

        Never raises: any failure here is logged at debug level so that
        stream consumers are not disturbed by logging problems.
        """
        try:
            end_time = datetime.now()

            # Calculate tokens from collected text
            input_message = A2ARequestUtils.get_input_message_from_request(self.request)
            input_text = A2ARequestUtils.extract_text_from_message(input_message)
            prompt_tokens = A2ARequestUtils.count_tokens(input_text)

            # Use the last (most complete) text from chunks
            # NOTE(review): assumes later chunks carry cumulative text —
            # confirm against the upstream chunk format.
            output_text = (
                self.collected_text_parts[-1] if self.collected_text_parts else ""
            )
            completion_tokens = A2ARequestUtils.count_tokens(output_text)

            total_tokens = prompt_tokens + completion_tokens

            # Create usage object
            usage = litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
            )

            # Set usage on logging obj
            self.logging_obj.model_call_details["usage"] = usage
            # Mark stream flag for downstream callbacks
            # NOTE(review): set to False so the aggregated result is treated
            # as non-streaming downstream — confirm intent.
            self.logging_obj.model_call_details["stream"] = False

            # Calculate cost using A2ACostCalculator
            response_cost = A2ACostCalculator.calculate_a2a_cost(self.logging_obj)
            self.logging_obj.model_call_details["response_cost"] = response_cost

            # Build result for logging
            result = self._build_logging_result(usage)

            # Call success handlers - they will build standard_logging_object
            # NOTE(review): task reference is not retained; relies on the
            # running loop keeping the task alive until done — confirm.
            asyncio.create_task(
                self.logging_obj.async_success_handler(
                    result=result,
                    start_time=self.start_time,
                    end_time=end_time,
                    cache_hit=None,
                )
            )

            # Sync success handler runs on the shared thread pool.
            executor.submit(
                self.logging_obj.success_handler,
                result=result,
                cache_hit=None,
                start_time=self.start_time,
                end_time=end_time,
            )

            verbose_logger.info(
                f"A2A streaming completed: prompt_tokens={prompt_tokens}, "
                f"completion_tokens={completion_tokens}, total_tokens={total_tokens}, "
                f"response_cost={response_cost}"
            )

        except Exception as e:
            verbose_logger.debug(f"Error in A2A streaming completion handler: {e}")

    def _build_logging_result(self, usage: litellm.Usage) -> Dict[str, Any]:
        """Build a result dict for logging.

        Combines the request id, the usage payload, and (when available)
        the final chunk's "result" body.
        """
        result: Dict[str, Any] = {
            "id": getattr(self.request, "id", "unknown"),
            "jsonrpc": "2.0",
            "usage": usage.model_dump()
            if hasattr(usage, "model_dump")
            else dict(usage),
        }

        # Add final chunk result if available
        if self.final_chunk:
            try:
                chunk_dict = self.final_chunk.model_dump(mode="json", exclude_none=True)
                result["result"] = chunk_dict.get("result", {})
            except Exception:
                # Leave "result" absent if the chunk cannot be serialized.
                pass

        return result
|
||||||
@@ -0,0 +1,138 @@
|
|||||||
|
"""
|
||||||
|
Utility functions for A2A protocol.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from a2a.types import SendMessageRequest, SendStreamingMessageRequest
|
||||||
|
|
||||||
|
|
||||||
|
class A2ARequestUtils:
    """Helper routines for A2A request/response processing."""

    @staticmethod
    def extract_text_from_message(message: Any) -> str:
        """
        Collect the text content from an A2A message's parts.

        Args:
            message: A2A message dict or object exposing a 'parts' list.

        Returns:
            All text parts joined with a single space ("" for None input).
        """
        if message is None:
            return ""

        # Support both dict-shaped and attribute-shaped messages.
        if isinstance(message, dict):
            parts = message.get("parts", [])
        else:
            parts = getattr(message, "parts", []) or []

        collected: List[str] = []
        for part in parts:
            if isinstance(part, dict):
                kind, text = part.get("kind"), part.get("text", "")
            else:
                kind, text = getattr(part, "kind", None), getattr(part, "text", "")
            if kind == "text":
                collected.append(text)

        return " ".join(collected)

    @staticmethod
    def extract_text_from_response(response_dict: Dict[str, Any]) -> str:
        """
        Collect the text content from an A2A response's result message.

        Args:
            response_dict: A2A response dict with 'result' containing message.

        Returns:
            Text from the response message parts ("" when absent/malformed).
        """
        result = response_dict.get("result", {})
        if not isinstance(result, dict):
            return ""
        return A2ARequestUtils.extract_text_from_message(result.get("message", {}))

    @staticmethod
    def get_input_message_from_request(
        request: "Union[SendMessageRequest, SendStreamingMessageRequest]",
    ) -> Any:
        """
        Pull the input message off an A2A request.

        Args:
            request: The A2A SendMessageRequest or SendStreamingMessageRequest.

        Returns:
            The message object/dict, or None when missing.
        """
        params = getattr(request, "params", None)
        return None if params is None else getattr(params, "message", None)

    @staticmethod
    def count_tokens(text: str) -> int:
        """
        Count tokens in ``text`` using litellm.token_counter.

        Args:
            text: Text to count tokens for.

        Returns:
            Token count; 0 for empty text or when counting fails.
        """
        if not text:
            return 0
        try:
            return litellm.token_counter(text=text)
        except Exception:
            verbose_logger.debug("Failed to count tokens")
            return 0

    @staticmethod
    def calculate_usage_from_request_response(
        request: "Union[SendMessageRequest, SendStreamingMessageRequest]",
        response_dict: Dict[str, Any],
    ) -> Tuple[int, int, int]:
        """
        Derive token usage from an A2A request/response pair.

        Args:
            request: The A2A SendMessageRequest or SendStreamingMessageRequest.
            response_dict: The A2A response as a dict.

        Returns:
            Tuple of (prompt_tokens, completion_tokens, total_tokens).
        """
        # Input side: text of the user's message.
        input_text = A2ARequestUtils.extract_text_from_message(
            A2ARequestUtils.get_input_message_from_request(request)
        )
        prompt_tokens = A2ARequestUtils.count_tokens(input_text)

        # Output side: text of the response message.
        completion_tokens = A2ARequestUtils.count_tokens(
            A2ARequestUtils.extract_text_from_response(response_dict)
        )

        return prompt_tokens, completion_tokens, prompt_tokens + completion_tokens
|
||||||
|
|
||||||
|
|
||||||
|
# Backwards compatibility aliases
|
||||||
|
def extract_text_from_a2a_message(message: Any) -> str:
    """Backwards-compatible alias for ``A2ARequestUtils.extract_text_from_message``."""
    return A2ARequestUtils.extract_text_from_message(message)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text_from_a2a_response(response_dict: Dict[str, Any]) -> str:
    """Backwards-compatible alias for ``A2ARequestUtils.extract_text_from_response``."""
    return A2ARequestUtils.extract_text_from_response(response_dict)
|
||||||
@@ -0,0 +1,182 @@
|
|||||||
|
{
|
||||||
|
"description": "Mapping of Anthropic beta headers for each provider. Keys are input header names, values are provider-specific header names (or null if unsupported). Only headers present in mapping keys with non-null values can be forwarded.",
|
||||||
|
"anthropic": {
|
||||||
|
"advanced-tool-use-2025-11-20": "advanced-tool-use-2025-11-20",
|
||||||
|
"bash_20241022": null,
|
||||||
|
"bash_20250124": null,
|
||||||
|
"code-execution-2025-08-25": "code-execution-2025-08-25",
|
||||||
|
"compact-2026-01-12": "compact-2026-01-12",
|
||||||
|
"computer-use-2025-01-24": "computer-use-2025-01-24",
|
||||||
|
"computer-use-2025-11-24": "computer-use-2025-11-24",
|
||||||
|
"context-1m-2025-08-07": "context-1m-2025-08-07",
|
||||||
|
"context-management-2025-06-27": "context-management-2025-06-27",
|
||||||
|
"effort-2025-11-24": "effort-2025-11-24",
|
||||||
|
"fast-mode-2026-02-01": "fast-mode-2026-02-01",
|
||||||
|
"files-api-2025-04-14": "files-api-2025-04-14",
|
||||||
|
"structured-output-2024-03-01": null,
|
||||||
|
"fine-grained-tool-streaming-2025-05-14": "fine-grained-tool-streaming-2025-05-14",
|
||||||
|
"interleaved-thinking-2025-05-14": "interleaved-thinking-2025-05-14",
|
||||||
|
"mcp-client-2025-11-20": "mcp-client-2025-11-20",
|
||||||
|
"mcp-client-2025-04-04": "mcp-client-2025-04-04",
|
||||||
|
"mcp-servers-2025-12-04": null,
|
||||||
|
"oauth-2025-04-20": "oauth-2025-04-20",
|
||||||
|
"output-128k-2025-02-19": "output-128k-2025-02-19",
|
||||||
|
"prompt-caching-scope-2026-01-05": "prompt-caching-scope-2026-01-05",
|
||||||
|
"skills-2025-10-02": "skills-2025-10-02",
|
||||||
|
"structured-outputs-2025-11-13": "structured-outputs-2025-11-13",
|
||||||
|
"text_editor_20241022": null,
|
||||||
|
"text_editor_20250124": null,
|
||||||
|
"token-efficient-tools-2025-02-19": "token-efficient-tools-2025-02-19",
|
||||||
|
"web-fetch-2025-09-10": "web-fetch-2025-09-10",
|
||||||
|
"web-search-2025-03-05": "web-search-2025-03-05"
|
||||||
|
},
|
||||||
|
"azure_ai": {
|
||||||
|
"advanced-tool-use-2025-11-20": "advanced-tool-use-2025-11-20",
|
||||||
|
"bash_20241022": null,
|
||||||
|
"bash_20250124": null,
|
||||||
|
"code-execution-2025-08-25": "code-execution-2025-08-25",
|
||||||
|
"compact-2026-01-12": null,
|
||||||
|
"computer-use-2025-01-24": "computer-use-2025-01-24",
|
||||||
|
"computer-use-2025-11-24": "computer-use-2025-11-24",
|
||||||
|
"context-1m-2025-08-07": "context-1m-2025-08-07",
|
||||||
|
"context-management-2025-06-27": "context-management-2025-06-27",
|
||||||
|
"effort-2025-11-24": "effort-2025-11-24",
|
||||||
|
"fast-mode-2026-02-01": null,
|
||||||
|
"files-api-2025-04-14": "files-api-2025-04-14",
|
||||||
|
"fine-grained-tool-streaming-2025-05-14": null,
|
||||||
|
"interleaved-thinking-2025-05-14": "interleaved-thinking-2025-05-14",
|
||||||
|
"mcp-client-2025-11-20": "mcp-client-2025-11-20",
|
||||||
|
"mcp-client-2025-04-04": "mcp-client-2025-04-04",
|
||||||
|
"mcp-servers-2025-12-04": null,
|
||||||
|
"output-128k-2025-02-19": null,
|
||||||
|
"structured-output-2024-03-01": null,
|
||||||
|
"prompt-caching-scope-2026-01-05": "prompt-caching-scope-2026-01-05",
|
||||||
|
"skills-2025-10-02": "skills-2025-10-02",
|
||||||
|
"structured-outputs-2025-11-13": "structured-outputs-2025-11-13",
|
||||||
|
"text_editor_20241022": null,
|
||||||
|
"text_editor_20250124": null,
|
||||||
|
"token-efficient-tools-2025-02-19": null,
|
||||||
|
"web-fetch-2025-09-10": "web-fetch-2025-09-10",
|
||||||
|
"web-search-2025-03-05": "web-search-2025-03-05"
|
||||||
|
},
|
||||||
|
"bedrock_converse": {
|
||||||
|
"advanced-tool-use-2025-11-20": null,
|
||||||
|
"bash_20241022": null,
|
||||||
|
"bash_20250124": null,
|
||||||
|
"code-execution-2025-08-25": null,
|
||||||
|
"compact-2026-01-12": null,
|
||||||
|
"computer-use-2025-01-24": "computer-use-2025-01-24",
|
||||||
|
"computer-use-2025-11-24": "computer-use-2025-11-24",
|
||||||
|
"context-1m-2025-08-07": "context-1m-2025-08-07",
|
||||||
|
"context-management-2025-06-27": "context-management-2025-06-27",
|
||||||
|
"effort-2025-11-24": null,
|
||||||
|
"fast-mode-2026-02-01": null,
|
||||||
|
"files-api-2025-04-14": null,
|
||||||
|
"fine-grained-tool-streaming-2025-05-14": null,
|
||||||
|
"interleaved-thinking-2025-05-14": "interleaved-thinking-2025-05-14",
|
||||||
|
"mcp-client-2025-11-20": null,
|
||||||
|
"mcp-client-2025-04-04": null,
|
||||||
|
"mcp-servers-2025-12-04": null,
|
||||||
|
"output-128k-2025-02-19": null,
|
||||||
|
"structured-output-2024-03-01": null,
|
||||||
|
"prompt-caching-scope-2026-01-05": null,
|
||||||
|
"skills-2025-10-02": null,
|
||||||
|
"structured-outputs-2025-11-13": "structured-outputs-2025-11-13",
|
||||||
|
"text_editor_20241022": null,
|
||||||
|
"text_editor_20250124": null,
|
||||||
|
"token-efficient-tools-2025-02-19": null,
|
||||||
|
"tool-search-tool-2025-10-19": null,
|
||||||
|
"web-fetch-2025-09-10": null,
|
||||||
|
"web-search-2025-03-05": null
|
||||||
|
},
|
||||||
|
"bedrock": {
|
||||||
|
"advanced-tool-use-2025-11-20": "tool-search-tool-2025-10-19",
|
||||||
|
"bash_20241022": null,
|
||||||
|
"bash_20250124": null,
|
||||||
|
"code-execution-2025-08-25": null,
|
||||||
|
"compact-2026-01-12": "compact-2026-01-12",
|
||||||
|
"computer-use-2025-01-24": "computer-use-2025-01-24",
|
||||||
|
"computer-use-2025-11-24": "computer-use-2025-11-24",
|
||||||
|
"context-1m-2025-08-07": "context-1m-2025-08-07",
|
||||||
|
"context-management-2025-06-27": "context-management-2025-06-27",
|
||||||
|
"effort-2025-11-24": null,
|
||||||
|
"fast-mode-2026-02-01": null,
|
||||||
|
"files-api-2025-04-14": null,
|
||||||
|
"fine-grained-tool-streaming-2025-05-14": null,
|
||||||
|
"interleaved-thinking-2025-05-14": "interleaved-thinking-2025-05-14",
|
||||||
|
"mcp-client-2025-11-20": null,
|
||||||
|
"mcp-client-2025-04-04": null,
|
||||||
|
"mcp-servers-2025-12-04": null,
|
||||||
|
"output-128k-2025-02-19": null,
|
||||||
|
"structured-output-2024-03-01": null,
|
||||||
|
"prompt-caching-scope-2026-01-05": null,
|
||||||
|
"skills-2025-10-02": null,
|
||||||
|
"structured-outputs-2025-11-13": null,
|
||||||
|
"text_editor_20241022": null,
|
||||||
|
"text_editor_20250124": null,
|
||||||
|
"token-efficient-tools-2025-02-19": null,
|
||||||
|
"tool-search-tool-2025-10-19": "tool-search-tool-2025-10-19",
|
||||||
|
"web-fetch-2025-09-10": null,
|
||||||
|
"web-search-2025-03-05": null
|
||||||
|
},
|
||||||
|
"vertex_ai": {
|
||||||
|
"advanced-tool-use-2025-11-20": "tool-search-tool-2025-10-19",
|
||||||
|
"bash_20241022": null,
|
||||||
|
"bash_20250124": null,
|
||||||
|
"code-execution-2025-08-25": null,
|
||||||
|
"compact-2026-01-12": null,
|
||||||
|
"computer-use-2025-01-24": "computer-use-2025-01-24",
|
||||||
|
"computer-use-2025-11-24": "computer-use-2025-11-24",
|
||||||
|
"context-1m-2025-08-07": "context-1m-2025-08-07",
|
||||||
|
"context-management-2025-06-27": "context-management-2025-06-27",
|
||||||
|
"effort-2025-11-24": null,
|
||||||
|
"fast-mode-2026-02-01": null,
|
||||||
|
"files-api-2025-04-14": null,
|
||||||
|
"fine-grained-tool-streaming-2025-05-14": null,
|
||||||
|
"interleaved-thinking-2025-05-14": "interleaved-thinking-2025-05-14",
|
||||||
|
"mcp-client-2025-11-20": null,
|
||||||
|
"mcp-client-2025-04-04": null,
|
||||||
|
"mcp-servers-2025-12-04": null,
|
||||||
|
"output-128k-2025-02-19": null,
|
||||||
|
"structured-output-2024-03-01": null,
|
||||||
|
"prompt-caching-scope-2026-01-05": null,
|
||||||
|
"skills-2025-10-02": null,
|
||||||
|
"structured-outputs-2025-11-13": null,
|
||||||
|
"text_editor_20241022": null,
|
||||||
|
"text_editor_20250124": null,
|
||||||
|
"token-efficient-tools-2025-02-19": null,
|
||||||
|
"tool-search-tool-2025-10-19": "tool-search-tool-2025-10-19",
|
||||||
|
"web-fetch-2025-09-10": null,
|
||||||
|
"web-search-2025-03-05": "web-search-2025-03-05"
|
||||||
|
},
|
||||||
|
"databricks": {
|
||||||
|
"advanced-tool-use-2025-11-20": "advanced-tool-use-2025-11-20",
|
||||||
|
"bash_20241022": null,
|
||||||
|
"bash_20250124": null,
|
||||||
|
"code-execution-2025-08-25": "code-execution-2025-08-25",
|
||||||
|
"compact-2026-01-12": "compact-2026-01-12",
|
||||||
|
"computer-use-2025-01-24": "computer-use-2025-01-24",
|
||||||
|
"computer-use-2025-11-24": "computer-use-2025-11-24",
|
||||||
|
"context-1m-2025-08-07": "context-1m-2025-08-07",
|
||||||
|
"context-management-2025-06-27": "context-management-2025-06-27",
|
||||||
|
"effort-2025-11-24": "effort-2025-11-24",
|
||||||
|
"fast-mode-2026-02-01": "fast-mode-2026-02-01",
|
||||||
|
"files-api-2025-04-14": "files-api-2025-04-14",
|
||||||
|
"structured-output-2024-03-01": null,
|
||||||
|
"fine-grained-tool-streaming-2025-05-14": "fine-grained-tool-streaming-2025-05-14",
|
||||||
|
"interleaved-thinking-2025-05-14": "interleaved-thinking-2025-05-14",
|
||||||
|
"mcp-client-2025-11-20": "mcp-client-2025-11-20",
|
||||||
|
"mcp-client-2025-04-04": "mcp-client-2025-04-04",
|
||||||
|
"mcp-servers-2025-12-04": null,
|
||||||
|
"oauth-2025-04-20": "oauth-2025-04-20",
|
||||||
|
"output-128k-2025-02-19": "output-128k-2025-02-19",
|
||||||
|
"prompt-caching-scope-2026-01-05": "prompt-caching-scope-2026-01-05",
|
||||||
|
"skills-2025-10-02": "skills-2025-10-02",
|
||||||
|
"structured-outputs-2025-11-13": "structured-outputs-2025-11-13",
|
||||||
|
"text_editor_20241022": null,
|
||||||
|
"text_editor_20250124": null,
|
||||||
|
"token-efficient-tools-2025-02-19": "token-efficient-tools-2025-02-19",
|
||||||
|
"web-fetch-2025-09-10": "web-fetch-2025-09-10",
|
||||||
|
"web-search-2025-03-05": "web-search-2025-03-05"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,385 @@
|
|||||||
|
"""
|
||||||
|
Centralized manager for Anthropic beta headers across different providers.
|
||||||
|
|
||||||
|
This module provides utilities to:
|
||||||
|
1. Load beta header configuration from JSON (mapping of supported headers per provider)
|
||||||
|
2. Filter and map beta headers based on provider support
|
||||||
|
3. Handle provider-specific header name mappings (e.g., advanced-tool-use -> tool-search-tool)
|
||||||
|
4. Support remote fetching and caching similar to model cost map
|
||||||
|
|
||||||
|
Design:
|
||||||
|
- JSON config contains mapping of beta headers for each provider
|
||||||
|
- Keys are input header names, values are provider-specific header names (or null if unsupported)
|
||||||
|
- Only headers present in mapping keys with non-null values can be forwarded
|
||||||
|
- This enforces stricter validation than the previous unsupported list approach
|
||||||
|
|
||||||
|
Configuration can be loaded from:
|
||||||
|
- Remote URL (default): Fetches from GitHub repository
|
||||||
|
- Local file: Set LITELLM_LOCAL_ANTHROPIC_BETA_HEADERS=True to use bundled config only
|
||||||
|
|
||||||
|
Environment Variables:
|
||||||
|
- LITELLM_LOCAL_ANTHROPIC_BETA_HEADERS: Set to "True" to disable remote fetching
|
||||||
|
- LITELLM_ANTHROPIC_BETA_HEADERS_URL: Custom URL for remote config (optional)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from importlib.resources import files
|
||||||
|
from typing import Dict, List, Optional, Set
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import verbose_logger
|
||||||
|
|
||||||
|
# Cache for the loaded configuration
|
||||||
|
_BETA_HEADERS_CONFIG: Optional[Dict] = None
|
||||||
|
|
||||||
|
|
||||||
|
class GetAnthropicBetaHeadersConfig:
    """
    Fetches, validates, and loads the Anthropic beta headers configuration.

    Mirrors GetModelCostMap: the config can be pulled from a remote URL with a
    packaged local backup as the fallback.
    """

    @staticmethod
    def load_local_beta_headers_config() -> Dict:
        """Load the local backup beta headers config bundled with the package."""
        try:
            raw = (
                files("litellm")
                .joinpath("anthropic_beta_headers_config.json")
                .read_text(encoding="utf-8")
            )
            return json.loads(raw)
        except Exception as e:
            verbose_logger.error(f"Failed to load local beta headers config: {e}")
            # Last-resort fallback: empty per-provider mappings.
            empty_keys = (
                "anthropic",
                "azure_ai",
                "bedrock",
                "bedrock_converse",
                "vertex_ai",
                "provider_aliases",
            )
            return {key: {} for key in empty_keys}

    @staticmethod
    def _check_is_valid_dict(fetched_config: dict) -> bool:
        """Check if fetched config is a non-empty dict with expected structure."""
        if not isinstance(fetched_config, dict):
            verbose_logger.warning(
                "LiteLLM: Fetched beta headers config is not a dict (type=%s). "
                "Falling back to local backup.",
                type(fetched_config).__name__,
            )
            return False

        if len(fetched_config) == 0:
            verbose_logger.warning(
                "LiteLLM: Fetched beta headers config is empty. "
                "Falling back to local backup.",
            )
            return False

        # The config is considered structurally valid when at least one known
        # provider section is present.
        known_providers = (
            "anthropic",
            "azure_ai",
            "bedrock",
            "bedrock_converse",
            "vertex_ai",
        )
        if not any(name in fetched_config for name in known_providers):
            verbose_logger.warning(
                "LiteLLM: Fetched beta headers config missing provider keys. "
                "Falling back to local backup.",
            )
            return False

        return True

    @classmethod
    def validate_beta_headers_config(cls, fetched_config: dict) -> bool:
        """
        Validate the integrity of a fetched beta headers config.

        Returns True if all checks pass, False otherwise.
        """
        return cls._check_is_valid_dict(fetched_config)

    @staticmethod
    def fetch_remote_beta_headers_config(url: str, timeout: int = 5) -> dict:
        """
        Fetch the beta headers config from a remote URL.

        Returns the parsed JSON dict. Raises on network/parse errors
        (caller is expected to handle).
        """
        response = httpx.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def get_beta_headers_config(url: str) -> dict:
    """
    Public entry point — returns the beta headers config dict.

    1. If ``LITELLM_LOCAL_ANTHROPIC_BETA_HEADERS`` is set, uses the local backup only.
    2. Otherwise fetches from ``url``, validates integrity, and falls back
       to the local backup on any failure.

    Args:
        url: URL to fetch the remote beta headers configuration from

    Returns:
        Dict containing the beta headers configuration
    """
    # Check if local-only mode is enabled
    if os.getenv("LITELLM_LOCAL_ANTHROPIC_BETA_HEADERS", "").lower() == "true":
        # Previously a commented-out log line; enabled so this branch is
        # observable like the fallback branches below.
        verbose_logger.debug(
            "Using local Anthropic beta headers config "
            "(LITELLM_LOCAL_ANTHROPIC_BETA_HEADERS=True)"
        )
        return GetAnthropicBetaHeadersConfig.load_local_beta_headers_config()

    try:
        content = GetAnthropicBetaHeadersConfig.fetch_remote_beta_headers_config(url)
    except Exception as e:
        verbose_logger.warning(
            "LiteLLM: Failed to fetch remote beta headers config from %s: %s. "
            "Falling back to local backup.",
            url,
            str(e),
        )
        return GetAnthropicBetaHeadersConfig.load_local_beta_headers_config()

    # Validate the fetched config; on any integrity failure use the backup.
    if not GetAnthropicBetaHeadersConfig.validate_beta_headers_config(
        fetched_config=content
    ):
        verbose_logger.warning(
            "LiteLLM: Fetched beta headers config failed integrity check. "
            "Using local backup instead. url=%s",
            url,
        )
        return GetAnthropicBetaHeadersConfig.load_local_beta_headers_config()

    return content
|
||||||
|
|
||||||
|
|
||||||
|
def _load_beta_headers_config() -> Dict:
    """
    Load the beta headers configuration, caching the result.

    Called by all public API functions; manages the module-level cache so the
    config is fetched/read at most once per process (until reloaded).

    Returns:
        Dict containing the beta headers configuration
    """
    global _BETA_HEADERS_CONFIG

    if _BETA_HEADERS_CONFIG is None:
        # Resolve the source URL lazily to avoid an import cycle at module load.
        from litellm import anthropic_beta_headers_url

        _BETA_HEADERS_CONFIG = get_beta_headers_config(url=anthropic_beta_headers_url)
        verbose_logger.debug("Loaded and cached beta headers config")

    return _BETA_HEADERS_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def reload_beta_headers_config() -> Dict:
    """
    Force a fresh load of the beta headers configuration.

    Clears the module-level cache, then re-runs the normal load path
    (remote fetch or local backup).

    Returns:
        Dict containing the newly loaded beta headers configuration
    """
    global _BETA_HEADERS_CONFIG

    verbose_logger.info("Reloading beta headers config (cache cleared)")
    _BETA_HEADERS_CONFIG = None
    return _load_beta_headers_config()
|
||||||
|
|
||||||
|
|
||||||
|
def get_provider_name(provider: str) -> str:
    """
    Resolve a provider alias to its canonical provider name.

    Args:
        provider: Provider name (may be an alias)

    Returns:
        Canonical provider name (unchanged when no alias is configured)
    """
    alias_map = _load_beta_headers_config().get("provider_aliases", {})
    return alias_map.get(provider, provider)
|
||||||
|
|
||||||
|
|
||||||
|
def filter_and_transform_beta_headers(
    beta_headers: List[str],
    provider: str,
) -> List[str]:
    """
    Filter and transform beta headers based on provider's mapping configuration.

    This function:
    1. Only allows headers that are present in the provider's mapping keys
    2. Filters out headers with null values (unsupported)
    3. Maps headers to provider-specific names (e.g., advanced-tool-use -> tool-search-tool)

    The result is de-duplicated and sorted so the emitted header value is
    deterministic regardless of input order.

    Args:
        beta_headers: List of Anthropic beta header values
        provider: Provider name (e.g., "anthropic", "bedrock", "vertex_ai")

    Returns:
        List of filtered and transformed beta headers for the provider
    """
    if not beta_headers:
        return []

    config = _load_beta_headers_config()
    provider = get_provider_name(provider)

    # Get the header mapping for this provider
    provider_mapping = config.get(provider, {})

    filtered_headers: Set[str] = set()

    for header in beta_headers:
        header = header.strip()

        # The mapping keys act as an allow-list: unknown headers are dropped,
        # which is stricter than a deny-list of unsupported headers.
        if header not in provider_mapping:
            verbose_logger.debug(
                f"Dropping unknown beta header '{header}' for provider '{provider}' (not in mapping)"
            )
            continue

        mapped_header = provider_mapping[header]

        # Present in the mapping but explicitly unsupported (null value).
        if mapped_header is None:
            verbose_logger.debug(
                f"Dropping unsupported beta header '{header}' for provider '{provider}'"
            )
            continue

        filtered_headers.add(mapped_header)

    # sorted() accepts any iterable; the previous list() wrapper was redundant.
    return sorted(filtered_headers)
|
||||||
|
|
||||||
|
|
||||||
|
def is_beta_header_supported(
    beta_header: str,
    provider: str,
) -> bool:
    """
    Check if a specific beta header is supported by a provider.

    Args:
        beta_header: The Anthropic beta header value
        provider: Provider name

    Returns:
        True if the header is in the mapping with a non-null value, False otherwise
    """
    canonical = get_provider_name(provider)
    mapping = _load_beta_headers_config().get(canonical, {})

    # dict.get yields None both for missing keys and explicit nulls, so this
    # single lookup matches the membership check + value test.
    return mapping.get(beta_header) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def get_provider_beta_header(
    anthropic_beta_header: str,
    provider: str,
) -> Optional[str]:
    """
    Get the provider-specific beta header name for a given Anthropic beta header.

    Handles header transformations/mappings
    (e.g., advanced-tool-use -> tool-search-tool).

    Args:
        anthropic_beta_header: The Anthropic beta header value
        provider: Provider name

    Returns:
        The provider-specific header name if supported, or None if unsupported/unknown
    """
    canonical = get_provider_name(provider)
    mapping = _load_beta_headers_config().get(canonical, {})

    # dict.get returns None for both unknown headers and explicitly-null
    # (unsupported) mappings — the same result as the original two-step lookup.
    return mapping.get(anthropic_beta_header)
|
||||||
|
|
||||||
|
|
||||||
|
def update_headers_with_filtered_beta(
    headers: dict,
    provider: str,
) -> dict:
    """
    Filter and transform the ``anthropic-beta`` header value in a headers dict.

    Modifies the headers dict in place and returns it.

    Args:
        headers: Request headers dict (will be modified in place)
        provider: Provider name

    Returns:
        Updated headers dict
    """
    raw_value = headers.get("anthropic-beta")
    if not raw_value:
        # Nothing to filter — leave the headers untouched.
        return headers

    # Split the comma-separated value into individual beta flags.
    requested = [part.strip() for part in raw_value.split(",") if part.strip()]

    allowed = filter_and_transform_beta_headers(
        beta_headers=requested,
        provider=provider,
    )

    if allowed:
        headers["anthropic-beta"] = ",".join(allowed)
    else:
        # Drop the header entirely when no supported values remain.
        headers.pop("anthropic-beta", None)

    return headers
|
||||||
|
|
||||||
|
|
||||||
|
def get_unsupported_headers(provider: str) -> List[str]:
    """
    List all beta headers a provider does not support (null values in mapping).

    Args:
        provider: Provider name

    Returns:
        List of unsupported Anthropic beta header names
    """
    canonical = get_provider_name(provider)
    mapping = _load_beta_headers_config().get(canonical, {})

    # A null mapped value marks the header as known-but-unsupported.
    return [name for name, mapped in mapping.items() if mapped is None]
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
"""
|
||||||
|
Anthropic module for LiteLLM
|
||||||
|
"""
|
||||||
|
from .messages import acreate, create
|
||||||
|
|
||||||
|
__all__ = ["acreate", "create"]
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user