2026-05-13 14:42:45 +08:00
|
|
|
|
//go:build llm_script
|
|
|
|
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"os"
|
|
|
|
|
|
"path/filepath"
|
|
|
|
|
|
"strings"
|
|
|
|
|
|
"testing"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
// TestParseTasksParsesEvidenceFields writes a one-task markdown fixture to a
// temporary file, hands that file to parseTasks, and asserts that the single
// returned task's Verification carries the mode, command, expected evidence,
// evidence grade, task type and timeout listed in the fixture.
func TestParseTasksParsesEvidenceFields(t *testing.T) {
|
|
|
|
|
|
md := `# Tasks
|
|
|
|
|
|
|
|
|
|
|
|
### T-1 ✅ Example
|
|
|
|
|
|
- **verification**:
|
|
|
|
|
|
- mode: ` + "`test_pass`" + `
|
|
|
|
|
|
- command: ` + "`echo ok`" + `
|
|
|
|
|
|
- expected_evidence: ` + "`ok`" + `
|
|
|
|
|
|
- evidence_grade: ` + "`runtime-verified`" + `
|
|
|
|
|
|
- task_type: ` + "`code`" + `
|
|
|
|
|
|
- timeout_seconds: 15
|
|
|
|
|
|
`
|
|
|
|
|
|
|
|
|
|
|
|
// The temp file lives under t.TempDir(), which the testing package removes
// when the test finishes.
tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-*.md")
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("create temp file: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
defer tmpFile.Close()
|
|
|
|
|
|
|
|
|
|
|
|
if _, err := tmpFile.WriteString(md); err != nil {
|
|
|
|
|
|
t.Fatalf("write temp file: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
// Rewind to offset 0 so the same handle can be read back from the start.
if _, err := tmpFile.Seek(0, 0); err != nil {
|
|
|
|
|
|
t.Fatalf("seek temp file: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// parseTasks receives the open file handle itself, not a path.
tasks := parseTasks(tmpFile)
|
|
|
|
|
|
if len(tasks) != 1 {
|
|
|
|
|
|
t.Fatalf("expected 1 task, got %d", len(tasks))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// The expected string values below are the fixture values without their
// surrounding backticks.
got := tasks[0].Verification
|
|
|
|
|
|
if got.Mode != "test_pass" {
|
|
|
|
|
|
t.Fatalf("expected mode test_pass, got %q", got.Mode)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got.Command != "echo ok" {
|
|
|
|
|
|
t.Fatalf("expected command echo ok, got %q", got.Command)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got.ExpectedEvidence != "ok" {
|
|
|
|
|
|
t.Fatalf("expected evidence ok, got %q", got.ExpectedEvidence)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got.EvidenceGrade != "runtime-verified" {
|
|
|
|
|
|
t.Fatalf("expected evidence grade runtime-verified, got %q", got.EvidenceGrade)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got.TaskType != "code" {
|
|
|
|
|
|
t.Fatalf("expected task type code, got %q", got.TaskType)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got.TimeoutSeconds != 15 {
|
|
|
|
|
|
t.Fatalf("expected timeout 15, got %d", got.TimeoutSeconds)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestVerifyTaskRejectsSemanticOnlyForCodeTask(t *testing.T) {
|
|
|
|
|
|
task := taskEntry{
|
|
|
|
|
|
ID: "T-1",
|
|
|
|
|
|
Name: "semantic code task",
|
|
|
|
|
|
Verification: Verification{
|
|
|
|
|
|
Mode: "semantic",
|
|
|
|
|
|
Command: "echo ok",
|
|
|
|
|
|
TaskType: "code",
|
|
|
|
|
|
EvidenceGrade: "doc-claimed",
|
|
|
|
|
|
},
|
|
|
|
|
|
HasVerification: true,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
result := verifyTask(task, true)
|
|
|
|
|
|
if result.Verified {
|
|
|
|
|
|
t.Fatalf("expected semantic-only code task to fail")
|
|
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(result.Reason, "semantic-only") {
|
|
|
|
|
|
t.Fatalf("expected semantic-only rejection reason, got %q", result.Reason)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestVerifyTaskDefaultsEvidenceGradeFromMode(t *testing.T) {
|
|
|
|
|
|
task := taskEntry{
|
|
|
|
|
|
ID: "T-2",
|
|
|
|
|
|
Name: "artifact task",
|
|
|
|
|
|
Verification: Verification{
|
2026-05-29 18:48:48 +08:00
|
|
|
|
Mode: "artifact_present",
|
|
|
|
|
|
TaskType: "documentation",
|
2026-05-13 14:42:45 +08:00
|
|
|
|
},
|
|
|
|
|
|
HasVerification: true,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
result := verifyTask(task, true)
|
|
|
|
|
|
if !result.Verified {
|
|
|
|
|
|
t.Fatalf("expected dry-run artifact task to pass, got reason %q", result.Reason)
|
|
|
|
|
|
}
|
|
|
|
|
|
if result.EvidenceGrade != "artifact-present" {
|
|
|
|
|
|
t.Fatalf("expected default evidence grade artifact-present, got %q", result.EvidenceGrade)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-29 18:48:48 +08:00
|
|
|
|
|
2026-05-13 14:42:45 +08:00
|
|
|
|
func TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal(t *testing.T) {
|
|
|
|
|
|
root := t.TempDir()
|
|
|
|
|
|
projectDir := filepath.Join(root, "project")
|
|
|
|
|
|
globalDir := filepath.Join(root, "workspace")
|
|
|
|
|
|
scriptDir := filepath.Join(projectDir, "scripts")
|
|
|
|
|
|
if err := os.MkdirAll(projectDir, 0o755); err != nil {
|
|
|
|
|
|
t.Fatalf("mkdir project dir: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := os.MkdirAll(globalDir, 0o755); err != nil {
|
|
|
|
|
|
t.Fatalf("mkdir global dir: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := os.MkdirAll(scriptDir, 0o755); err != nil {
|
|
|
|
|
|
t.Fatalf("mkdir script dir: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
projectTasks := filepath.Join(projectDir, "TASKS.md")
|
|
|
|
|
|
globalTasks := filepath.Join(globalDir, "TASKS.md")
|
|
|
|
|
|
if err := os.WriteFile(projectTasks, []byte("# project"), 0o644); err != nil {
|
|
|
|
|
|
t.Fatalf("write project tasks: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := os.WriteFile(globalTasks, []byte("# global"), 0o644); err != nil {
|
|
|
|
|
|
t.Fatalf("write global tasks: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
got := resolveTasksPathWithContext("", "", filepath.Join(root, "outside"), scriptDir, globalTasks)
|
|
|
|
|
|
if got != projectTasks {
|
|
|
|
|
|
t.Fatalf("expected project tasks path, got %q", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestResolveTasksPathAllowsExplicitGlobalPath(t *testing.T) {
|
|
|
|
|
|
root := t.TempDir()
|
|
|
|
|
|
projectDir := filepath.Join(root, "project")
|
|
|
|
|
|
globalDir := filepath.Join(root, "workspace")
|
|
|
|
|
|
scriptDir := filepath.Join(projectDir, "scripts")
|
|
|
|
|
|
if err := os.MkdirAll(projectDir, 0o755); err != nil {
|
|
|
|
|
|
t.Fatalf("mkdir project dir: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := os.MkdirAll(globalDir, 0o755); err != nil {
|
|
|
|
|
|
t.Fatalf("mkdir global dir: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := os.MkdirAll(scriptDir, 0o755); err != nil {
|
|
|
|
|
|
t.Fatalf("mkdir script dir: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
projectTasks := filepath.Join(projectDir, "TASKS.md")
|
|
|
|
|
|
globalTasks := filepath.Join(globalDir, "TASKS.md")
|
|
|
|
|
|
if err := os.WriteFile(projectTasks, []byte("# project"), 0o644); err != nil {
|
|
|
|
|
|
t.Fatalf("write project tasks: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := os.WriteFile(globalTasks, []byte("# global"), 0o644); err != nil {
|
|
|
|
|
|
t.Fatalf("write global tasks: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
got := resolveTasksPathWithContext(globalTasks, "", filepath.Join(root, "outside"), scriptDir, globalTasks)
|
|
|
|
|
|
if got != globalTasks {
|
|
|
|
|
|
t.Fatalf("expected explicit global tasks path, got %q", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestVerifyTaskCapturesFailureSummaries(t *testing.T) {
|
|
|
|
|
|
task := taskEntry{
|
|
|
|
|
|
ID: "T-3",
|
|
|
|
|
|
Name: "failing task",
|
|
|
|
|
|
Verification: Verification{
|
|
|
|
|
|
Mode: "test_pass",
|
|
|
|
|
|
Command: "echo standard-output && echo standard-error 1>&2 && exit 1",
|
|
|
|
|
|
ExpectedEvidence: "unused",
|
|
|
|
|
|
TaskType: "automation",
|
|
|
|
|
|
},
|
|
|
|
|
|
HasVerification: true,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
result := verifyTask(task, false)
|
|
|
|
|
|
if result.Verified {
|
|
|
|
|
|
t.Fatalf("expected failing task to fail")
|
|
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(result.StdoutSummary, "standard-output") {
|
|
|
|
|
|
t.Fatalf("expected stdout summary to contain command output, got %q", result.StdoutSummary)
|
|
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(result.StderrSummary, "standard-error") {
|
|
|
|
|
|
t.Fatalf("expected stderr summary to contain command error, got %q", result.StderrSummary)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestParseTasksParsesNormalizedStatus writes a three-task markdown fixture to
// a temporary file, hands that file to parseTasks, and asserts that the tasks'
// Status fields come back as "completed", "planned" and "paused" in order.
// Each fixture task carries a Chinese status line ("状态" means "status"):
// "完成" (done), "待启动" (awaiting start) and "待规划" (awaiting planning).
func TestParseTasksParsesNormalizedStatus(t *testing.T) {
|
|
|
|
|
|
md := `# Tasks
|
|
|
|
|
|
|
|
|
|
|
|
### T-1 ✅ Done task
|
|
|
|
|
|
- **状态**:✅ 完成(2026-05-11)
|
|
|
|
|
|
- **verification**:
|
|
|
|
|
|
- mode: ` + "`test_pass`" + `
|
|
|
|
|
|
- command: ` + "`echo ok`" + `
|
|
|
|
|
|
- expected_evidence: ` + "`ok`" + `
|
|
|
|
|
|
|
|
|
|
|
|
### T-2 🔶 Planned task
|
|
|
|
|
|
- **状态**:🔶 待启动
|
|
|
|
|
|
- **verification**:
|
|
|
|
|
|
- mode: ` + "`test_pass`" + `
|
|
|
|
|
|
- command: ` + "`echo ok`" + `
|
|
|
|
|
|
- expected_evidence: ` + "`ok`" + `
|
|
|
|
|
|
|
|
|
|
|
|
### T-3 ⏸️ Paused task
|
|
|
|
|
|
- **状态**:⏸️ 待规划
|
|
|
|
|
|
- **verification**:
|
|
|
|
|
|
- mode: ` + "`test_pass`" + `
|
|
|
|
|
|
- command: ` + "`echo ok`" + `
|
|
|
|
|
|
- expected_evidence: ` + "`ok`" + `
|
|
|
|
|
|
`
|
|
|
|
|
|
|
|
|
|
|
|
// The temp file lives under t.TempDir(), which the testing package removes
// when the test finishes.
tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-status-*.md")
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("create temp file: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
defer tmpFile.Close()
|
|
|
|
|
|
|
|
|
|
|
|
if _, err := tmpFile.WriteString(md); err != nil {
|
|
|
|
|
|
t.Fatalf("write temp file: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
// Rewind to offset 0 so the same handle can be read back from the start.
if _, err := tmpFile.Seek(0, 0); err != nil {
|
|
|
|
|
|
t.Fatalf("seek temp file: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// parseTasks receives the open file handle itself, not a path.
tasks := parseTasks(tmpFile)
|
|
|
|
|
|
if len(tasks) != 3 {
|
|
|
|
|
|
t.Fatalf("expected 3 tasks, got %d", len(tasks))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Tasks are checked by position, so the assertions also rely on parseTasks
// returning them in fixture order.
if tasks[0].Status != "completed" {
|
|
|
|
|
|
t.Fatalf("expected first task status completed, got %q", tasks[0].Status)
|
|
|
|
|
|
}
|
|
|
|
|
|
if tasks[1].Status != "planned" {
|
|
|
|
|
|
t.Fatalf("expected second task status planned, got %q", tasks[1].Status)
|
|
|
|
|
|
}
|
|
|
|
|
|
if tasks[2].Status != "paused" {
|
|
|
|
|
|
t.Fatalf("expected third task status paused, got %q", tasks[2].Status)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestFilterTasksByStatus(t *testing.T) {
|
|
|
|
|
|
tasks := []taskEntry{
|
|
|
|
|
|
{ID: "T-1", Status: "completed"},
|
|
|
|
|
|
{ID: "T-2", Status: "planned"},
|
|
|
|
|
|
{ID: "T-3", Status: "in_progress"},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
completed, err := filterTasksByStatus(tasks, "completed")
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("unexpected error: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(completed) != 1 || completed[0].ID != "T-1" {
|
|
|
|
|
|
t.Fatalf("expected only completed task, got %#v", completed)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
all, err := filterTasksByStatus(tasks, "all")
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("unexpected error for all: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(all) != 3 {
|
|
|
|
|
|
t.Fatalf("expected all 3 tasks, got %d", len(all))
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-05-29 18:48:48 +08:00
|
|
|
|
|
|
|
|
|
|
func TestDetermineProcessExitCode(t *testing.T) {
|
|
|
|
|
|
cases := []struct {
|
|
|
|
|
|
name string
|
|
|
|
|
|
results []TaskResult
|
|
|
|
|
|
want int
|
|
|
|
|
|
}{
|
|
|
|
|
|
{
|
|
|
|
|
|
name: "all pass",
|
|
|
|
|
|
results: []TaskResult{{Verified: true}, {Verified: true}},
|
|
|
|
|
|
want: 0,
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
name: "runtime failure",
|
|
|
|
|
|
results: []TaskResult{{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}},
|
|
|
|
|
|
want: 2,
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
name: "artifact only failure",
|
|
|
|
|
|
results: []TaskResult{{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}},
|
|
|
|
|
|
want: 3,
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
name: "mixed defaults to runtime",
|
|
|
|
|
|
results: []TaskResult{
|
|
|
|
|
|
{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"},
|
|
|
|
|
|
{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"},
|
|
|
|
|
|
},
|
|
|
|
|
|
want: 2,
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for _, tc := range cases {
|
|
|
|
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
|
|
|
|
if got := determineProcessExitCode(tc.results); got != tc.want {
|
|
|
|
|
|
t.Fatalf("exit code = %d, want %d", got, tc.want)
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestClassifyFailureTier(t *testing.T) {
|
|
|
|
|
|
if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 2 {
|
|
|
|
|
|
t.Fatalf("runtime failure tier = %d, want 2", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}); got != 3 {
|
|
|
|
|
|
t.Fatalf("artifact failure tier = %d, want 3", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
if got := classifyFailureTier(TaskResult{Verified: true, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 0 {
|
|
|
|
|
|
t.Fatalf("verified tier = %d, want 0", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestVerifyTaskClassifiesToolExecutionFailure(t *testing.T) {
|
|
|
|
|
|
task := taskEntry{
|
|
|
|
|
|
ID: "T-4",
|
|
|
|
|
|
Name: "tool failure task",
|
|
|
|
|
|
Verification: Verification{
|
|
|
|
|
|
Mode: "test_pass",
|
|
|
|
|
|
Command: "echo tool-out && echo tool-err 1>&2 && exit 1",
|
|
|
|
|
|
ExpectedEvidence: "tool-out",
|
|
|
|
|
|
TaskType: "automation",
|
|
|
|
|
|
},
|
|
|
|
|
|
HasVerification: true,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
result := verifyTask(task, false)
|
|
|
|
|
|
if result.Verified {
|
|
|
|
|
|
t.Fatalf("expected tool failure task to fail")
|
|
|
|
|
|
}
|
|
|
|
|
|
if result.FailureClass != "tool_execution_failure" {
|
|
|
|
|
|
t.Fatalf("failure class = %q, want tool_execution_failure", result.FailureClass)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestVerifyTaskArtifactPresentMisuseBecomesConfigFailure(t *testing.T) {
|
|
|
|
|
|
task := taskEntry{
|
|
|
|
|
|
ID: "T-5",
|
|
|
|
|
|
Name: "artifact misuse",
|
|
|
|
|
|
Verification: Verification{
|
|
|
|
|
|
Mode: "artifact_present",
|
|
|
|
|
|
Command: "echo actual-output",
|
|
|
|
|
|
ExpectedEvidence: "expected-output",
|
|
|
|
|
|
TaskType: "documentation",
|
|
|
|
|
|
},
|
|
|
|
|
|
HasVerification: true,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
result := verifyTask(task, false)
|
|
|
|
|
|
if result.Verified {
|
|
|
|
|
|
t.Fatalf("expected artifact misuse to fail")
|
|
|
|
|
|
}
|
|
|
|
|
|
if result.FailureClass != "verification_config_failure" {
|
|
|
|
|
|
t.Fatalf("failure class = %q, want verification_config_failure", result.FailureClass)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestValidateVerificationRejectsArtifactPresentWithCommand(t *testing.T) {
|
|
|
|
|
|
got := validateVerification(Verification{
|
|
|
|
|
|
Mode: "artifact_present",
|
|
|
|
|
|
Command: "echo exists",
|
|
|
|
|
|
ExpectedEvidence: "exists",
|
|
|
|
|
|
TaskType: "documentation",
|
|
|
|
|
|
EvidenceGrade: "artifact-present",
|
|
|
|
|
|
})
|
|
|
|
|
|
if got == "" {
|
|
|
|
|
|
t.Fatalf("expected artifact_present with command to be rejected")
|
|
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(got, "artifact_present") {
|
|
|
|
|
|
t.Fatalf("unexpected validation error: %q", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestValidateVerificationRejectsArtifactPresentForAutomation(t *testing.T) {
|
|
|
|
|
|
got := validateVerification(Verification{
|
|
|
|
|
|
Mode: "artifact_present",
|
|
|
|
|
|
TaskType: "automation",
|
|
|
|
|
|
EvidenceGrade: "artifact-present",
|
|
|
|
|
|
})
|
|
|
|
|
|
if got == "" {
|
|
|
|
|
|
t.Fatalf("expected artifact_present automation task to be rejected")
|
|
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(got, "artifact_present") {
|
|
|
|
|
|
t.Fatalf("unexpected validation error: %q", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|