Files
llm-intelligence/scripts/verification_executor_test.go

395 lines
11 KiB
Go
Raw Permalink Normal View History

//go:build llm_script
package main
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestParseTasksParsesEvidenceFields writes a markdown document containing a
// single task to a temporary file, runs parseTasks on it, and compares every
// field of the parsed Verification with the value written in the document.
func TestParseTasksParsesEvidenceFields(t *testing.T) {
	// Backticks cannot appear inside a raw string literal, so the inline-code
	// values are spliced in with interpreted string literals.
	md := `# Tasks
### T-1 Example
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
- evidence_grade: ` + "`runtime-verified`" + `
- task_type: ` + "`code`" + `
- timeout_seconds: 15
`
	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()
	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind so parseTasks reads from the start of what was just written.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// Fatal here: indexing tasks[0] below would be invalid otherwise.
	if len(tasks) != 1 {
		t.Fatalf("expected 1 task, got %d", len(tasks))
	}

	// The field checks are independent of each other, so they use Errorf:
	// a single run then reports every field that was parsed incorrectly
	// instead of stopping at the first one.
	got := tasks[0].Verification
	if got.Mode != "test_pass" {
		t.Errorf("expected mode test_pass, got %q", got.Mode)
	}
	if got.Command != "echo ok" {
		t.Errorf("expected command echo ok, got %q", got.Command)
	}
	if got.ExpectedEvidence != "ok" {
		t.Errorf("expected evidence ok, got %q", got.ExpectedEvidence)
	}
	if got.EvidenceGrade != "runtime-verified" {
		t.Errorf("expected evidence grade runtime-verified, got %q", got.EvidenceGrade)
	}
	if got.TaskType != "code" {
		t.Errorf("expected task type code, got %q", got.TaskType)
	}
	if got.TimeoutSeconds != 15 {
		t.Errorf("expected timeout 15, got %d", got.TimeoutSeconds)
	}
}
func TestVerifyTaskRejectsSemanticOnlyForCodeTask(t *testing.T) {
task := taskEntry{
ID: "T-1",
Name: "semantic code task",
Verification: Verification{
Mode: "semantic",
Command: "echo ok",
TaskType: "code",
EvidenceGrade: "doc-claimed",
},
HasVerification: true,
}
result := verifyTask(task, true)
if result.Verified {
t.Fatalf("expected semantic-only code task to fail")
}
if !strings.Contains(result.Reason, "semantic-only") {
t.Fatalf("expected semantic-only rejection reason, got %q", result.Reason)
}
}
// TestVerifyTaskDefaultsEvidenceGradeFromMode checks that a task whose
// verification leaves EvidenceGrade unset comes back from verifyTask as
// verified and carrying the grade "artifact-present" when its mode is
// "artifact_present".
func TestVerifyTaskDefaultsEvidenceGradeFromMode(t *testing.T) {
	// EvidenceGrade is deliberately omitted; the result is expected to fill it in.
	entry := taskEntry{
		ID:   "T-2",
		Name: "artifact task",
		Verification: Verification{
			Mode:     "artifact_present",
			TaskType: "documentation",
		},
		HasVerification: true,
	}

	outcome := verifyTask(entry, true)

	if passed := outcome.Verified; !passed {
		t.Fatalf("expected dry-run artifact task to pass, got reason %q", outcome.Reason)
	}
	if grade := outcome.EvidenceGrade; grade != "artifact-present" {
		t.Fatalf("expected default evidence grade artifact-present, got %q", grade)
	}
}
// TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal builds a temporary tree
// holding both project/TASKS.md and workspace/TASKS.md, then calls
// resolveTasksPathWithContext with its first two arguments empty and expects
// the project file, not the workspace one, to be returned.
func TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal(t *testing.T) {
	base := t.TempDir()
	var (
		projectDir = filepath.Join(base, "project")
		globalDir  = filepath.Join(base, "workspace")
		scriptDir  = filepath.Join(projectDir, "scripts")
	)

	// Directory layout.
	if mkErr := os.MkdirAll(projectDir, 0o755); mkErr != nil {
		t.Fatalf("mkdir project dir: %v", mkErr)
	}
	if mkErr := os.MkdirAll(globalDir, 0o755); mkErr != nil {
		t.Fatalf("mkdir global dir: %v", mkErr)
	}
	if mkErr := os.MkdirAll(scriptDir, 0o755); mkErr != nil {
		t.Fatalf("mkdir script dir: %v", mkErr)
	}

	// One TASKS.md in each candidate location.
	projectFile := filepath.Join(projectDir, "TASKS.md")
	globalFile := filepath.Join(globalDir, "TASKS.md")
	if wrErr := os.WriteFile(projectFile, []byte("# project"), 0o644); wrErr != nil {
		t.Fatalf("write project tasks: %v", wrErr)
	}
	if wrErr := os.WriteFile(globalFile, []byte("# global"), 0o644); wrErr != nil {
		t.Fatalf("write global tasks: %v", wrErr)
	}

	// The "outside" directory is never created by this test.
	outside := filepath.Join(base, "outside")
	resolved := resolveTasksPathWithContext("", "", outside, scriptDir, globalFile)
	if resolved != projectFile {
		t.Fatalf("expected project tasks path, got %q", resolved)
	}
}
// TestResolveTasksPathAllowsExplicitGlobalPath builds the same project and
// workspace layout, each with its own TASKS.md, and expects
// resolveTasksPathWithContext to return the workspace file when that path is
// passed as its first argument.
func TestResolveTasksPathAllowsExplicitGlobalPath(t *testing.T) {
	tmp := t.TempDir()
	projectRoot := filepath.Join(tmp, "project")
	workspaceRoot := filepath.Join(tmp, "workspace")
	scriptsRoot := filepath.Join(projectRoot, "scripts")

	if e := os.MkdirAll(projectRoot, 0o755); e != nil {
		t.Fatalf("mkdir project dir: %v", e)
	}
	if e := os.MkdirAll(workspaceRoot, 0o755); e != nil {
		t.Fatalf("mkdir global dir: %v", e)
	}
	if e := os.MkdirAll(scriptsRoot, 0o755); e != nil {
		t.Fatalf("mkdir script dir: %v", e)
	}

	localTasks := filepath.Join(projectRoot, "TASKS.md")
	sharedTasks := filepath.Join(workspaceRoot, "TASKS.md")
	if e := os.WriteFile(localTasks, []byte("# project"), 0o644); e != nil {
		t.Fatalf("write project tasks: %v", e)
	}
	if e := os.WriteFile(sharedTasks, []byte("# global"), 0o644); e != nil {
		t.Fatalf("write global tasks: %v", e)
	}

	// NOTE(review): the first argument is presumably the explicitly requested
	// path — confirm against resolveTasksPathWithContext's signature.
	chosen := resolveTasksPathWithContext(
		sharedTasks,
		"",
		filepath.Join(tmp, "outside"),
		scriptsRoot,
		sharedTasks,
	)
	if chosen != sharedTasks {
		t.Fatalf("expected explicit global tasks path, got %q", chosen)
	}
}
// TestVerifyTaskCapturesFailureSummaries runs verifyTask on a task whose
// command prints to both output streams and exits with status 1, then checks
// that the result is unverified and that StdoutSummary and StderrSummary each
// contain the text written to the corresponding stream.
func TestVerifyTaskCapturesFailureSummaries(t *testing.T) {
	// Writes one marker to stdout, one to stderr, then fails.
	const failingCommand = "echo standard-output && echo standard-error 1>&2 && exit 1"

	entry := taskEntry{
		ID:   "T-3",
		Name: "failing task",
		Verification: Verification{
			Mode:             "test_pass",
			Command:          failingCommand,
			ExpectedEvidence: "unused",
			TaskType:         "automation",
		},
		HasVerification: true,
	}

	outcome := verifyTask(entry, false)

	if outcome.Verified {
		t.Fatalf("expected failing task to fail")
	}
	if stdout := outcome.StdoutSummary; !strings.Contains(stdout, "standard-output") {
		t.Fatalf("expected stdout summary to contain command output, got %q", stdout)
	}
	if stderr := outcome.StderrSummary; !strings.Contains(stderr, "standard-error") {
		t.Fatalf("expected stderr summary to contain command error, got %q", stderr)
	}
}
// TestParseTasksParsesNormalizedStatus writes a markdown document with three
// tasks whose status lines are written in Chinese ("状态" means "status"),
// runs parseTasks on it, and expects the parsed Status values to be
// "completed", "planned" and "paused" respectively.
func TestParseTasksParsesNormalizedStatus(t *testing.T) {
	// Backticks cannot appear inside a raw string literal, so the inline-code
	// values are spliced in with interpreted string literals.
	md := `# Tasks
### T-1 Done task
- **状态** 完成2026-05-11
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
### T-2 🔶 Planned task
- **状态**🔶 待启动
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
### T-3 Paused task
- **状态** 待规划
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
`
	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-status-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()
	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind so parseTasks reads from the start of what was just written.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// Fatal here: the indexed checks below need exactly three entries.
	if len(tasks) != 3 {
		t.Fatalf("expected 3 tasks, got %d", len(tasks))
	}

	// Each task's status is checked independently, so Errorf is used to report
	// every wrongly normalized status in a single run.
	if tasks[0].Status != "completed" {
		t.Errorf("expected first task status completed, got %q", tasks[0].Status)
	}
	if tasks[1].Status != "planned" {
		t.Errorf("expected second task status planned, got %q", tasks[1].Status)
	}
	if tasks[2].Status != "paused" {
		t.Errorf("expected third task status paused, got %q", tasks[2].Status)
	}
}
// TestFilterTasksByStatus checks filterTasksByStatus with two filters over
// three tasks of differing status: "completed" must return only T-1, and "all"
// must return all three entries. Neither call may return an error.
func TestFilterTasksByStatus(t *testing.T) {
	entries := []taskEntry{
		{ID: "T-1", Status: "completed"},
		{ID: "T-2", Status: "planned"},
		{ID: "T-3", Status: "in_progress"},
	}

	onlyCompleted, err := filterTasksByStatus(entries, "completed")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !(len(onlyCompleted) == 1 && onlyCompleted[0].ID == "T-1") {
		t.Fatalf("expected only completed task, got %#v", onlyCompleted)
	}

	everything, err := filterTasksByStatus(entries, "all")
	if err != nil {
		t.Fatalf("unexpected error for all: %v", err)
	}
	if count := len(everything); count != 3 {
		t.Fatalf("expected all 3 tasks, got %d", count)
	}
}
// TestDetermineProcessExitCode is a table-driven test of
// determineProcessExitCode. The expected codes are 0 when every result is
// verified, 2 when a "runtime-verified" automation result failed, 3 when only
// an "artifact-present" documentation result failed, and 2 when both kinds of
// failure are present.
func TestDetermineProcessExitCode(t *testing.T) {
	// Result values shared by the table rows below.
	var (
		passed          = TaskResult{Verified: true}
		runtimeFailure  = TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}
		artifactFailure = TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}
	)

	type scenario struct {
		label    string
		input    []TaskResult
		exitCode int
	}
	scenarios := []scenario{
		{label: "all pass", input: []TaskResult{passed, passed}, exitCode: 0},
		{label: "runtime failure", input: []TaskResult{runtimeFailure}, exitCode: 2},
		{label: "artifact only failure", input: []TaskResult{artifactFailure}, exitCode: 3},
		{label: "mixed defaults to runtime", input: []TaskResult{artifactFailure, runtimeFailure}, exitCode: 2},
	}

	for _, sc := range scenarios {
		t.Run(sc.label, func(t *testing.T) {
			actual := determineProcessExitCode(sc.input)
			if actual != sc.exitCode {
				t.Fatalf("exit code = %d, want %d", actual, sc.exitCode)
			}
		})
	}
}
// TestClassifyFailureTier checks classifyFailureTier on three results: an
// unverified "runtime-verified" automation result must map to tier 2, an
// unverified "artifact-present" documentation result to tier 3, and a verified
// result to tier 0.
func TestClassifyFailureTier(t *testing.T) {
	// The three checks are unrelated to each other, so Errorf is used: one run
	// reports every misclassified tier instead of stopping at the first.
	if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 2 {
		t.Errorf("runtime failure tier = %d, want 2", got)
	}
	if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}); got != 3 {
		t.Errorf("artifact failure tier = %d, want 3", got)
	}
	if got := classifyFailureTier(TaskResult{Verified: true, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 0 {
		t.Errorf("verified tier = %d, want 0", got)
	}
}
func TestVerifyTaskClassifiesToolExecutionFailure(t *testing.T) {
task := taskEntry{
ID: "T-4",
Name: "tool failure task",
Verification: Verification{
Mode: "test_pass",
Command: "echo tool-out && echo tool-err 1>&2 && exit 1",
ExpectedEvidence: "tool-out",
TaskType: "automation",
},
HasVerification: true,
}
result := verifyTask(task, false)
if result.Verified {
t.Fatalf("expected tool failure task to fail")
}
if result.FailureClass != "tool_execution_failure" {
t.Fatalf("failure class = %q, want tool_execution_failure", result.FailureClass)
}
}
func TestVerifyTaskArtifactPresentMisuseBecomesConfigFailure(t *testing.T) {
task := taskEntry{
ID: "T-5",
Name: "artifact misuse",
Verification: Verification{
Mode: "artifact_present",
Command: "echo actual-output",
ExpectedEvidence: "expected-output",
TaskType: "documentation",
},
HasVerification: true,
}
result := verifyTask(task, false)
if result.Verified {
t.Fatalf("expected artifact misuse to fail")
}
if result.FailureClass != "verification_config_failure" {
t.Fatalf("failure class = %q, want verification_config_failure", result.FailureClass)
}
}
func TestValidateVerificationRejectsArtifactPresentWithCommand(t *testing.T) {
got := validateVerification(Verification{
Mode: "artifact_present",
Command: "echo exists",
ExpectedEvidence: "exists",
TaskType: "documentation",
EvidenceGrade: "artifact-present",
})
if got == "" {
t.Fatalf("expected artifact_present with command to be rejected")
}
if !strings.Contains(got, "artifact_present") {
t.Fatalf("unexpected validation error: %q", got)
}
}
func TestValidateVerificationRejectsArtifactPresentForAutomation(t *testing.T) {
got := validateVerification(Verification{
Mode: "artifact_present",
TaskType: "automation",
EvidenceGrade: "artifact-present",
})
if got == "" {
t.Fatalf("expected artifact_present automation task to be rejected")
}
if !strings.Contains(got, "artifact_present") {
t.Fatalf("unexpected validation error: %q", got)
}
}