llm-intelligence/scripts/verification_executor_test.go

//go:build llm_script

package main

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestParseTasksParsesEvidenceFields feeds parseTasks a markdown document with
// a single task and checks that every entry of its verification block (mode,
// command, expected_evidence, evidence_grade, task_type, timeout_seconds)
// lands in the matching field of the parsed Verification value.
func TestParseTasksParsesEvidenceFields(t *testing.T) {
	// Backtick-quoted values are spliced in as "`...`" because a raw string
	// literal cannot itself contain a backtick.
	md := `# Tasks

### T-1 ✅ Example
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `
  - evidence_grade: ` + "`runtime-verified`" + `
  - task_type: ` + "`code`" + `
  - timeout_seconds: 15
`

	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()

	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind so parseTasks reads the content that was just written.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// Stays fatal: indexing tasks[0] below is only safe with exactly one task.
	if len(tasks) != 1 {
		t.Fatalf("expected 1 task, got %d", len(tasks))
	}

	// The field checks are independent of each other, so each one reports
	// with t.Errorf and a single run lists every mismatching field.
	got := tasks[0].Verification
	if got.Mode != "test_pass" {
		t.Errorf("expected mode test_pass, got %q", got.Mode)
	}
	if got.Command != "echo ok" {
		t.Errorf("expected command echo ok, got %q", got.Command)
	}
	if got.ExpectedEvidence != "ok" {
		t.Errorf("expected evidence ok, got %q", got.ExpectedEvidence)
	}
	if got.EvidenceGrade != "runtime-verified" {
		t.Errorf("expected evidence grade runtime-verified, got %q", got.EvidenceGrade)
	}
	if got.TaskType != "code" {
		t.Errorf("expected task type code, got %q", got.TaskType)
	}
	if got.TimeoutSeconds != 15 {
		t.Errorf("expected timeout 15, got %d", got.TimeoutSeconds)
	}
}

func TestVerifyTaskRejectsSemanticOnlyForCodeTask(t *testing.T) {
	task := taskEntry{
		ID:   "T-1",
		Name: "semantic code task",
		Verification: Verification{
			Mode:          "semantic",
			Command:       "echo ok",
			TaskType:      "code",
			EvidenceGrade: "doc-claimed",
		},
		HasVerification: true,
	}

	result := verifyTask(task, true)
	if result.Verified {
		t.Fatalf("expected semantic-only code task to fail")
	}
	if !strings.Contains(result.Reason, "semantic-only") {
		t.Fatalf("expected semantic-only rejection reason, got %q", result.Reason)
	}
}

func TestVerifyTaskDefaultsEvidenceGradeFromMode(t *testing.T) {
	task := taskEntry{
		ID:   "T-2",
		Name: "artifact task",
		Verification: Verification{
			Mode:             "artifact_present",
			Command:          "echo exists",
			ExpectedEvidence: "exists",
		},
		HasVerification: true,
	}

	result := verifyTask(task, true)
	if !result.Verified {
		t.Fatalf("expected dry-run artifact task to pass, got reason %q", result.Reason)
	}
	if result.EvidenceGrade != "artifact-present" {
		t.Fatalf("expected default evidence grade artifact-present, got %q", result.EvidenceGrade)
	}
}

// TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal builds a temporary
// tree holding both project/TASKS.md and workspace/TASKS.md and asserts that
// resolveTasksPathWithContext, called with empty first and second arguments,
// returns the project file rather than the workspace ("global") one.
func TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal(t *testing.T) {
	base := t.TempDir()
	projDir := filepath.Join(base, "project")
	wsDir := filepath.Join(base, "workspace")
	scriptsDir := filepath.Join(projDir, "scripts")

	// Create the directory layout: project/, workspace/ and project/scripts/.
	err := os.MkdirAll(projDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir project dir: %v", err)
	}
	err = os.MkdirAll(wsDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir global dir: %v", err)
	}
	err = os.MkdirAll(scriptsDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir script dir: %v", err)
	}

	// Both candidate task files exist, so the result reflects a choice
	// between them rather than a missing file.
	wantPath := filepath.Join(projDir, "TASKS.md")
	wsTasks := filepath.Join(wsDir, "TASKS.md")
	err = os.WriteFile(wantPath, []byte("# project"), 0o644)
	if err != nil {
		t.Fatalf("write project tasks: %v", err)
	}
	err = os.WriteFile(wsTasks, []byte("# global"), 0o644)
	if err != nil {
		t.Fatalf("write global tasks: %v", err)
	}

	// The "outside" directory is never created on disk.
	// NOTE(review): the meaning of each positional argument is defined by
	// resolveTasksPathWithContext, which is not visible here — confirm.
	outsideDir := filepath.Join(base, "outside")
	if resolved := resolveTasksPathWithContext("", "", outsideDir, scriptsDir, wsTasks); resolved != wantPath {
		t.Fatalf("expected project tasks path, got %q", resolved)
	}
}

// TestResolveTasksPathAllowsExplicitGlobalPath builds a temporary tree holding
// both project/TASKS.md and workspace/TASKS.md and asserts that
// resolveTasksPathWithContext returns the workspace ("global") file when that
// path is supplied as its first argument.
func TestResolveTasksPathAllowsExplicitGlobalPath(t *testing.T) {
	base := t.TempDir()
	projDir := filepath.Join(base, "project")
	wsDir := filepath.Join(base, "workspace")
	scriptsDir := filepath.Join(projDir, "scripts")

	// Create the directory layout: project/, workspace/ and project/scripts/.
	err := os.MkdirAll(projDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir project dir: %v", err)
	}
	err = os.MkdirAll(wsDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir global dir: %v", err)
	}
	err = os.MkdirAll(scriptsDir, 0o755)
	if err != nil {
		t.Fatalf("mkdir script dir: %v", err)
	}

	// The project file exists too, so returning the global path is a
	// deliberate selection rather than the only option.
	projTasks := filepath.Join(projDir, "TASKS.md")
	wantPath := filepath.Join(wsDir, "TASKS.md")
	err = os.WriteFile(projTasks, []byte("# project"), 0o644)
	if err != nil {
		t.Fatalf("write project tasks: %v", err)
	}
	err = os.WriteFile(wantPath, []byte("# global"), 0o644)
	if err != nil {
		t.Fatalf("write global tasks: %v", err)
	}

	// The global path is passed as both the first and the last argument; the
	// "outside" directory is never created on disk.
	// NOTE(review): the meaning of each positional argument is defined by
	// resolveTasksPathWithContext, which is not visible here — confirm.
	outsideDir := filepath.Join(base, "outside")
	if resolved := resolveTasksPathWithContext(wantPath, "", outsideDir, scriptsDir, wantPath); resolved != wantPath {
		t.Fatalf("expected explicit global tasks path, got %q", resolved)
	}
}

// TestVerifyTaskCapturesFailureSummaries runs verifyTask on a task whose
// command writes to both stdout and stderr and then exits with status 1. It
// asserts that the task is not verified and that the result's StdoutSummary
// and StderrSummary contain the respective output.
func TestVerifyTaskCapturesFailureSummaries(t *testing.T) {
	task := taskEntry{
		ID:   "T-3",
		Name: "failing task",
		Verification: Verification{
			Mode: "test_pass",
			// NOTE(review): the command uses shell syntax (&&, 1>&2, exit), so
			// verifyTask presumably runs it through a POSIX-style shell —
			// confirm before running this test on other platforms.
			Command:          "echo standard-output && echo standard-error 1>&2 && exit 1",
			ExpectedEvidence: "unused",
			TaskType:         "automation",
		},
		HasVerification: true,
	}

	// Unlike the other verifyTask tests in this file, false is passed here;
	// presumably that disables dry-run so the command really executes — confirm.
	result := verifyTask(task, false)

	// The three checks are independent, so each reports with t.Errorf and a
	// single run shows every part of the result that is wrong.
	if result.Verified {
		t.Errorf("expected failing task to fail")
	}
	if !strings.Contains(result.StdoutSummary, "standard-output") {
		t.Errorf("expected stdout summary to contain command output, got %q", result.StdoutSummary)
	}
	if !strings.Contains(result.StderrSummary, "standard-error") {
		t.Errorf("expected stderr summary to contain command error, got %q", result.StderrSummary)
	}
}

// TestParseTasksParsesNormalizedStatus feeds parseTasks a markdown document
// with three tasks whose status lines are written with emoji and Chinese
// labels, and checks that the parsed Status fields are the normalized values
// "completed", "planned" and "paused", in document order.
func TestParseTasksParsesNormalizedStatus(t *testing.T) {
	// In the fixture "状态" means "status"; the labels read roughly as
	// "完成" = done, "待启动" = waiting to start, "待规划" = waiting to be planned.
	// NOTE(review): whether the parser keys on the emoji, the label text, or
	// the heading is not visible from this file.
	md := `# Tasks

### T-1 ✅ Done task
- **状态**：✅ 完成（2026-05-11）
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `

### T-2 🔶 Planned task
- **状态**：🔶 待启动
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `

### T-3 ⏸️ Paused task
- **状态**：⏸️ 待规划
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `
`

	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-status-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()

	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind so parseTasks reads the content that was just written.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// Stays fatal: the indexed accesses below need exactly three tasks.
	if len(tasks) != 3 {
		t.Fatalf("expected 3 tasks, got %d", len(tasks))
	}

	// Each task's status is checked independently, so t.Errorf is used and a
	// single run reports every status that was normalized incorrectly.
	if tasks[0].Status != "completed" {
		t.Errorf("expected first task status completed, got %q", tasks[0].Status)
	}
	if tasks[1].Status != "planned" {
		t.Errorf("expected second task status planned, got %q", tasks[1].Status)
	}
	if tasks[2].Status != "paused" {
		t.Errorf("expected third task status paused, got %q", tasks[2].Status)
	}
}

// TestFilterTasksByStatus checks filterTasksByStatus against a fixed list of
// three tasks with statuses "completed", "planned" and "in_progress":
// filtering by "completed" must yield only T-1, and filtering by "all" must
// yield all three tasks. Neither call may return an error.
func TestFilterTasksByStatus(t *testing.T) {
	tasks := []taskEntry{
		{ID: "T-1", Status: "completed"},
		{ID: "T-2", Status: "planned"},
		{ID: "T-3", Status: "in_progress"},
	}

	// Each filter value runs as its own subtest so that a failure in one
	// scenario does not prevent the other from running and reporting.
	t.Run("completed", func(t *testing.T) {
		completed, err := filterTasksByStatus(tasks, "completed")
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(completed) != 1 || completed[0].ID != "T-1" {
			t.Fatalf("expected only completed task, got %#v", completed)
		}
	})

	t.Run("all", func(t *testing.T) {
		all, err := filterTasksByStatus(tasks, "all")
		if err != nil {
			t.Fatalf("unexpected error for all: %v", err)
		}
		if len(all) != 3 {
			t.Fatalf("expected all 3 tasks, got %d", len(all))
		}
	})
}