592 lines
15 KiB
Go
592 lines
15 KiB
Go
// verification_executor.go
// Reads TASKS.md, runs each task's verification.command,
// matches expected_evidence, and outputs a pass/fail report.
//
// Usage: go run scripts/verification_executor.go [--dry-run] [--task T-Q2-1.1]
//go:build llm_script
|
||
|
||
package main
|
||
|
||
import (
|
||
"bufio"
|
||
"bytes"
|
||
"context"
|
||
"flag"
|
||
"fmt"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"regexp"
|
||
"runtime"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// Verification holds the fields of a task's **verification** block as parsed
// from TASKS.md.
type Verification struct {
	// Mode is the verification mode: "test_pass", "artifact_present" or
	// "semantic".
	Mode string
	// Command is the shell command executed to verify the task.
	Command string
	// ExpectedEvidence is the text (or a "[lo-hi]" numeric range) expected
	// in the command's stdout.
	ExpectedEvidence string
	// TimeoutSeconds bounds the command's run time; 0 selects the
	// executor's default.
	TimeoutSeconds int
	// EvidenceGrade is "runtime-verified", "artifact-present" or
	// "doc-claimed"; when empty it is derived from Mode.
	EvidenceGrade string
	// TaskType categorises the task (e.g. "code", "documentation"); an
	// empty value is normalized to "unspecified".
	TaskType string
}
|
||
|
||
// TaskResult is the outcome of verifying a single task; main prints one
// report entry per TaskResult.
type TaskResult struct {
	TaskID   string // task identifier, e.g. "T-Q2-1.1"
	TaskName string // task title taken from the TASKS.md header line
	Verified bool   // true when the task counts as passed

	Command  string // verification command, when one was configured
	ExitCode int    // 0 on success, non-zero when the command failed

	Stdout        string // raw standard output of the command
	Stderr        string // raw standard error of the command
	StdoutSummary string // Stdout condensed by summarizeOutput
	StderrSummary string // Stderr condensed by summarizeOutput

	Error  string // execution error (timeout or run failure), if any
	Reason string // human-readable explanation of the outcome, if any
	// FailureClass labels the kind of problem, e.g. "missing_verification",
	// "verification_config_failure", "tool_execution_failure" or
	// "business_assertion_failure".
	FailureClass string

	EvidenceGrade string // normalized evidence grade of the verification
	TaskType      string // normalized task type of the verification
}
|
||
|
||
|
||
func main() {
|
||
dryRun := flag.Bool("dry-run", false, "print commands without executing")
|
||
taskFilter := flag.String("task", "", "filter by task ID (e.g. T-Q2-1.1)")
|
||
tasksPathFlag := flag.String("tasks", "", "path to TASKS.md")
|
||
statusFilter := flag.String("status", "all", "filter by normalized status: all|completed|in_progress|planned|paused|unknown")
|
||
completedOnly := flag.Bool("completed-only", false, "shortcut for --status completed")
|
||
flag.Parse()
|
||
|
||
tasksPath := resolveTasksPath(*tasksPathFlag)
|
||
|
||
f, err := os.Open(tasksPath)
|
||
if err != nil {
|
||
fmt.Fprintf(os.Stderr, "open TASKS.md: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
defer f.Close()
|
||
|
||
tasks := parseTasks(f)
|
||
if *taskFilter != "" {
|
||
var filtered []taskEntry
|
||
for _, t := range tasks {
|
||
if t.ID == *taskFilter {
|
||
filtered = append(filtered, t)
|
||
}
|
||
}
|
||
tasks = filtered
|
||
}
|
||
|
||
effectiveStatus := *statusFilter
|
||
if *completedOnly {
|
||
effectiveStatus = "completed"
|
||
}
|
||
tasks, err = filterTasksByStatus(tasks, effectiveStatus)
|
||
if err != nil {
|
||
fmt.Fprintf(os.Stderr, "filter tasks: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
|
||
fmt.Printf("=== Verification Report (%s) ===\n", time.Now().Format("2006-01-02 15:04"))
|
||
fmt.Printf("Tasks checked: %d | Dry-run: %v | Status: %s | TASKS: %s\n\n", len(tasks), *dryRun, effectiveStatus, tasksPath)
|
||
|
||
var passed, failed int
|
||
var results []TaskResult
|
||
|
||
for _, t := range tasks {
|
||
r := verifyTask(t, *dryRun)
|
||
results = append(results, r)
|
||
if r.Verified {
|
||
passed++
|
||
} else {
|
||
failed++
|
||
}
|
||
}
|
||
|
||
for _, r := range results {
|
||
icon := "✅"
|
||
if !r.Verified {
|
||
icon = "❌"
|
||
}
|
||
fmt.Printf("%s [%s] %s\n", icon, r.TaskID, r.TaskName)
|
||
if r.Command != "" {
|
||
fmt.Printf(" cmd: %s\n", r.Command)
|
||
}
|
||
if r.EvidenceGrade != "" || r.TaskType != "" {
|
||
fmt.Printf(" grade: %s | type: %s\n", r.EvidenceGrade, r.TaskType)
|
||
}
|
||
if r.StderrSummary != "" {
|
||
fmt.Printf(" stderr: %s\n", r.StderrSummary)
|
||
}
|
||
if r.StdoutSummary != "" && (!r.Verified || r.Reason != "" || r.Error != "") {
|
||
fmt.Printf(" stdout: %s\n", r.StdoutSummary)
|
||
}
|
||
if r.FailureClass != "" {
|
||
fmt.Printf(" class: %s\n", r.FailureClass)
|
||
}
|
||
if r.Error != "" {
|
||
fmt.Printf(" ERROR: %s\n", r.Error)
|
||
} else if r.ExitCode != 0 && r.Stdout != "" {
|
||
fmt.Printf(" output: %s\n", strings.TrimSpace(r.Stdout))
|
||
} else if r.Reason != "" {
|
||
fmt.Printf(" reason: %s\n", r.Reason)
|
||
}
|
||
|
||
}
|
||
|
||
fmt.Printf("\n=== Summary: %d passed, %d failed ===\n", passed, failed)
|
||
os.Exit(determineProcessExitCode(results))
|
||
}
|
||
|
||
|
||
func resolveTasksPath(flagValue string) string {
|
||
envValue := os.Getenv("TASKS_PATH")
|
||
wd := ""
|
||
if currentWD, err := os.Getwd(); err == nil {
|
||
wd = currentWD
|
||
}
|
||
sourceDir := ""
|
||
if _, sourcePath, _, ok := runtime.Caller(0); ok {
|
||
sourceDir = filepath.Dir(sourcePath)
|
||
}
|
||
return resolveTasksPathWithContext(flagValue, envValue, wd, sourceDir, "/home/long/.openclaw/workspace/TASKS.md")
|
||
}
|
||
|
||
// resolveTasksPathWithContext picks the TASKS.md path from explicit inputs.
//
// It first probes, in order, the flag value, the environment value,
// <wd>/TASKS.md, <wd>/../TASKS.md and <sourceDir>/../TASKS.md, and returns the
// first (cleaned) candidate for which os.Stat succeeds. When none exists it
// returns the first non-empty value among: flag, environment,
// <sourceDir>/../TASKS.md, <wd>/TASKS.md, globalTasksPath, and finally the
// literal "TASKS.md".
//
// NOTE(review): globalTasksPath is never probed for existence; it is only a
// late fallback. Confirm that this is intended.
func resolveTasksPathWithContext(flagValue, envValue, wd, sourceDir, globalTasksPath string) string {
	defaultProjectTasks := ""
	if sourceDir != "" {
		defaultProjectTasks = filepath.Join(sourceDir, "..", "TASKS.md")
	}

	// Empty entries are skipped by the probing loop below, so unset inputs
	// can be appended unconditionally.
	candidates := make([]string, 0, 5)
	candidates = append(candidates, flagValue, envValue)
	if wd != "" {
		candidates = append(candidates,
			filepath.Join(wd, "TASKS.md"),
			filepath.Join(wd, "..", "TASKS.md"),
		)
	}
	candidates = append(candidates, defaultProjectTasks)

	seen := make(map[string]struct{}, len(candidates))
	for _, candidate := range candidates {
		if candidate == "" {
			continue
		}
		cleaned := filepath.Clean(candidate)
		if _, dup := seen[cleaned]; dup {
			continue
		}
		seen[cleaned] = struct{}{}
		if _, err := os.Stat(cleaned); err == nil {
			return cleaned
		}
	}

	// Nothing exists on disk: fall back to the most explicit configured
	// location so that the caller's error message names a sensible path.
	wdTasks := ""
	if wd != "" {
		wdTasks = filepath.Join(wd, "TASKS.md")
	}
	for _, fallback := range []string{flagValue, envValue, defaultProjectTasks, wdTasks, globalTasksPath} {
		if fallback != "" {
			return filepath.Clean(fallback)
		}
	}
	return "TASKS.md"
}
|
||
|
||
type taskEntry struct {
|
||
ID string
|
||
Name string
|
||
Status string
|
||
Verification Verification
|
||
HasVerification bool
|
||
}
|
||
|
||
func parseTasks(f *os.File) []taskEntry {
|
||
var tasks []taskEntry
|
||
var currentTask *taskEntry
|
||
inVerification := false
|
||
scanner := bufio.NewScanner(f)
|
||
|
||
for scanner.Scan() {
|
||
line := scanner.Text()
|
||
|
||
// Match task header: ### T-1.1 🔶 Phase 1 范围冻结
|
||
taskRe := regexp.MustCompile(`^### (T-[A-Za-z0-9.-]+)\s+[^\s]+\s+(.+)`)
|
||
if m := taskRe.FindStringSubmatch(line); m != nil {
|
||
if currentTask != nil {
|
||
tasks = append(tasks, *currentTask)
|
||
}
|
||
currentTask = &taskEntry{ID: m[1], Name: m[2], Status: normalizeStatusFromText(line)}
|
||
inVerification = false
|
||
continue
|
||
}
|
||
|
||
if currentTask == nil {
|
||
continue
|
||
}
|
||
|
||
// Check for verification block
|
||
if strings.Contains(line, "**verification**") || strings.Contains(line, "**verification**:") {
|
||
inVerification = true
|
||
currentTask.HasVerification = true
|
||
continue
|
||
}
|
||
|
||
if !inVerification {
|
||
statusRe := regexp.MustCompile(`^\s*-\s+\*\*状态\*\*:(.+)$`)
|
||
if m := statusRe.FindStringSubmatch(line); m != nil {
|
||
currentTask.Status = normalizeStatusFromText(m[1])
|
||
}
|
||
continue
|
||
}
|
||
|
||
// Parse verification fields (indented under **verification**)
|
||
// - mode: `artifact_present`
|
||
modeRe := regexp.MustCompile(`^\s+- mode:\s+` + "`" + `([^` + "`" + `]+)` + "`")
|
||
if m := modeRe.FindStringSubmatch(line); m != nil {
|
||
currentTask.Verification.Mode = m[1]
|
||
continue
|
||
}
|
||
|
||
cmdRe := regexp.MustCompile(`^\s+- command:\s+` + "`" + `([^` + "`" + `]+)` + "`")
|
||
if m := cmdRe.FindStringSubmatch(line); m != nil {
|
||
currentTask.Verification.Command = m[1]
|
||
continue
|
||
}
|
||
|
||
expRe := regexp.MustCompile(`^\s+- expected_evidence:\s+` + "`" + `([^` + "`" + `]+)` + "`")
|
||
if m := expRe.FindStringSubmatch(line); m != nil {
|
||
currentTask.Verification.ExpectedEvidence = m[1]
|
||
continue
|
||
}
|
||
|
||
evidenceGradeRe := regexp.MustCompile(`^\s+- evidence_grade:\s+` + "`" + `([^` + "`" + `]+)` + "`")
|
||
if m := evidenceGradeRe.FindStringSubmatch(line); m != nil {
|
||
currentTask.Verification.EvidenceGrade = m[1]
|
||
continue
|
||
}
|
||
|
||
taskTypeRe := regexp.MustCompile(`^\s+- task_type:\s+` + "`" + `([^` + "`" + `]+)` + "`")
|
||
if m := taskTypeRe.FindStringSubmatch(line); m != nil {
|
||
currentTask.Verification.TaskType = m[1]
|
||
continue
|
||
}
|
||
|
||
timeoutRe := regexp.MustCompile(`^\s+- timeout_seconds:\s+(\d+)`)
|
||
if m := timeoutRe.FindStringSubmatch(line); m != nil {
|
||
fmt.Sscanf(m[1], "%d", ¤tTask.Verification.TimeoutSeconds)
|
||
continue
|
||
}
|
||
|
||
// Blank line or new top-level field ends verification block
|
||
if strings.TrimSpace(line) == "" || (strings.HasPrefix(strings.TrimSpace(line), "**") && !strings.Contains(line, "verification")) {
|
||
inVerification = false
|
||
}
|
||
}
|
||
|
||
if currentTask != nil {
|
||
tasks = append(tasks, *currentTask)
|
||
}
|
||
|
||
return tasks
|
||
}
|
||
|
||
func verifyTask(t taskEntry, dryRun bool) TaskResult {
|
||
r := TaskResult{TaskID: t.ID, TaskName: t.Name}
|
||
|
||
if !t.HasVerification {
|
||
r.Reason = "no verification block"
|
||
r.FailureClass = "missing_verification"
|
||
r.Verified = true // No verification = trivially pass
|
||
return r
|
||
}
|
||
|
||
t.Verification.Mode = strings.TrimSpace(t.Verification.Mode)
|
||
t.Verification.TaskType = normalizeTaskType(t.Verification.TaskType)
|
||
t.Verification.EvidenceGrade = normalizeEvidenceGrade(t.Verification.Mode, t.Verification.EvidenceGrade)
|
||
r.TaskType = t.Verification.TaskType
|
||
r.EvidenceGrade = t.Verification.EvidenceGrade
|
||
|
||
if validationErr := validateVerification(t.Verification); validationErr != "" {
|
||
r.Verified = false
|
||
r.Reason = validationErr
|
||
r.FailureClass = "verification_config_failure"
|
||
return r
|
||
}
|
||
|
||
if t.Verification.Command == "" {
|
||
if t.Verification.Mode == "artifact_present" {
|
||
r.Verified = true
|
||
return r
|
||
}
|
||
r.Reason = "verification.command is empty"
|
||
r.FailureClass = "verification_config_failure"
|
||
r.Verified = false
|
||
return r
|
||
}
|
||
|
||
r.Command = t.Verification.Command
|
||
|
||
if t.Verification.TimeoutSeconds == 0 {
|
||
t.Verification.TimeoutSeconds = 30
|
||
}
|
||
|
||
if dryRun {
|
||
r.Stdout = "(dry-run, command not executed)"
|
||
r.Verified = true
|
||
return r
|
||
}
|
||
|
||
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(t.Verification.TimeoutSeconds)*time.Second)
|
||
defer cancel()
|
||
|
||
cmd := exec.CommandContext(ctx, "sh", "-c", t.Verification.Command)
|
||
var stdout, stderr bytes.Buffer
|
||
cmd.Stdout = &stdout
|
||
cmd.Stderr = &stderr
|
||
|
||
err := cmd.Run()
|
||
r.ExitCode = 0
|
||
if err != nil {
|
||
r.ExitCode = -1
|
||
r.FailureClass = "tool_execution_failure"
|
||
if ctx.Err() == context.DeadlineExceeded {
|
||
r.Error = fmt.Sprintf("timeout after %ds", t.Verification.TimeoutSeconds)
|
||
} else {
|
||
r.Error = err.Error()
|
||
}
|
||
}
|
||
|
||
r.Stdout = stdout.String()
|
||
r.Stderr = stderr.String()
|
||
r.StdoutSummary = summarizeOutput(r.Stdout)
|
||
r.StderrSummary = summarizeOutput(r.Stderr)
|
||
|
||
if r.ExitCode != 0 && t.Verification.Mode == "test_pass" {
|
||
r.Verified = false
|
||
return r
|
||
}
|
||
|
||
// Match expected_evidence
|
||
if t.Verification.ExpectedEvidence != "" {
|
||
evidence := t.Verification.ExpectedEvidence
|
||
matched := false
|
||
|
||
if strings.HasPrefix(evidence, "[") && strings.HasSuffix(evidence, "]") {
|
||
// Regex range like [4-9]
|
||
re := regexp.MustCompile(`\[(\d+)-(\d+)\]`)
|
||
if m := re.FindStringSubmatch(evidence); m != nil {
|
||
var lo, hi int
|
||
fmt.Sscanf(m[1], "%d", &lo)
|
||
fmt.Sscanf(m[2], "%d", &hi)
|
||
reOut := regexp.MustCompile(fmt.Sprintf(`^\s*(\d+)\s*$`))
|
||
if numMatch := reOut.FindStringSubmatch(strings.TrimSpace(r.Stdout)); numMatch != nil {
|
||
var n int
|
||
fmt.Sscanf(numMatch[1], "%d", &n)
|
||
matched = n >= lo && n <= hi
|
||
}
|
||
}
|
||
} else if strings.Contains(r.Stdout, evidence) {
|
||
matched = true
|
||
}
|
||
|
||
r.Verified = matched
|
||
if !matched {
|
||
r.Reason = fmt.Sprintf("expected_evidence '%s' not found in output", evidence)
|
||
r.FailureClass = "business_assertion_failure"
|
||
}
|
||
} else if r.ExitCode == 0 {
|
||
r.Verified = true
|
||
} else {
|
||
r.Verified = false
|
||
r.Reason = fmt.Sprintf("exit code %d", r.ExitCode)
|
||
r.FailureClass = "tool_execution_failure"
|
||
}
|
||
|
||
return r
|
||
}
|
||
|
||
func classifyFailureTier(r TaskResult) int {
|
||
if r.Verified {
|
||
return 0
|
||
}
|
||
if r.EvidenceGrade == "runtime-verified" {
|
||
return 2
|
||
}
|
||
return 3
|
||
}
|
||
|
||
func determineProcessExitCode(results []TaskResult) int {
|
||
hasRuntimeFailure := false
|
||
hasLowerTierFailure := false
|
||
for _, r := range results {
|
||
tier := classifyFailureTier(r)
|
||
switch tier {
|
||
case 2:
|
||
hasRuntimeFailure = true
|
||
case 3:
|
||
hasLowerTierFailure = true
|
||
}
|
||
}
|
||
if hasRuntimeFailure {
|
||
return 2
|
||
}
|
||
if hasLowerTierFailure {
|
||
return 3
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// normalizeEvidenceGrade returns the evidence grade to use for a
// verification. An explicit, non-blank grade is returned trimmed; otherwise
// the grade is derived from the mode: "test_pass" gives "runtime-verified",
// "artifact_present" gives "artifact-present" and "semantic" gives
// "doc-claimed". Any other mode yields the empty string.
func normalizeEvidenceGrade(mode, explicit string) string {
	if grade := strings.TrimSpace(explicit); grade != "" {
		return grade
	}

	switch strings.TrimSpace(mode) {
	case "test_pass":
		return "runtime-verified"
	case "artifact_present":
		return "artifact-present"
	case "semantic":
		return "doc-claimed"
	}
	return ""
}
|
||
|
||
// normalizeTaskType trims raw and substitutes "unspecified" for a blank task
// type.
func normalizeTaskType(raw string) string {
	if trimmed := strings.TrimSpace(raw); trimmed != "" {
		return trimmed
	}
	return "unspecified"
}
|
||
|
||
// normalizeStatusFromText maps free-form status text (a task header line or
// the value of a status bullet) to one of the normalized statuses
// "completed", "in_progress", "planned", "paused" or "unknown". The text is
// searched for status emoji and Chinese status keywords; the first matching
// group in the order listed above wins.
func normalizeStatusFromText(raw string) string {
	containsAny := func(markers ...string) bool {
		for _, marker := range markers {
			if strings.Contains(raw, marker) {
				return true
			}
		}
		return false
	}

	switch {
	case containsAny("✅", "完成"):
		return "completed"
	case containsAny("🟡", "进行中"):
		return "in_progress"
	case containsAny("🔶", "🔴", "待启动", "未开始"):
		return "planned"
	// U+23F8 is the pause symbol; matching the bare code point also covers
	// the emoji form that carries a trailing U+FE0F variation selector.
	case containsAny("\u23F8", "待规划", "暂停"):
		return "paused"
	default:
		return "unknown"
	}
}
|
||
|
||
func filterTasksByStatus(tasks []taskEntry, filter string) ([]taskEntry, error) {
|
||
filter = strings.TrimSpace(filter)
|
||
if filter == "" {
|
||
filter = "all"
|
||
}
|
||
|
||
valid := map[string]struct{}{
|
||
"all": {},
|
||
"completed": {},
|
||
"in_progress": {},
|
||
"planned": {},
|
||
"paused": {},
|
||
"unknown": {},
|
||
}
|
||
if _, ok := valid[filter]; !ok {
|
||
return nil, fmt.Errorf("unsupported status filter: %s", filter)
|
||
}
|
||
if filter == "all" {
|
||
return tasks, nil
|
||
}
|
||
|
||
filtered := make([]taskEntry, 0, len(tasks))
|
||
for _, t := range tasks {
|
||
status := t.Status
|
||
if status == "" {
|
||
status = "unknown"
|
||
}
|
||
if status == filter {
|
||
filtered = append(filtered, t)
|
||
}
|
||
}
|
||
return filtered, nil
|
||
}
|
||
|
||
// summarizeOutput condenses command output for the report: surrounding
// whitespace is removed, every run of whitespace collapses to a single space,
// and text longer than 220 bytes is cut and suffixed with "...". The cut is
// moved back to a rune boundary so that multi-byte UTF-8 characters are never
// split.
func summarizeOutput(raw string) string {
	cleaned := strings.Join(strings.Fields(raw), " ")

	const limit = 220
	if len(cleaned) <= limit {
		return cleaned
	}

	// Ranging over a string yields the byte offset at which each rune
	// starts; keep the last such offset that does not exceed the limit.
	cut := 0
	for start := range cleaned {
		if start > limit {
			break
		}
		cut = start
	}
	return cleaned[:cut] + "..."
}
|
||
|
||
func validateVerification(v Verification) string {
|
||
validModes := map[string]struct{}{
|
||
"test_pass": {},
|
||
"artifact_present": {},
|
||
"semantic": {},
|
||
}
|
||
if _, ok := validModes[v.Mode]; !ok {
|
||
return fmt.Sprintf("unsupported verification mode: %s", v.Mode)
|
||
}
|
||
|
||
validGrades := map[string]struct{}{
|
||
"runtime-verified": {},
|
||
"artifact-present": {},
|
||
"doc-claimed": {},
|
||
}
|
||
if v.EvidenceGrade != "" {
|
||
if _, ok := validGrades[v.EvidenceGrade]; !ok {
|
||
return fmt.Sprintf("unsupported evidence grade: %s", v.EvidenceGrade)
|
||
}
|
||
}
|
||
|
||
validTaskTypes := map[string]struct{}{
|
||
"unspecified": {},
|
||
"code": {},
|
||
"automation": {},
|
||
"documentation": {},
|
||
"configuration": {},
|
||
"data": {},
|
||
"analysis": {},
|
||
}
|
||
if _, ok := validTaskTypes[v.TaskType]; !ok {
|
||
return fmt.Sprintf("unsupported task type: %s", v.TaskType)
|
||
}
|
||
|
||
if (v.TaskType == "code" || v.TaskType == "automation") && v.Mode == "semantic" {
|
||
return fmt.Sprintf("semantic-only verification is not allowed for %s tasks", v.TaskType)
|
||
}
|
||
if v.Mode == "artifact_present" {
|
||
if strings.TrimSpace(v.Command) != "" || strings.TrimSpace(v.ExpectedEvidence) != "" {
|
||
return "artifact_present does not allow command or expected_evidence; use test_pass for executable verification"
|
||
}
|
||
if v.TaskType == "code" || v.TaskType == "automation" || v.TaskType == "data" || v.TaskType == "analysis" {
|
||
return fmt.Sprintf("artifact_present is not allowed for %s tasks", v.TaskType)
|
||
}
|
||
}
|
||
|
||
return ""
|
||
}
|