Cradicle Explorer

/ src / lib / server / tasks / task-validation.ts
task-validation.ts
  1  import type { AppSettings, BoardTask } from '@/types'
  2  import type { TaskReportArtifact } from '@/lib/server/tasks/task-reports'
  3  import { normalizeTaskQualityGate } from '@/lib/server/tasks/task-quality-gate'
  4  
  5  export interface TaskCompletionValidation {
  6    ok: boolean
  7    reasons: string[]
  8    checkedAt: number
  9  }
 10  
 11  interface TaskCompletionValidationOptions {
 12    report?: TaskReportArtifact | null
 13    settings?: AppSettings | Record<string, unknown> | null
 14  }
 15  
 16  const MIN_RESULT_CHARS_IMPLEMENTATION = 40
 17  const MIN_RESULT_CHARS_GENERIC = 20
 18  
 19  const SHORT_ANSWER_REQUEST_PATTERNS: RegExp[] = [
 20    /\bone[\s-]word\b/i,
 21    /\bsingle[\s-]word\b/i,
 22    /\byes\s+or\s+no\b/i,
 23    /\bin\s+(?:a\s+)?single\s+(?:word|line|sentence)\b/i,
 24    /\bin\s+\d+\s+(?:word|words|chars?|characters?)\b/i,
 25    /\breply\s+with\s+(?:the\s+(?:word|number|letter|character|string)\s+)?["'`]?[\w-]{1,40}["'`]?\s+only\b/i,
 26    /\brespond\s+with\s+(?:the\s+(?:word|number|letter|character|string)\s+)?["'`]?[\w-]{1,40}["'`]?\s+only\b/i,
 27    /\b(?:answer|reply|respond)\s+(?:with\s+)?["'`][^"'`]{1,40}["'`]\s+only\b/i,
 28    /\bjust\s+(?:say|reply|return|output|answer)\b/i,
 29  ]
 30  
 31  function promptRequestsShortAnswer(text: string): boolean {
 32    if (!text) return false
 33    return SHORT_ANSWER_REQUEST_PATTERNS.some((rx) => rx.test(text))
 34  }
 35  
 36  const WEAK_RESULT_PATTERNS: RegExp[] = [
 37    /what can i help you with/i,
 38    /waiting for approval/i,
 39    /now let me write/i,
 40    /what'?s the play/i,
 41    /\bthe plan covers\b/i,
 42    /now update the agent/i,
 43    /\bzero typescript errors\b/i,
 44  ]
 45  
 46  const INCOMPLETE_RESULT_PATTERNS: RegExp[] = [
 47    /\b(?:next|then)\s*,?\s*i\s+(?:will|can|am going to)\b/i,
 48    /\b(?:i(?:'| a)?ll|let me)\s+(?:start|begin|proceed|continue)\b/i,
 49    /\b(?:once|when|after)\s+(?:the\s+)?(?:access|approval|permission)\s+(?:is|has been)\s+granted\b/i,
 50    /\bneed (?:more )?(?:details|information|context)\b/i,
 51    /\b(?:i|we)\s+(?:need|require)\s+(?:access|approval|permission)\b/i,
 52    /\brequested\s+(?:access|approval|permission)\b/i,
 53    /\bneed access to (?:the )?(?:shell|terminal|command line)\b/i,
 54    /\battempted to\b[^.]{0,120}\b(?:but|however)\b/i,
 55    /\bcould you provide\b/i,
 56    /\blet me know once\b/i,
 57    /\bthere (?:aren't|are not) any specific details\b/i,
 58  ]
 59  
 60  const IMPLEMENTATION_HINT = /\b(add|build|create|fix|implement|integrat|refactor|update|write)\b/i
 61  const EXECUTION_ACTION_HINT = /\b(changed|updated|added|modified|implemented|refactored|fixed|ran|executed|verified)\b/i
 62  const COMMAND_EVIDENCE_HINT = /\b(npm|pnpm|yarn|bun|node|npx|pytest|vitest|jest|playwright|go test|cargo test|deno test|python|pip|uv|docker|git)\b/i
 63  const FILE_PATH_EVIDENCE_HINT = /\b[\w./-]+\.(ts|tsx|js|jsx|mjs|cjs|json|md|css|scss|html|yml|yaml|sh|py|go|rs|java|kt|swift|rb|php|sql|txt)\b/i
 64  const ARTIFACT_EVIDENCE_HINT = /(?:sandbox:)?\/api\/uploads\/[^\s)\]]+|https?:\/\/[^\s)\]]+\.(?:png|jpe?g|webp|gif|pdf|zip)\b/i
 65  const VERIFICATION_EVIDENCE_HINT = /\b(test|tests|lint|typecheck|build)\b[^.]{0,40}\b(pass(?:ed)?|fail(?:ed)?|ok|success)\b/i
 66  const SCREENSHOT_HINT = /\b(screenshot|screen shot|snapshot|capture)\b/i
 67  const DELIVERY_HINT = /\b(send|deliver|return|share|upload|post|message)\b/i
 68  const SCREENSHOT_ARTIFACT_HINT = /(?:sandbox:)?\/api\/uploads\/[^\s)\]]+|https?:\/\/[^\s)\]]+\.(?:png|jpe?g|webp|gif|pdf)\b/i
 69  const SENT_SCREENSHOT_HINT = /\b(sent|shared|uploaded|returned)\b[^.]*\b(screenshot|snapshot|image)\b/i
 70  
 71  function normalizeText(value: unknown): string {
 72    if (typeof value !== 'string') return ''
 73    return value.replace(/\s+/g, ' ').trim()
 74  }
 75  
 76  export function validateTaskCompletion(
 77    task: Partial<BoardTask>,
 78    options: TaskCompletionValidationOptions = {},
 79  ): TaskCompletionValidation {
 80    const reasons: string[] = []
 81    const title = normalizeText(task.title)
 82    const description = normalizeText(task.description)
 83    const result = normalizeText(task.result)
 84    const error = normalizeText(task.error)
 85    const report = options.report || null
 86    const hasExplicitQualityGate = !!task.qualityGate && typeof task.qualityGate === 'object'
 87    const qualityGate = normalizeTaskQualityGate(task.qualityGate || null, options.settings || null)
 88    const implementationTask = IMPLEMENTATION_HINT.test(title) || IMPLEMENTATION_HINT.test(description)
 89  
 90    if (error) reasons.push('Task has a non-empty error field.')
 91    if (/^untitled task$/i.test(title) && !description) {
 92      reasons.push('Task metadata is too vague (untitled title with empty description).')
 93    }
 94  
 95    const shortAnswerRequested = promptRequestsShortAnswer(`${title} ${description}`)
 96  
 97    if (!result) reasons.push('Result summary is empty.')
 98    else if (!shortAnswerRequested) {
 99      const minChars = implementationTask ? MIN_RESULT_CHARS_IMPLEMENTATION : MIN_RESULT_CHARS_GENERIC
100      if (result.length < minChars) reasons.push(`Result summary is too short (${result.length} chars; min ${minChars}).`)
101      if (WEAK_RESULT_PATTERNS.some((rx) => rx.test(result))) {
102        reasons.push('Result contains placeholder/planning language instead of completion evidence.')
103      }
104      if (INCOMPLETE_RESULT_PATTERNS.some((rx) => rx.test(result))) {
105        reasons.push('Result indicates unfinished work or missing inputs instead of completed execution.')
106      }
107    }
108  
109    // If task description/title suggests implementation work, require concrete evidence in
110    // the result summary OR task report.
111    const hasResultEvidence = (
112      COMMAND_EVIDENCE_HINT.test(result)
113      || ARTIFACT_EVIDENCE_HINT.test(result)
114      || VERIFICATION_EVIDENCE_HINT.test(result)
115      || (EXECUTION_ACTION_HINT.test(result)
116        && (/\b(command|test|lint|typecheck|build|file|artifact)\b/i.test(result) || FILE_PATH_EVIDENCE_HINT.test(result)))
117    )
118    const hasReportEvidence = report?.evidence.hasEvidence === true
119    if (implementationTask && !shortAnswerRequested && !hasResultEvidence && !hasReportEvidence) {
120      if (report?.relativePath) {
121        reasons.push(`Implementation task is missing concrete execution evidence in result or ${report.relativePath}.`)
122      } else {
123        reasons.push('Implementation task is missing concrete execution evidence in result.')
124      }
125    }
126  
127    const screenshotTask = SCREENSHOT_HINT.test(title) || SCREENSHOT_HINT.test(description)
128    const screenshotDeliveryTask = screenshotTask && (DELIVERY_HINT.test(title) || DELIVERY_HINT.test(description))
129    if (screenshotDeliveryTask) {
130      const hasScreenshotArtifact = SCREENSHOT_ARTIFACT_HINT.test(result) || SENT_SCREENSHOT_HINT.test(result)
131      if (!hasScreenshotArtifact) {
132        reasons.push('Screenshot delivery task is missing artifact evidence (upload link or explicit sent screenshot confirmation).')
133      }
134    }
135  
136    if (qualityGate.enabled && (implementationTask || hasExplicitQualityGate)) {
137      if (result && result.length < qualityGate.minResultChars) {
138        reasons.push(`Quality gate: result summary is shorter than required minimum (${result.length} chars; min ${qualityGate.minResultChars}).`)
139      }
140  
141      const hasCommandEvidence = COMMAND_EVIDENCE_HINT.test(result) || (report?.evidence.commandsRun.length || 0) > 0
142      const hasFileEvidence = FILE_PATH_EVIDENCE_HINT.test(result) || (report?.evidence.changedFiles.length || 0) > 0
143      const hasVerificationEvidence = VERIFICATION_EVIDENCE_HINT.test(result) || (report?.evidence.verification.length || 0) > 0
144      const hasArtifactEvidence = ARTIFACT_EVIDENCE_HINT.test(result) || ((task.artifacts?.length || 0) > 0)
145  
146      const evidenceSignals = [
147        hasCommandEvidence,
148        hasFileEvidence,
149        hasVerificationEvidence,
150        hasArtifactEvidence,
151      ].filter(Boolean).length
152  
153      if (evidenceSignals < qualityGate.minEvidenceItems) {
154        reasons.push(`Quality gate: insufficient completion evidence (${evidenceSignals}/${qualityGate.minEvidenceItems} required evidence signals).`)
155      }
156      if (qualityGate.requireVerification && !hasVerificationEvidence) {
157        reasons.push('Quality gate: verification evidence is required (tests/lint/build/check output missing).')
158      }
159      if (qualityGate.requireArtifact && !hasArtifactEvidence) {
160        reasons.push('Quality gate: artifact evidence is required (artifact URL/upload or structured artifacts list missing).')
161      }
162      if (qualityGate.requireReport && !report?.relativePath) {
163        reasons.push('Quality gate: task completion report is required but missing.')
164      }
165    }
166  
167    return {
168      ok: reasons.length === 0,
169      reasons,
170      checkedAt: Date.now(),
171    }
172  }
173  
/**
 * Builds a single-line failure message from validation reasons.
 *
 * @param reasons Failure reasons, in the order they should appear.
 * @returns `"Completion validation failed: "` followed by the reasons joined
 *   with single spaces.
 */
export function formatValidationFailure(reasons: readonly string[]): string {
  return `Completion validation failed: ${reasons.join(' ')}`
}