// src/lib/server/autonomy/supervisor-reflection.ts
   1  import crypto from 'node:crypto'
   2  import { HumanMessage } from '@langchain/core/messages'
   3  
   4  import { genId } from '@/lib/id'
   5  import { buildLLM } from '@/lib/server/build-llm'
   6  import { getMemoryDb } from '@/lib/server/memory/memory-db'
   7  import {
   8    loadRunReflections,
   9    loadSessions,
  10    loadSettings,
  11    loadSupervisorIncidents,
  12    saveRunReflections,
  13    saveSupervisorIncidents,
  14  } from '@/lib/server/storage'
  15  import type {
  16    AppSettings,
  17    GoalContract,
  18    MessageToolEvent,
  19    RunReflection,
  20    Session,
  21    SessionRunStatus,
  22    SupervisorIncident,
  23    SupervisorIncidentSeverity,
  24  } from '@/types'
  25  import {
  26    normalizeSupervisorSettings,
  27    runtimeScopeIncludes,
  28    type AutonomyRuntimeScope,
  29    type NormalizedSupervisorSettings,
  30  } from '@/lib/autonomy/supervisor-settings'
  31  import { notifyOrchestrators } from '@/lib/server/runtime/orchestrator-events'
  32  import { log } from '@/lib/server/logger'
  33  import { logExecution } from '@/lib/server/execution-log'
  34  import { logActivity } from '@/lib/server/storage'
  35  import { createNotification } from '@/lib/server/create-notification'
  36  import { foldReflectionIntoRunContext } from '@/lib/server/run-context'
  37  import { getSession, saveSession } from '@/lib/server/sessions/session-repository'
  38  import { cleanText } from '@/lib/server/text-normalization'
  39  import { getMessages, getMessageCount, getRecentMessages } from '@/lib/server/messages/message-repository'
  40  
// Module tag. NOTE(review): presumably used as the logger scope — its use is outside the visible part of the file.
const TAG = 'supervisor-reflection'

// Matches a line that carries a main-loop or heartbeat meta marker, optionally
// followed by a single-line JSON object. Case-insensitive.
const MAIN_LOOP_META_LINE_RE = /\[(?:MAIN_LOOP_META|MAIN_LOOP_PLAN|MAIN_LOOP_REVIEW|AGENT_HEARTBEAT_META)\]\s*(\{[^\n]*\})?/i
// Default number of recent messages included when building a session transcript.
const DEFAULT_TRANSCRIPT_MESSAGES = 12
// NOTE(review): not referenced in the visible part of this file — confirm it is still used.
const DEFAULT_SNIPPET_CHARS = 800
  46  
  47  export interface SupervisorStateSnapshot {
  48    followupChainCount?: number | null
  49    goalContract?: GoalContract | null
  50    missionCostUsd?: number | null
  51    status?: string | null
  52    nextAction?: string | null
  53    summary?: string | null
  54    metaMissCount?: number | null
  55  }
  56  
  57  export interface AutonomyAssessment {
  58    incidents: Array<Omit<SupervisorIncident, 'id' | 'createdAt'>>
  59    interventionPrompt: string | null
  60    shouldBlock: boolean
  61    autoActions: Array<{
  62      action: 'block' | 'replan' | 'compact' | 'budget_trim'
  63      prompt?: string
  64      budgetWarning?: string
  65    }>
  66  }
  67  
  68  export interface ObserveAutonomyRunInput {
  69    runId: string
  70    sessionId: string
  71    taskId?: string | null
  72    agentId?: string | null
  73    source: string
  74    status: SessionRunStatus
  75    resultText?: string | null
  76    error?: string | null
  77    toolEvents?: MessageToolEvent[]
  78    mainLoopState?: SupervisorStateSnapshot | null
  79    sourceMessage?: string | null
  80  }
  81  
  82  function now(): number {
  83    return Date.now()
  84  }
  85  
  86  function looksLikeHtmlErrorPayload(value: string): boolean {
  87    const normalized = value.toLowerCase()
  88    let matches = 0
  89    for (const marker of ['<!doctype html', '<html', '<head', '<body', '<script', '/_next/static/', '__next_error__']) {
  90      if (!normalized.includes(marker)) continue
  91      matches += 1
  92      if (matches >= 2) return true
  93    }
  94    return false
  95  }
  96  
  97  function stripMarkup(value: string): string {
  98    return value.replace(/<[^>]+>/g, ' ')
  99  }
 100  
 101  function extractHtmlErrorSignal(value: string): string | null {
 102    const compact = stripMarkup(value).replace(/\s+/g, ' ').trim()
 103    if (!compact) return null
 104    for (const token of ['ReferenceError', 'TypeError', 'SyntaxError', 'RangeError', 'EvalError', 'URIError', 'Error:']) {
 105      const start = compact.indexOf(token)
 106      if (start < 0) continue
 107      return trimText(compact.slice(start, start + 180), 180)
 108    }
 109    const messageToken = '"message":"'
 110    const messageStart = compact.indexOf(messageToken)
 111    if (messageStart >= 0) {
 112      const body = compact.slice(messageStart + messageToken.length)
 113      const messageEnd = body.indexOf('"')
 114      if (messageEnd > 0) return trimText(body.slice(0, messageEnd), 180)
 115    }
 116    return null
 117  }
 118  
 119  function sanitizeIncidentText(value: unknown, max = 320): string | null {
 120    const compact = cleanText(value, Math.max(max * 2, 600))
 121    if (!compact) return null
 122    if (!looksLikeHtmlErrorPayload(compact)) return compact.slice(0, max)
 123    const signal = extractHtmlErrorSignal(compact)
 124    const summary = signal
 125      ? `Runtime returned an HTML error payload (${signal}).`
 126      : 'Runtime returned an HTML error payload (possible Next.js or server error page).'
 127    return summary.slice(0, max)
 128  }
 129  
 130  function stripMainLoopMeta(text: string): string {
 131    return (text || '')
 132      .split('\n')
 133      .filter((line) => !MAIN_LOOP_META_LINE_RE.test(line))
 134      .join('\n')
 135      .trim()
 136  }
 137  
 138  function trimText(value: string, max = 400): string {
 139    const compact = value.replace(/\s+/g, ' ').trim()
 140    if (compact.length <= max) return compact
 141    return `${compact.slice(0, Math.max(0, max - 1)).trimEnd()}…`
 142  }
 143  
 144  function classifySurface(source: string): 'chat' | 'task' | null {
 145    const normalized = (source || '').trim().toLowerCase()
 146    if (!normalized || normalized.includes('heartbeat')) return null
 147    if (normalized === 'task' || normalized.startsWith('task-') || normalized === 'schedule' || normalized === 'delegation') {
 148      return 'task'
 149    }
 150    return 'chat'
 151  }
 152  
 153  function severityRank(severity: SupervisorIncidentSeverity): number {
 154    return severity === 'high' ? 3 : severity === 'medium' ? 2 : 1
 155  }
 156  
 157  function summarizeToolNames(toolEvents: MessageToolEvent[]): { repeatedTool: string | null; counts: Map<string, number> } {
 158    const counts = new Map<string, number>()
 159    for (const event of toolEvents) {
 160      const name = cleanText(event?.name, 80)
 161      if (!name) continue
 162      counts.set(name, (counts.get(name) || 0) + 1)
 163    }
 164    let repeatedTool: string | null = null
 165    let repeatedCount = 0
 166    for (const [name, count] of counts.entries()) {
 167      if (count > repeatedCount) {
 168        repeatedTool = name
 169        repeatedCount = count
 170      }
 171    }
 172    return { repeatedTool, counts }
 173  }
 174  
 175  function buildIncident(
 176    input: Omit<SupervisorIncident, 'id' | 'createdAt'>,
 177  ): Omit<SupervisorIncident, 'id' | 'createdAt'> {
 178    return {
 179      ...input,
 180      summary: sanitizeIncidentText(input.summary, 320) || 'Autonomy supervisor incident',
 181      details: sanitizeIncidentText(input.details, 500),
 182      toolName: cleanText(input.toolName, 120),
 183    }
 184  }
 185  
 186  function sessionContextPressure(session: Session | null): boolean {
 187    if (!session) return false
 188    const msgCount = getMessageCount(session.id)
 189    if (msgCount >= 60) return true
 190    const messages = getMessages(session.id)
 191    const totalChars = messages.reduce((sum, message) => sum + String(message?.text || '').length, 0)
 192    return totalChars >= 18_000
 193  }
 194  
 195  export function assessAutonomyRun(input: {
 196    runId: string
 197    sessionId: string
 198    taskId?: string | null
 199    agentId?: string | null
 200    source: string
 201    status?: SessionRunStatus | null
 202    resultText?: string | null
 203    error?: string | null
 204    toolEvents?: MessageToolEvent[]
 205    mainLoopState?: SupervisorStateSnapshot | null
 206    session?: Session | null
 207    settings?: Partial<AppSettings> | NormalizedSupervisorSettings | null
 208  }): AutonomyAssessment {
 209    const settings = normalizeSupervisorSettings(input.settings || loadSettings())
 210    const surface = classifySurface(input.source)
 211    if (!surface || !settings.supervisorEnabled || !runtimeScopeIncludes(settings.supervisorRuntimeScope, surface)) {
 212      return { incidents: [], interventionPrompt: null, shouldBlock: false, autoActions: [] }
 213    }
 214  
 215    const session = input.session || null
 216    const toolEvents = Array.isArray(input.toolEvents) ? input.toolEvents : []
 217    const stripped = stripMainLoopMeta(String(input.resultText || ''))
 218    const incidents: Array<Omit<SupervisorIncident, 'id' | 'createdAt'>> = []
 219    const { repeatedTool, counts } = summarizeToolNames(toolEvents)
 220    const repeatedCount = repeatedTool ? (counts.get(repeatedTool) || 0) : 0
 221    const state = input.mainLoopState || null
 222    const cost = typeof state?.missionCostUsd === 'number' ? state.missionCostUsd : 0
 223    const budgetUsd = typeof state?.goalContract?.budgetUsd === 'number' ? state.goalContract.budgetUsd : null
 224    const normalizedResult = stripped.toLowerCase()
 225    const repeatedSummary = Boolean(state?.summary && stripped && trimText(stripped, 220) === trimText(String(state.summary || ''), 220))
 226    const shortOrEmpty = !stripped || stripped.length < 80
 227    const status = input.status || 'completed'
 228  
 229    if (input.error) {
 230      incidents.push(buildIncident({
 231        runId: input.runId,
 232        sessionId: input.sessionId,
 233        taskId: input.taskId || null,
 234        agentId: input.agentId || null,
 235        source: input.source,
 236        kind: 'run_error',
 237        severity: 'high',
 238        summary: `Run failed: ${input.error}`,
 239        details: stripped || null,
 240        autoAction: 'block',
 241      }))
 242    }
 243  
 244    if (repeatedTool && repeatedCount >= settings.supervisorRepeatedToolLimit) {
 245      incidents.push(buildIncident({
 246        runId: input.runId,
 247        sessionId: input.sessionId,
 248        taskId: input.taskId || null,
 249        agentId: input.agentId || null,
 250        source: input.source,
 251        kind: 'repeated_tool',
 252        severity: repeatedCount >= settings.supervisorRepeatedToolLimit + 1 ? 'high' : 'medium',
 253        summary: `Repeated tool use detected: ${repeatedTool} ran ${repeatedCount} times in one run.`,
 254        details: stripped || null,
 255        toolName: repeatedTool,
 256        autoAction: 'replan',
 257      }))
 258    }
 259  
 260    if (
 261      !input.error
 262      && state
 263      && (state.followupChainCount || 0) >= settings.supervisorNoProgressLimit
 264      && (shortOrEmpty || repeatedSummary || /still working|continuing|trying again|retry/i.test(normalizedResult))
 265    ) {
 266      incidents.push(buildIncident({
 267        runId: input.runId,
 268        sessionId: input.sessionId,
 269        taskId: input.taskId || null,
 270        agentId: input.agentId || null,
 271        source: input.source,
 272        kind: 'no_progress',
 273        severity: 'medium',
 274        summary: 'No progress detected across recent autonomous turns.',
 275        details: stripped || null,
 276        autoAction: 'replan',
 277      }))
 278    }
 279  
 280    // Output diversity check — catches "different inputs, same output" stall pattern
 281    if (
 282      !input.error
 283      && toolEvents.length >= 6
 284      && !incidents.some((i) => i.kind === 'no_progress')
 285    ) {
 286      const outputHashes = new Set<string>()
 287      for (const event of toolEvents) {
 288        const output = String(event?.output || '').trim()
 289        if (output) {
 290          outputHashes.add(crypto.createHash('sha1').update(output.slice(0, 500)).digest('hex').slice(0, 12))
 291        }
 292      }
 293      if (outputHashes.size <= 2) {
 294        incidents.push(buildIncident({
 295          runId: input.runId,
 296          sessionId: input.sessionId,
 297          taskId: input.taskId || null,
 298          agentId: input.agentId || null,
 299          source: input.source,
 300          kind: 'no_progress',
 301          severity: 'medium',
 302          summary: `Output stagnation: ${toolEvents.length} tool calls produced only ${outputHashes.size} distinct output(s). The agent appears stuck.`,
 303          details: stripped || null,
 304          autoAction: 'replan',
 305        }))
 306      }
 307    }
 308  
 309    if (budgetUsd && Number.isFinite(budgetUsd) && budgetUsd > 0) {
 310      if (cost >= budgetUsd) {
 311        incidents.push(buildIncident({
 312          runId: input.runId,
 313          sessionId: input.sessionId,
 314          taskId: input.taskId || null,
 315          agentId: input.agentId || null,
 316          source: input.source,
 317          kind: 'budget_pressure',
 318          severity: 'high',
 319          summary: `Goal budget reached or exceeded ($${cost.toFixed(2)} / $${budgetUsd.toFixed(2)}).`,
 320          details: stripped || null,
 321          autoAction: 'budget_trim',
 322        }))
 323      } else if (cost >= budgetUsd * 0.9) {
 324        incidents.push(buildIncident({
 325          runId: input.runId,
 326          sessionId: input.sessionId,
 327          taskId: input.taskId || null,
 328          agentId: input.agentId || null,
 329          source: input.source,
 330          kind: 'budget_pressure',
 331          severity: 'medium',
 332          summary: `Goal budget nearly exhausted ($${cost.toFixed(2)} / $${budgetUsd.toFixed(2)}).`,
 333          details: stripped || null,
 334          autoAction: 'budget_trim',
 335        }))
 336      }
 337    }
 338  
 339    if (!input.error && sessionContextPressure(session)) {
 340      incidents.push(buildIncident({
 341        runId: input.runId,
 342        sessionId: input.sessionId,
 343        taskId: input.taskId || null,
 344        agentId: input.agentId || null,
 345        source: input.source,
 346        kind: 'context_pressure',
 347        severity: 'medium',
 348        summary: 'Context pressure detected; the session transcript is getting large.',
 349        details: null,
 350        autoAction: 'compact',
 351      }))
 352    }
 353  
 354    const strongest = [...incidents].sort((left, right) => severityRank(right.severity) - severityRank(left.severity))[0] || null
 355    let interventionPrompt: string | null = null
 356    let shouldBlock = false
 357    if (strongest?.kind === 'repeated_tool') {
 358      interventionPrompt = `Supervisor intervention: stop repeating ${strongest.toolName || 'the same tool'}. Summarize what has already been tried, identify why it is failing, then choose one materially different next step. Do not re-run the same tool call unless a concrete input has changed.`
 359    } else if (strongest?.kind === 'no_progress') {
 360      interventionPrompt = 'Supervisor intervention: no real progress has been made across recent autonomous turns. Summarize completed work, state the current blocker clearly, then either pick one different recovery step or mark the run blocked with the exact missing input.'
 361    } else if (strongest?.kind === 'budget_pressure' && strongest.severity !== 'high') {
 362      interventionPrompt = 'Supervisor intervention: budget is nearly exhausted. Skip exploratory work, avoid broad searches, and execute only the single highest-value remaining step needed to finish or surface the blocker.'
 363    } else if (strongest?.kind === 'context_pressure') {
 364      interventionPrompt = 'Supervisor intervention: compact context before continuing. Summarize objective, completed work, blocker state, and next action in a short execution brief, then continue from that brief only.'
 365    }
 366  
 367    if (strongest?.kind === 'budget_pressure' && strongest.severity === 'high') shouldBlock = true
 368    if (strongest?.kind === 'run_error' && (status === 'failed' || status === 'cancelled')) shouldBlock = true
 369  
 370    // Block after 3+ no_progress or repeated_tool incidents within 30 minutes for the same session
 371    if (!shouldBlock && strongest && (strongest.kind === 'no_progress' || strongest.kind === 'repeated_tool')) {
 372      const existingIncidents = Object.values(loadSupervisorIncidents()) as SupervisorIncident[]
 373      const recentSame = existingIncidents.filter((i) =>
 374        i.sessionId === input.sessionId
 375        && i.createdAt > Date.now() - 30 * 60_000
 376        && (i.kind === 'no_progress' || i.kind === 'repeated_tool'),
 377      )
 378      if (recentSame.length >= 3) shouldBlock = true
 379    }
 380  
 381    const seen = new Set<string>()
 382    const autoActions: AutonomyAssessment['autoActions'] = []
 383    for (const incident of incidents) {
 384      const autoAction = incident.autoAction
 385      if (!autoAction || seen.has(autoAction)) continue
 386      seen.add(autoAction)
 387      autoActions.push({
 388        action: autoAction as AutonomyAssessment['autoActions'][number]['action'],
 389        prompt: autoAction === 'replan' ? interventionPrompt || undefined : undefined,
 390        budgetWarning: autoAction === 'budget_trim' ? incident.summary : undefined,
 391      })
 392    }
 393  
 394    return { incidents, interventionPrompt, shouldBlock, autoActions }
 395  }
 396  
 397  export async function executeSupervisorAutoActions(params: {
 398    actions: AutonomyAssessment['autoActions']
 399    sessionId: string
 400    agentId?: string | null
 401  }): Promise<{ blocked: boolean }> {
 402    let blocked = false
 403    for (const action of params.actions) {
 404      if (action.action === 'block') {
 405        blocked = true
 406        try {
 407          const { upsertStoredItem } = await import('@/lib/server/storage')
 408          const id = genId()
 409          upsertStoredItem('notifications', id, {
 410            id,
 411            type: 'warning',
 412            title: 'Supervisor blocked autonomous run',
 413            message: `Session ${params.sessionId} blocked by autonomy supervisor.`,
 414            sessionId: params.sessionId,
 415            agentId: params.agentId,
 416            createdAt: Date.now(),
 417            read: false,
 418          })
 419          const { notify } = await import('@/lib/server/ws-hub')
 420          notify('notifications')
 421        } catch {}
 422      }
 423    }
 424    return { blocked }
 425  }
 426  
 427  function buildSessionTranscript(session: Session, maxMessages = DEFAULT_TRANSCRIPT_MESSAGES): string {
 428    const messages = getRecentMessages(session.id, maxMessages)
 429    const lines: string[] = []
 430    for (const message of messages) {
 431      if (!message || message.suppressed) continue
 432      const text = trimText(stripMainLoopMeta(String(message.text || '')), 700)
 433      const toolSummary = Array.isArray(message.toolEvents) && message.toolEvents.length > 0
 434        ? `\nTools: ${message.toolEvents.slice(0, 5).map((event) => {
 435          const status = event.error ? 'error' : 'ok'
 436          return `${event.name}(${status})`
 437        }).join(', ')}`
 438        : ''
 439      if (!text && !toolSummary) continue
 440      lines.push(`${message.role.toUpperCase()}: ${text}${toolSummary}`)
 441    }
 442    return lines.join('\n\n')
 443  }
 444  
 445  function buildReflectionPrompt(params: {
 446    session: Session
 447    source: string
 448    status: SessionRunStatus
 449    resultText: string
 450    incidents: SupervisorIncident[]
 451    sourceMessage?: string | null
 452  }): string {
 453    const incidentLines = params.incidents.length > 0
 454      ? params.incidents.map((incident) => `- ${incident.kind}: ${incident.summary}`).join('\n')
 455      : '- none'
 456    const transcript = buildSessionTranscript(params.session)
 457    return [
 458      'You are the SwarmClaw autonomy distiller.',
 459      'Turn one completed run into reflection memory that improves future runs.',
 460      'Return JSON only.',
 461      'If there is nothing worth storing, return {"skip":true,"reason":"..."}',
 462      '',
 463      'Required JSON fields when useful learning exists:',
 464      '- summary: one sentence',
 465      '- invariants: 0-4 stable rules that should remain true for future similar work',
 466      '- derived: 0-4 short-lived heuristics or next-run adjustments',
 467      '- failures: 0-3 concise failure patterns to avoid repeating',
 468      '- lessons: 0-4 reusable lessons',
 469      '- communication: 0-4 communication or tone preferences about the user/person worth remembering',
 470      '- relationship: 0-4 durable human context notes such as trust boundaries, personal priorities, or recurring sensitivities',
 471      '- significant_events: 0-4 notable life/work events or milestones that should be remembered later; include timing when the transcript states it',
 472      '- profile: 0-4 stable profile facts stated explicitly in the transcript, such as role, pronouns, timezone, or family context',
 473      '- boundaries: 0-4 explicit do/don\'t rules, sensitive topics, consent limits, or interaction boundaries',
 474      '- open_loops: 0-4 ongoing human situations, promised follow-ups, or check-back items that should resurface later',
 475      '- quality_score: number from 0.0 to 1.0 rating overall run quality (0=total failure, 0.5=mediocre, 0.8+=excellent)',
 476      '- quality_reasoning: one sentence justifying the quality_score',
 477      '',
 478      'Rules:',
 479      '- Remove secrets, tokens, hostnames, and one-off identifiers.',
 480      '- Prefer operational guidance over transcript recap.',
 481      '- Only include communication, relationship, or significant event notes when the transcript genuinely supports them.',
 482      '- Do not repeat the same sentence across sections.',
 483      '- Leave arrays empty when not applicable.',
 484      '',
 485      `Source: ${params.source}`,
 486      `Status: ${params.status}`,
 487      params.sourceMessage ? `Run prompt: ${trimText(params.sourceMessage, 240)}` : '',
 488      `Supervisor incidents:\n${incidentLines}`,
 489      '',
 490      'Result:',
 491      trimText(stripMainLoopMeta(params.resultText), 900),
 492      '',
 493      'Transcript:',
 494      transcript,
 495    ].filter(Boolean).join('\n')
 496  }
 497  
 498  function maybeParseJson(text: string): Record<string, unknown> | null {
 499    const raw = text.trim()
 500    const candidates = [
 501      raw,
 502      raw.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/, ''),
 503    ]
 504    for (const candidate of candidates) {
 505      try {
 506        const parsed = JSON.parse(candidate)
 507        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) return parsed as Record<string, unknown>
 508      } catch {
 509        // ignore and continue
 510      }
 511    }
 512    const match = raw.match(/\{[\s\S]*\}/)
 513    if (!match) return null
 514    try {
 515      const parsed = JSON.parse(match[0])
 516      return parsed && typeof parsed === 'object' && !Array.isArray(parsed) ? parsed as Record<string, unknown> : null
 517    } catch {
 518      return null
 519    }
 520  }
 521  
 522  function normalizeQualityScore(value: unknown): number | null {
 523    if (typeof value !== 'number' || !Number.isFinite(value)) return null
 524    return Math.max(0, Math.min(1, value))
 525  }
 526  
 527  function normalizeNoteArray(value: unknown, limit = 4): string[] {
 528    if (!Array.isArray(value)) return []
 529    const out: string[] = []
 530    const seen = new Set<string>()
 531    for (const entry of value) {
 532      const note = cleanText(entry, 220)
 533      if (!note) continue
 534      const key = note.toLowerCase()
 535      if (seen.has(key)) continue
 536      seen.add(key)
 537      out.push(note)
 538      if (out.length >= limit) break
 539    }
 540    return out
 541  }
 542  
 543  function transcriptHasSemanticSignal(
 544    session: Session | null,
 545    signal: 'hasHumanSignals' | 'hasSignificantEvent',
 546  ): boolean {
 547    if (!session) return false
 548    const recentMessages = getRecentMessages(session.id, 8)
 549    return recentMessages.some((message) => message?.role === 'user' && message?.semantics?.[signal] === true)
 550  }
 551  
 552  function transcriptHasHumanSignals(session: Session | null): boolean {
 553    return transcriptHasSemanticSignal(session, 'hasHumanSignals')
 554  }
 555  
 556  function transcriptHasSignificantEvents(session: Session | null): boolean {
 557    return transcriptHasSemanticSignal(session, 'hasSignificantEvent')
 558  }
 559  
 560  function parseReflectionResponse(raw: string): {
 561    skip: boolean
 562    reason?: string
 563    summary?: string
 564    invariants: string[]
 565    derived: string[]
 566    failures: string[]
 567    lessons: string[]
 568    communication: string[]
 569    relationship: string[]
 570    significantEvents: string[]
 571    profile: string[]
 572    boundaries: string[]
 573    openLoops: string[]
 574    qualityScore: number | null
 575    qualityReasoning: string | null
 576  } {
 577    const parsed = maybeParseJson(raw)
 578    if (!parsed) throw new Error('Model did not return valid JSON for the run reflection.')
 579    if (parsed.skip === true) {
 580      return {
 581        skip: true,
 582        reason: cleanText(parsed.reason, 220) || 'No durable learning found for this run.',
 583        invariants: [],
 584        derived: [],
 585        failures: [],
 586        lessons: [],
 587        communication: [],
 588        relationship: [],
 589        significantEvents: [],
 590        profile: [],
 591        boundaries: [],
 592        openLoops: [],
 593        qualityScore: normalizeQualityScore(parsed.quality_score),
 594        qualityReasoning: cleanText(parsed.quality_reasoning, 220),
 595      }
 596    }
 597    const summary = cleanText(parsed.summary, 220)
 598    const invariants = normalizeNoteArray(parsed.invariants, 4)
 599    const derived = normalizeNoteArray(parsed.derived, 4)
 600    const failures = normalizeNoteArray(parsed.failures, 3)
 601    const lessons = normalizeNoteArray(parsed.lessons, 4)
 602    const communication = normalizeNoteArray(parsed.communication, 4)
 603    const relationship = normalizeNoteArray(parsed.relationship, 4)
 604    const significantEvents = normalizeNoteArray(parsed.significant_events, 4)
 605    const profile = normalizeNoteArray(parsed.profile, 4)
 606    const boundaries = normalizeNoteArray(parsed.boundaries, 4)
 607    const openLoops = normalizeNoteArray(parsed.open_loops, 4)
 608    if (
 609      !summary
 610      && invariants.length === 0
 611      && derived.length === 0
 612      && failures.length === 0
 613      && lessons.length === 0
 614      && communication.length === 0
 615      && relationship.length === 0
 616      && significantEvents.length === 0
 617      && profile.length === 0
 618      && boundaries.length === 0
 619      && openLoops.length === 0
 620    ) {
 621      return {
 622        skip: true,
 623        reason: 'No durable learning found for this run.',
 624        invariants: [],
 625        derived: [],
 626        failures: [],
 627        lessons: [],
 628        communication: [],
 629        relationship: [],
 630        significantEvents: [],
 631        profile: [],
 632        boundaries: [],
 633        openLoops: [],
 634        qualityScore: normalizeQualityScore(parsed.quality_score),
 635        qualityReasoning: cleanText(parsed.quality_reasoning, 220),
 636      }
 637    }
 638    return {
 639      skip: false,
 640      summary: summary || 'Autonomy reflection',
 641      invariants,
 642      derived,
 643      failures,
 644      lessons,
 645      communication,
 646      relationship,
 647      significantEvents,
 648      profile,
 649      boundaries,
 650      openLoops,
 651      qualityScore: normalizeQualityScore(parsed.quality_score),
 652      qualityReasoning: cleanText(parsed.quality_reasoning, 220),
 653    }
 654  }
 655  
 656  function shouldReflectRun(params: {
 657    source: string
 658    status: SessionRunStatus
 659    resultText: string
 660    incidents: SupervisorIncident[]
 661    toolEvents: MessageToolEvent[]
 662    session: Session | null
 663    runtimeScope: AutonomyRuntimeScope
 664  }): boolean {
 665    const surface = classifySurface(params.source)
 666    if (!surface || !runtimeScopeIncludes(params.runtimeScope, surface)) return false
 667    if (params.status === 'cancelled') return false
 668    if (surface === 'task') return Boolean(params.resultText.trim() || params.incidents.length > 0)
 669    const meaningfulMessages = params.session
 670      ? getMessages(params.session.id).filter((message) => message && !message.suppressed && (message.text || message.toolEvents?.length)).length
 671      : 0
 672    if (transcriptHasHumanSignals(params.session)) return true
 673    if (transcriptHasSignificantEvents(params.session)) return true
 674    if (params.incidents.length > 0) return true
 675    if (params.toolEvents.length > 0) return true
 676    if (params.resultText.trim().length >= 180) return true
 677    if (meaningfulMessages >= 6 && params.resultText.trim().length >= 100) return true
 678    if (meaningfulMessages >= 4 && params.resultText.trim().length >= 60) return true
 679    return false
 680  }
 681  
// Kinds of memory entries written from a run reflection. Each kind maps to a
// title prefix (buildMemoryTitle), a storage category (memoryCategoryForKind)
// and a confidence floor (resolveReflectionMemoryConfidence).
type ReflectionMemoryKind =
  | 'invariant'
  | 'derived'
  | 'failure'
  | 'lesson'
  | 'communication'
  | 'relationship'
  | 'significant_event'
  | 'profile'
  | 'boundary'
  | 'open_loop'
 693  
 694  function buildMemoryTitle(kind: ReflectionMemoryKind, summary: string): string {
 695    const prefix = kind === 'invariant'
 696      ? 'Reflection Invariant'
 697      : kind === 'derived'
 698        ? 'Reflection Heuristic'
 699        : kind === 'failure'
 700          ? 'Reflection Failure'
 701          : kind === 'lesson'
 702            ? 'Reflection Lesson'
 703            : kind === 'communication'
 704            ? 'Communication Preference'
 705            : kind === 'relationship'
 706              ? 'Relationship Context'
 707              : kind === 'significant_event'
 708                ? 'Significant Event'
 709                : kind === 'profile'
 710                  ? 'Profile Context'
 711                  : kind === 'boundary'
 712                    ? 'Interaction Boundary'
 713                    : 'Open Loop'
 714    return `${prefix}: ${trimText(summary, 100)}`
 715  }
 716  
 717  function memoryCategoryForKind(kind: ReflectionMemoryKind): string {
 718    if (kind === 'invariant') return 'reflection/invariant'
 719    if (kind === 'derived') return 'reflection/derived'
 720    if (kind === 'failure') return 'reflection/failure'
 721    if (kind === 'lesson') return 'reflection/lesson'
 722    if (kind === 'communication') return 'reflection/communication'
 723    if (kind === 'relationship') return 'reflection/relationship'
 724    if (kind === 'significant_event') return 'reflection/significant_event'
 725    if (kind === 'profile') return 'reflection/profile'
 726    if (kind === 'boundary') return 'reflection/boundary'
 727    return 'reflection/open_loop'
 728  }
 729  
 /**
  * Picks the confidence value recorded on an auto-written reflection memory.
  *
  * The base value is the `memoryDefaultConfidence` setting when it is a finite
  * number greater than zero, otherwise 0.72. Some kinds additionally enforce a
  * minimum: 0.82 for profile/boundary/significant_event and 0.76 for
  * communication/relationship/open_loop. All other kinds use the base as-is.
  */
 function resolveReflectionMemoryConfidence(kind: ReflectionMemoryKind): number {
   const rawSetting = Number(loadSettings().memoryDefaultConfidence)
   const settingIsUsable = Number.isFinite(rawSetting) && rawSetting > 0
   const base = settingIsUsable ? rawSetting : 0.72

   let floor: number
   switch (kind) {
     case 'profile':
     case 'boundary':
     case 'significant_event':
       floor = 0.82
       break
     case 'communication':
     case 'relationship':
     case 'open_loop':
       floor = 0.76
       break
     default:
       // No minimum for the remaining kinds.
       return base
   }
   return Math.max(base, floor)
 }
 737  
 /**
  * Derives a follow-up timestamp (epoch milliseconds) for an open-loop note.
  *
  * Resolution order:
  *  1. If the whole note parses via `Date.parse` to a moment after `createdAt`,
  *     that moment is used.
  *  2. Otherwise the first matching relative phrase decides the offset:
  *     "tomorrow" = 1 day, "next week" / "in a week" = 7 days,
  *     "next month" / "in a month" = 30 days.
  *  3. With no hint at all the follow-up defaults to 7 days after `createdAt`.
  */
 function inferFollowUpAt(note: string, createdAt: number): number {
   const DAY_MS = 24 * 3600_000

   const parsedDate = Date.parse(note)
   if (Number.isFinite(parsedDate) && parsedDate > createdAt) return parsedDate

   // Ordered phrase -> day-offset table; the first pattern that matches wins.
   const relativeHints: Array<[RegExp, number]> = [
     [/\btomorrow\b/i, 1],
     [/\bnext week\b|\bin a week\b/i, 7],
     [/\bnext month\b|\bin a month\b/i, 30],
   ]
   const matchedHint = relativeHints.find(([pattern]) => pattern.test(note))
   const offsetDays = matchedHint ? matchedHint[1] : 7
   return createdAt + offsetDays * DAY_MS
 }
 746  
 /**
  * Persists the notes of a parsed reflection as memory entries, one per note,
  * and returns the ids of the entries that were added.
  *
  * Notes are processed kind by kind in a fixed order (invariant, derived,
  * failure, lesson, communication, relationship, significant_event, profile,
  * boundary, open_loop). A note is skipped when its normalized text
  * (lower-cased, whitespace collapsed, trimmed, first 240 characters) is empty,
  * was already written earlier in this call, or matches a reflection memory of
  * the same agent updated within the last 7 days.
  *
  * @param params - Reflection identifiers, related incidents, the reflection
  *   summary (used for the memory title) and the note lists per kind.
  * @returns De-duplicated ids of the memory entries added by this call.
  */
 function writeReflectionMemories(params: {
   reflectionId: string
   runId: string
   sessionId: string
   agentId?: string | null
   incidents: SupervisorIncident[]
   summary: string
   invariants: string[]
   derived: string[]
   failures: string[]
   lessons: string[]
   communication: string[]
   relationship: string[]
   significantEvents: string[]
   profile: string[]
   boundaries: string[]
   openLoops: string[]
 }): string[] {
   const memoryDb = getMemoryDb()
   const writtenIds: string[] = []
   const incidentIds = params.incidents.map((incident) => incident.id)
   const createdAt = now()
   const notesByKind: Array<[ReflectionMemoryKind, string[]]> = [
     ['invariant', params.invariants],
     ['derived', params.derived],
     ['failure', params.failures],
     ['lesson', params.lessons],
     ['communication', params.communication],
     ['relationship', params.relationship],
     ['significant_event', params.significantEvents],
     ['profile', params.profile],
     ['boundary', params.boundaries],
     ['open_loop', params.openLoops],
   ]

   // Dedup keys shared across all kinds: the reflection classifier often emits
   // near-identical notes under several kinds (for example the same sentence as
   // both an invariant and a lesson), which would otherwise flood memory.
   const dedupKeys = new Set<string>()
   const toDedupKey = (text: string): string =>
     text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 240)

   // Seed the dedup set with this agent's reflection memories from the last
   // 7 days so that successive runs re-deriving the same note are absorbed
   // instead of being stored again.
   const WEEK_MS = 7 * 24 * 3600_000
   const dedupCutoff = createdAt - WEEK_MS
   try {
     if (params.agentId) {
       for (const existing of memoryDb.list(params.agentId, 500)) {
         if (!existing.category?.startsWith('reflection/')) continue
         if ((existing.updatedAt || 0) < dedupCutoff) continue
         const key = toDedupKey(existing.content || '')
         if (key) dedupKeys.add(key)
       }
     }
   } catch {
     // Best-effort lookup: on failure only the within-call dedup applies, so
     // the reflection write itself is never blocked.
   }

   for (const [kind, notes] of notesByKind) {
     for (const note of notes) {
       const key = toDedupKey(note)
       if (!key || dedupKeys.has(key)) continue
       dedupKeys.add(key)

       const metadata: Record<string, unknown> = {
         origin: 'autonomy-reflection',
         reflectionId: params.reflectionId,
         reflectionKind: kind,
         runId: params.runId,
         incidentIds,
         autoWritten: true,
         tier: 'durable',
         confidence: resolveReflectionMemoryConfidence(kind),
         sourceRunId: params.runId,
       }

       // Kind-specific facet metadata.
       switch (kind) {
         case 'communication':
         case 'relationship':
         case 'profile':
         case 'boundary':
           metadata.memoryFacet = 'human'
           break
         case 'significant_event':
           metadata.memoryFacet = 'event'
           metadata.eventSalience = 'high'
           break
         case 'open_loop':
           // Open loops carry a follow-up time and start out unresolved.
           metadata.memoryFacet = 'followup'
           metadata.followUpAt = inferFollowUpAt(note, createdAt)
           metadata.resolvedAt = null
           break
         default:
           break
       }

       const added = memoryDb.add({
         agentId: params.agentId || null,
         sessionId: params.sessionId,
         category: memoryCategoryForKind(kind),
         title: buildMemoryTitle(kind, params.summary),
         content: note,
         metadata,
       })
       writtenIds.push(added.id)
     }
   }

   return Array.from(new Set(writtenIds))
 }
 855  
 /**
  * Lists stored supervisor incidents, newest first (by `createdAt`).
  *
  * @param filters.sessionId - When non-empty after cleaning, only incidents of this session are returned.
  * @param filters.taskId - When non-empty after cleaning, only incidents of this task are returned.
  * @param filters.limit - Maximum number of results; defaults to 100 and is clamped to the range 1..200.
  */
 export function listSupervisorIncidents(filters?: { sessionId?: string; taskId?: string; limit?: number }): SupervisorIncident[] {
   const wantedSession = cleanText(filters?.sessionId, 120)
   const wantedTask = cleanText(filters?.taskId, 120)
   const requested = Number(filters?.limit || 100)
   const cap = Math.max(1, Math.min(200, requested))

   const matches: SupervisorIncident[] = []
   for (const incident of Object.values(loadSupervisorIncidents())) {
     if (wantedSession && incident.sessionId !== wantedSession) continue
     if (wantedTask && incident.taskId !== wantedTask) continue
     matches.push(incident)
   }
   matches.sort((a, b) => b.createdAt - a.createdAt)
   return matches.slice(0, cap)
 }
 865  
 /**
  * Lists stored run reflections, most recently updated first (by `updatedAt`).
  *
  * @param filters.sessionId - When non-empty after cleaning, only reflections of this session are returned.
  * @param filters.taskId - When non-empty after cleaning, only reflections of this task are returned.
  * @param filters.limit - Maximum number of results; defaults to 100 and is clamped to the range 1..200.
  */
 export function listRunReflections(filters?: { sessionId?: string; taskId?: string; limit?: number }): RunReflection[] {
   const wantedSession = cleanText(filters?.sessionId, 120)
   const wantedTask = cleanText(filters?.taskId, 120)
   const requested = Number(filters?.limit || 100)
   const cap = Math.max(1, Math.min(200, requested))

   const matches: RunReflection[] = []
   for (const reflection of Object.values(loadRunReflections())) {
     if (wantedSession && reflection.sessionId !== wantedSession) continue
     if (wantedTask && reflection.taskId !== wantedTask) continue
     matches.push(reflection)
   }
   matches.sort((a, b) => b.updatedAt - a.updatedAt)
   return matches.slice(0, cap)
 }
 875  
 /**
  * Classifies a runtime failure into a failure family with a severity and
  * operator guidance, using keyword heuristics.
  *
  * `source` and `message` are joined with a space and lower-cased, then tested
  * against an ordered list of patterns. The first match wins, so narrower rules
  * must precede broader ones (e.g. `webhook` is checked before the generic
  * `delivery` rule). When nothing matches, the failure is reported as
  * `provider_transport`.
  *
  * HTTP status codes (401/403/429) are only recognised when they are not the
  * tail of a longer number, so text such as "finished after 1429 ms" is not
  * mistaken for a rate-limit or auth failure. Codes glued to letters (such as
  * `E401`) still match.
  *
  * @param params.source - Label of where the failure came from; participates in matching.
  * @param params.message - Raw failure text.
  * @returns A new object holding the family, severity, remediation hint and repair prompt.
  */
 export function classifyRuntimeFailure(params: {
   source: string
   message: string
 }): {
   family: NonNullable<SupervisorIncident['failureFamily']>
   severity: SupervisorIncidentSeverity
   remediation: string
   repairPrompt: string
 } {
   type Classification = ReturnType<typeof classifyRuntimeFailure>

   // Ordered rule table: evaluation stops at the first matching pattern.
   const rules: Array<{ pattern: RegExp; outcome: Classification }> = [
     {
       // `(?<!\d)` keeps "1401"/"2403" from matching while still accepting "E401".
       pattern: /unauthorized|forbidden|invalid api key|authentication|(?<!\d)(?:401|403)\b/,
       outcome: {
         family: 'provider_auth',
         severity: 'high',
         remediation: 'Check credential selection, key validity, and provider account access before retrying.',
         repairPrompt: 'Verify the configured credential and retry only after the provider auth issue is fixed.',
       },
     },
     {
       pattern: /rate limit|too many requests|(?<!\d)429\b/,
       outcome: {
         family: 'rate_limit',
         severity: 'medium',
         remediation: 'Back off, reduce concurrency, or switch providers before retrying.',
         repairPrompt: 'Pause retries and retry later with lower concurrency or an alternate provider.',
       },
     },
     {
       pattern: /gateway|socket|disconnected|not connected|connection closed/,
       outcome: {
         family: 'gateway_disconnected',
         severity: 'high',
         remediation: 'Reconnect the gateway/runtime bridge and confirm health before resuming queued work.',
         repairPrompt: 'Re-establish the gateway connection, then replay the blocked work once connectivity is healthy.',
       },
     },
     {
       pattern: /browser.*failed|playwright|chromium|launch/,
       outcome: {
         family: 'browser_boot',
         severity: 'medium',
         remediation: 'Check browser dependencies, profile locks, and local sandbox constraints.',
         repairPrompt: 'Repair the browser runtime and rerun the step only after the browser can launch cleanly.',
       },
     },
     {
       pattern: /enoent|command not found|not installed|missing binary/,
       outcome: {
         family: 'cli_missing',
         severity: 'medium',
         remediation: 'Install or configure the missing CLI/binary before retrying.',
         repairPrompt: 'Install the missing tool or update PATH before retrying the task.',
       },
     },
     {
       pattern: /webhook/,
       outcome: {
         family: 'webhook_delivery',
         severity: 'medium',
         remediation: 'Inspect webhook target health, retry policy, and payload expectations before retrying.',
         repairPrompt: 'Validate the webhook target and payload contract, then retry delivery once fixed.',
       },
     },
     {
       pattern: /connector|outbox|delivery/,
       outcome: {
         family: 'connector_delivery',
         severity: 'medium',
         remediation: 'Check connector credentials, destination addressing, and platform health before retrying.',
         repairPrompt: 'Repair connector delivery state and resend the message once the platform is healthy.',
       },
     },
   ]

   const normalized = `${params.source} ${params.message}`.toLowerCase()
   const matched = rules.find((rule) => rule.pattern.test(normalized))
   if (matched) return { ...matched.outcome }

   // Nothing recognised: treat it as a generic upstream transport problem.
   return {
     family: 'provider_transport',
     severity: 'medium',
     remediation: 'Inspect upstream connectivity and transient provider errors before retrying.',
     repairPrompt: 'Retry only after verifying the upstream transport path is healthy.',
   }
 }
 949  
 /**
  * Records a single supervisor incident by delegating to `persistIncidents`
  * with a one-element batch.
  *
  * @param incident - Incident data without `id` and `createdAt`.
  * @returns The first (and only) entry of the persisted batch.
  */
 export function recordSupervisorIncident(
   incident: Omit<SupervisorIncident, 'id' | 'createdAt'>,
 ): SupervisorIncident {
   const [persisted] = persistIncidents([incident])
   return persisted
 }
 955  
 /**
  * Stores a batch of supervisor incidents and announces each one.
  *
  * For every input incident, an already stored incident with the same run id,
  * kind, summary and tool name is reused; otherwise a new record is created
  * whose id is `sup-` plus the first 12 hex characters of a SHA-1 over the
  * run id, session id, kind, summary and tool name. The store is saved once
  * after the whole batch has been merged.
  *
  * Every returned incident, reused or new, is then logged, written to the
  * execution log and sent to the orchestrators. Incidents of `high` or
  * `medium` severity additionally produce an activity entry and a notification.
  *
  * @param incidents - Incident data without `id` and `createdAt`.
  * @returns One stored incident per input, in input order (empty for an empty batch).
  */
 function persistIncidents(incidents: Array<Omit<SupervisorIncident, 'id' | 'createdAt'>>): SupervisorIncident[] {
   if (!incidents.length) return []

   const store = loadSupervisorIncidents()
   const createdAt = now()

   const created: SupervisorIncident[] = incidents.map((incident) => {
     const fingerprintSource = JSON.stringify({
       runId: incident.runId,
       sessionId: incident.sessionId,
       kind: incident.kind,
       summary: incident.summary,
       toolName: incident.toolName || null,
     })
     const signature = crypto.createHash('sha1').update(fingerprintSource).digest('hex')

     const wantedTool = incident.toolName || null
     const alreadyStored = Object.values(store).find((candidate) => {
       if (candidate.runId !== incident.runId) return false
       if (candidate.kind !== incident.kind) return false
       if (candidate.summary !== incident.summary) return false
       return (candidate.toolName || null) === wantedTool
     })

     const record: SupervisorIncident = alreadyStored || {
       ...incident,
       id: `sup-${signature.slice(0, 12)}`,
       createdAt,
     }
     store[record.id] = record
     return record
   })

   saveSupervisorIncidents(store)

   for (const incident of created) {
     const sid = incident.sessionId || ''
     const summary = (incident.summary || '').slice(0, 100)
     const { kind, severity } = incident

     log.warn('supervisor', 'Incident', { kind, severity, summary })
     logExecution(sid, 'supervisor_incident', summary, {
       detail: { kind, severity, incidentId: incident.id },
     })

     // Only medium/high incidents surface in the activity feed and as notifications.
     const shouldSurface = severity === 'high' || severity === 'medium'
     if (shouldSurface) {
       const entityId = incident.agentId || sid
       logActivity({
         entityType: 'agent',
         entityId,
         action: 'incident',
         actor: 'system',
         summary: `Supervisor incident [${severity}]: ${summary}`,
       })
       createNotification({
         type: severity === 'high' ? 'error' : 'warning',
         title: `Supervisor: ${kind}`,
         message: summary,
         entityType: 'agent',
         entityId,
         dedupKey: `supervisor_incident:${incident.id || kind}`,
       })
     }

     log.info('supervisor', 'Notifying orchestrators', { text: summary })
     notifyOrchestrators(`Incident [${severity}]: ${summary}`, `incident:${incident.id || kind}`)
   }

   return created
 }
1017  
/**
 * Observes a finished autonomy run: records supervisor incidents and, when
 * warranted, generates and stores a reflection for the run.
 *
 * Flow:
 *  1. Returns an empty result when the run's source cannot be classified into
 *     a surface or that surface is outside the configured supervisor scope.
 *  2. Assesses the run and persists the resulting incidents.
 *  3. Returns the incidents without a reflection when reflection is disabled,
 *     `shouldReflectRun` declines, or the session is unknown. If a reflection
 *     for the same run id already exists, that one is returned.
 *  4. Builds the reflection prompt and obtains text either from
 *     `options.generateText` or from the LLM built for the session/agent.
 *     Errors from this step are not caught here.
 *  5. Parses the response; a parse failure is logged and yields no reflection,
 *     as does a parsed `skip`.
 *  6. Optionally writes reflection memories, saves the reflection, folds it
 *     into the session run context (failures are only logged) and, when a
 *     numeric quality score and an agent id are present, starts the quality
 *     degradation check without awaiting it.
 *
 * @param input - Description of the finished run.
 * @param options.generateText - Optional replacement for the LLM call; receives the prompt.
 * @returns The persisted incidents and the reflection (or `null`).
 */
export async function observeAutonomyRunOutcome(
  input: ObserveAutonomyRunInput,
  options?: { generateText?: (prompt: string) => Promise<string> },
): Promise<{ incidents: SupervisorIncident[]; reflection: RunReflection | null }> {
  const settings = normalizeSupervisorSettings(loadSettings())
  const surface = classifySurface(input.source)
  if (!(surface && runtimeScopeIncludes(settings.supervisorRuntimeScope, surface))) {
    return { incidents: [], reflection: null }
  }

  const session = loadSessions()[input.sessionId] as unknown as Session | undefined
  const toolEvents = Array.isArray(input.toolEvents) ? input.toolEvents : []
  const resultText = String(input.resultText || input.error || '')

  const assessment = assessAutonomyRun({
    ...input,
    resultText,
    toolEvents,
    session: session || null,
    settings,
  })
  const incidents = persistIncidents(assessment.incidents)

  // Reflection gate: the setting is checked first so shouldReflectRun is only
  // consulted when reflection is enabled at all.
  const wantsReflection = settings.reflectionEnabled && shouldReflectRun({
    source: input.source,
    status: input.status,
    resultText,
    incidents,
    toolEvents,
    session: session || null,
    runtimeScope: settings.supervisorRuntimeScope,
  })
  if (!wantsReflection) {
    return { incidents, reflection: null }
  }

  // A run is reflected on at most once; reuse a stored reflection if present.
  const priorReflection = Object.values(loadRunReflections()).find((entry) => entry.runId === input.runId)
  if (priorReflection) return { incidents, reflection: priorReflection }
  if (!session) return { incidents, reflection: null }

  const prompt = buildReflectionPrompt({
    session,
    source: input.source,
    status: input.status,
    resultText,
    incidents,
    sourceMessage: input.sourceMessage,
  })

  let responseText: string
  if (options?.generateText) {
    responseText = await options.generateText(prompt)
  } else {
    const { llm } = await buildLLM({ sessionId: input.sessionId, agentId: input.agentId || session.agentId || null })
    const { content } = await llm.invoke([new HumanMessage(prompt)])
    if (typeof content === 'string') {
      responseText = content
    } else if (Array.isArray(content)) {
      // Concatenate the text of every part that carries a string `text` field.
      responseText = content
        .map((part) => (part && typeof part === 'object' && 'text' in part && typeof part.text === 'string') ? part.text : '')
        .join('')
    } else {
      responseText = ''
    }
  }

  let parsed: ReturnType<typeof parseReflectionResponse>
  try {
    parsed = parseReflectionResponse(responseText)
  } catch {
    log.warn(TAG, `Reflection parse failed for run ${input.runId}, skipping reflection`)
    return { incidents, reflection: null }
  }
  if (parsed.skip) return { incidents, reflection: null }

  const reflectionId = genId()
  const summary = parsed.summary || 'Autonomy reflection'

  let autoMemoryIds: string[] = []
  if (settings.reflectionAutoWriteMemory) {
    autoMemoryIds = writeReflectionMemories({
      reflectionId,
      runId: input.runId,
      sessionId: input.sessionId,
      agentId: input.agentId || null,
      incidents,
      summary,
      invariants: parsed.invariants,
      derived: parsed.derived,
      failures: parsed.failures,
      lessons: parsed.lessons,
      communication: parsed.communication,
      relationship: parsed.relationship,
      significantEvents: parsed.significantEvents,
      profile: parsed.profile,
      boundaries: parsed.boundaries,
      openLoops: parsed.openLoops,
    })
  }

  const reflection: RunReflection = {
    id: reflectionId,
    runId: input.runId,
    sessionId: input.sessionId,
    taskId: input.taskId || null,
    agentId: input.agentId || null,
    source: input.source,
    status: input.status,
    summary,
    sourceSnippet: trimText(buildSessionTranscript(session), DEFAULT_SNIPPET_CHARS),
    invariantNotes: parsed.invariants,
    derivedNotes: parsed.derived,
    failureNotes: parsed.failures,
    lessonNotes: parsed.lessons,
    communicationNotes: parsed.communication,
    relationshipNotes: parsed.relationship,
    significantEventNotes: parsed.significantEvents,
    profileNotes: parsed.profile,
    boundaryNotes: parsed.boundaries,
    openLoopNotes: parsed.openLoops,
    incidentIds: incidents.map((incident) => incident.id),
    autoMemoryIds,
    qualityScore: parsed.qualityScore ?? null,
    qualityReasoning: parsed.qualityReasoning ?? null,
    createdAt: now(),
    updatedAt: now(),
  }

  // Reload before saving so the write is based on the current stored state.
  const storedReflections = loadRunReflections()
  storedReflections[reflection.id] = reflection
  saveRunReflections(storedReflections)

  // Folding the reflection into the session's run context is non-critical:
  // a failure is logged and otherwise ignored.
  try {
    const currentSession = getSession(input.sessionId) as Session | undefined
    if (currentSession) {
      currentSession.runContext = foldReflectionIntoRunContext(currentSession.runContext, reflection)
      saveSession(input.sessionId, currentSession)
    }
  } catch (err: unknown) {
    log.warn(TAG, 'RunContext reflection folding failed:', err instanceof Error ? err.message : String(err))
  }

  // Fire-and-forget quality check; its rejection is deliberately swallowed.
  const hasScore = typeof reflection.qualityScore === 'number'
  if (hasScore && input.agentId) {
    checkQualityDegradation(input.agentId).catch(() => {})
  }

  return { incidents, reflection }
}
1159  
1160  // ---------------------------------------------------------------------------
1161  // Quality trend monitoring
1162  // ---------------------------------------------------------------------------
1163  
/** Rolling quality average below which `checkQualityDegradation` raises a warning. */
const QUALITY_DEGRADATION_THRESHOLD = 0.5
/** Number of most recent scored runs that make up the rolling quality average. */
const QUALITY_TREND_WINDOW = 5

/**
 * Summarises an agent's reflection quality scores over a recent time window.
 *
 * Only reflections of the given agent, created within the last `windowDays`
 * days and carrying a finite numeric `qualityScore`, are considered.
 * Reflections without a usable score (`null`, missing, or NaN) are ignored so
 * they cannot turn the averages into NaN.
 *
 * @param agentId - Agent whose reflections are analysed.
 * @param windowDays - Look-back window in days (default 14).
 * @returns
 *  - `scores`: per-UTC-day average and count, in chronological order;
 *  - `recentAvg`: mean of the last `QUALITY_TREND_WINDOW` scored runs, or
 *    `null` when there are none;
 *  - `recentCount`: how many runs contributed to `recentAvg`.
 */
export function getAgentQualityTrend(agentId: string, windowDays = 14): {
  scores: Array<{ date: string; avg: number; count: number }>
  recentAvg: number | null
  recentCount: number
} {
  const cutoff = Date.now() - windowDays * 24 * 3600_000
  const mean = (values: number[]): number => values.reduce((sum, value) => sum + value, 0) / values.length

  // Collect (timestamp, score) pairs, narrowing the score to a real number so
  // no non-null assertions are needed further down.
  const scored: Array<{ createdAt: number; score: number }> = []
  for (const reflection of Object.values(loadRunReflections())) {
    if (reflection.agentId !== agentId) continue
    if (!(reflection.createdAt >= cutoff)) continue
    const score = reflection.qualityScore
    if (typeof score !== 'number' || !Number.isFinite(score)) continue
    scored.push({ createdAt: reflection.createdAt, score })
  }
  scored.sort((a, b) => a.createdAt - b.createdAt)

  // Bucket by UTC calendar day (YYYY-MM-DD); Map keeps first-insertion order,
  // which is chronological because `scored` is sorted.
  const buckets = new Map<string, number[]>()
  for (const { createdAt, score } of scored) {
    const day = new Date(createdAt).toISOString().slice(0, 10)
    const bucket = buckets.get(day)
    if (bucket) bucket.push(score)
    else buckets.set(day, [score])
  }
  const scores = Array.from(buckets, ([date, values]) => ({
    date,
    avg: mean(values),
    count: values.length,
  }))

  // Rolling average over the most recent scored runs.
  const recentScores = scored.slice(-QUALITY_TREND_WINDOW).map((entry) => entry.score)
  const recentAvg = recentScores.length > 0 ? mean(recentScores) : null
  return { scores, recentAvg, recentCount: recentScores.length }
}
1201  
/**
 * Raises a warning notification when an agent's recent quality has degraded.
 *
 * Nothing happens unless the 14-day trend has a rolling average, that average
 * covers a full window of `QUALITY_TREND_WINDOW` runs, and it lies below
 * `QUALITY_DEGRADATION_THRESHOLD`. When it does, a notification record is
 * stored and the `notifications` channel is signalled. Any failure while
 * doing so is swallowed, because the alert is best-effort.
 */
async function checkQualityDegradation(agentId: string): Promise<void> {
  const { recentAvg, recentCount } = getAgentQualityTrend(agentId, 14)
  if (
    recentAvg === null
    || recentCount < QUALITY_TREND_WINDOW
    || recentAvg >= QUALITY_DEGRADATION_THRESHOLD
  ) {
    return
  }

  try {
    const storage = await import('@/lib/server/storage')
    const id = genId()
    const notification = {
      id,
      type: 'warning',
      title: 'Agent quality degradation',
      message: `Agent ${agentId} quality score has dropped to ${recentAvg.toFixed(2)} avg over the last ${recentCount} runs. Review recent reflections and consider adjusting the agent's configuration.`,
      agentId,
      createdAt: Date.now(),
      read: false,
    }
    storage.upsertStoredItem('notifications', id, notification)

    const wsHub = await import('@/lib/server/ws-hub')
    wsHub.notify('notifications')
  } catch {
    // Non-critical: the notification is best-effort.
  }
}