/ utils / cronTasks.ts
cronTasks.ts
  1  // Scheduled prompts, stored in <project>/.claude/scheduled_tasks.json.
  2  //
  3  // Tasks come in two flavors:
  4  //   - One-shot (recurring: false/undefined) — fire once, then auto-delete.
  5  //   - Recurring (recurring: true) — fire on schedule, reschedule from now,
  6  //     persist until explicitly deleted via CronDelete or auto-expire after
  7  //     a configurable limit (DEFAULT_CRON_JITTER_CONFIG.recurringMaxAgeMs).
  8  //
  9  // File format:
//   { "tasks": [{ id, cron, prompt, createdAt, lastFiredAt?, recurring?, permanent? }] }
 11  
 12  import { randomUUID } from 'crypto'
 13  import { readFileSync } from 'fs'
 14  import { mkdir, writeFile } from 'fs/promises'
 15  import { join } from 'path'
 16  import {
 17    addSessionCronTask,
 18    getProjectRoot,
 19    getSessionCronTasks,
 20    removeSessionCronTasks,
 21  } from '../bootstrap/state.js'
 22  import { computeNextCronRun, parseCronExpression } from './cron.js'
 23  import { logForDebugging } from './debug.js'
 24  import { isFsInaccessible } from './errors.js'
 25  import { getFsImplementation } from './fsOperations.js'
 26  import { safeParseJSON } from './json.js'
 27  import { logError } from './log.js'
 28  import { jsonStringify } from './slowOperations.js'
 29  
 30  export type CronTask = {
 31    id: string
 32    /** 5-field cron string (local time) — validated on write, re-validated on read. */
 33    cron: string
 34    /** Prompt to enqueue when the task fires. */
 35    prompt: string
 36    /** Epoch ms when the task was created. Anchor for missed-task detection. */
 37    createdAt: number
 38    /**
 39     * Epoch ms of the most recent fire. Written back by the scheduler after
 40     * each recurring fire so next-fire computation survives process restarts.
 41     * The scheduler anchors first-sight from `lastFiredAt ?? createdAt` — a
 42     * never-fired task uses createdAt (correct for pinned crons like
 43     * `30 14 27 2 *` whose next-from-now is next year); a fired-before task
 44     * reconstructs the same `nextFireAt` the prior process had in memory.
 45     * Never set for one-shots (they're deleted on fire).
 46     */
 47    lastFiredAt?: number
 48    /** When true, the task reschedules after firing instead of being deleted. */
 49    recurring?: boolean
 50    /**
 51     * When true, the task is exempt from recurringMaxAgeMs auto-expiry.
 52     * System escape hatch for assistant mode's built-in tasks (catch-up/
 53     * morning-checkin/dream) — the installer's writeIfMissing() skips existing
 54     * files so re-install can't recreate them. Not settable via CronCreateTool;
 55     * only written directly to scheduled_tasks.json by src/assistant/install.ts.
 56     */
 57    permanent?: boolean
 58    /**
 59     * Runtime-only flag. false → session-scoped (never written to disk).
 60     * File-backed tasks leave this undefined; writeCronTasks strips it so
 61     * the on-disk shape stays { id, cron, prompt, createdAt, lastFiredAt?, recurring?, permanent? }.
 62     */
 63    durable?: boolean
 64    /**
 65     * Runtime-only. When set, the task was created by an in-process teammate.
 66     * The scheduler routes fires to that teammate's queue instead of the main
 67     * REPL's. Never written to disk (teammate crons are always session-only).
 68     */
 69    agentId?: string
 70  }
 71  
 72  type CronFile = { tasks: CronTask[] }
 73  
 74  const CRON_FILE_REL = join('.claude', 'scheduled_tasks.json')
 75  
 76  /**
 77   * Path to the cron file. `dir` defaults to getProjectRoot() — pass it
 78   * explicitly from contexts that don't run through main.tsx (e.g. the Agent
 79   * SDK daemon, which has no bootstrap state).
 80   */
 81  export function getCronFilePath(dir?: string): string {
 82    return join(dir ?? getProjectRoot(), CRON_FILE_REL)
 83  }
 84  
 85  /**
 86   * Read and parse .claude/scheduled_tasks.json. Returns an empty task list if the file
 87   * is missing, empty, or malformed. Tasks with invalid cron strings are
 88   * silently dropped (logged at debug level) so a single bad entry never
 89   * blocks the whole file.
 90   */
 91  export async function readCronTasks(dir?: string): Promise<CronTask[]> {
 92    const fs = getFsImplementation()
 93    let raw: string
 94    try {
 95      raw = await fs.readFile(getCronFilePath(dir), { encoding: 'utf-8' })
 96    } catch (e: unknown) {
 97      if (isFsInaccessible(e)) return []
 98      logError(e)
 99      return []
100    }
101  
102    const parsed = safeParseJSON(raw, false)
103    if (!parsed || typeof parsed !== 'object') return []
104    const file = parsed as Partial<CronFile>
105    if (!Array.isArray(file.tasks)) return []
106  
107    const out: CronTask[] = []
108    for (const t of file.tasks) {
109      if (
110        !t ||
111        typeof t.id !== 'string' ||
112        typeof t.cron !== 'string' ||
113        typeof t.prompt !== 'string' ||
114        typeof t.createdAt !== 'number'
115      ) {
116        logForDebugging(
117          `[ScheduledTasks] skipping malformed task: ${jsonStringify(t)}`,
118        )
119        continue
120      }
121      if (!parseCronExpression(t.cron)) {
122        logForDebugging(
123          `[ScheduledTasks] skipping task ${t.id} with invalid cron '${t.cron}'`,
124        )
125        continue
126      }
127      out.push({
128        id: t.id,
129        cron: t.cron,
130        prompt: t.prompt,
131        createdAt: t.createdAt,
132        ...(typeof t.lastFiredAt === 'number'
133          ? { lastFiredAt: t.lastFiredAt }
134          : {}),
135        ...(t.recurring ? { recurring: true } : {}),
136        ...(t.permanent ? { permanent: true } : {}),
137      })
138    }
139    return out
140  }
141  
/**
 * Synchronous, single-read probe for "does the cron file list any tasks?".
 * Per the lint-suppression note it is called once from
 * cronScheduler.start().
 *
 * Only checks that `tasks` is a non-empty array; individual entries are not
 * validated here. Any read or parse failure counts as "no tasks".
 *
 * @param dir Optional project directory; see getCronFilePath.
 */
export function hasCronTasksSync(dir?: string): boolean {
  let contents: string
  try {
    // eslint-disable-next-line custom-rules/no-sync-fs -- called once from cronScheduler.start()
    contents = readFileSync(getCronFilePath(dir), 'utf-8')
  } catch {
    return false
  }
  const json = safeParseJSON(contents, false)
  if (typeof json !== 'object' || !json) return false
  const { tasks } = json as Partial<CronFile>
  if (!Array.isArray(tasks)) return false
  return tasks.length > 0
}
159  
/**
 * Overwrite .claude/scheduled_tasks.json with the given tasks, creating
 * .claude/ first if it is missing.
 *
 * An empty task list still writes the file (as `{ "tasks": [] }`) instead of
 * deleting it, so a file watcher sees a change event when the last task is
 * removed.
 *
 * Runtime-only fields are removed before serializing so the on-disk shape
 * stays { id, cron, prompt, createdAt, lastFiredAt?, recurring?, permanent? }:
 *   - `durable`: everything on disk is durable by definition, and omitting
 *     it means readCronTasks() yields `durable: undefined` without extra work.
 *   - `agentId`: documented on CronTask as never written to disk.
 *
 * @param tasks Complete task list to persist (replaces the file contents).
 * @param dir Optional project directory; defaults to getProjectRoot().
 */
export async function writeCronTasks(
  tasks: CronTask[],
  dir?: string,
): Promise<void> {
  const root = dir ?? getProjectRoot()
  await mkdir(join(root, '.claude'), { recursive: true })
  const body: CronFile = {
    tasks: tasks.map(
      ({ durable: _durable, agentId: _agentId, ...persisted }) => persisted,
    ),
  }
  await writeFile(
    getCronFilePath(root),
    jsonStringify(body, null, 2) + '\n',
    'utf-8',
  )
}
183  
/**
 * Create a task and register it in the appropriate store.
 *
 * The cron string is NOT validated here; the caller must have done so
 * already (the tool layer does this in validateInput).
 *
 * @param cron 5-field cron expression.
 * @param prompt Prompt to enqueue on fire.
 * @param recurring When true the task is flagged `recurring`.
 * @param durable When true the task is appended to the scheduled-tasks file
 *   under the default project root. When false it is handed to the in-memory
 *   session store only, so it is never written to disk and does not outlive
 *   the process.
 * @param agentId Optional teammate id; attached only to session-only tasks.
 * @returns The generated task id.
 */
export async function addCronTask(
  cron: string,
  prompt: string,
  recurring: boolean,
  durable: boolean,
  agentId?: string,
): Promise<string> {
  // 8 hex chars of a UUID: short enough to show to users as-is, and ample
  // for the small number of jobs allowed (original note: MAX_JOBS=50).
  const id = randomUUID().slice(0, 8)
  const task: Pick<
    CronTask,
    'id' | 'cron' | 'prompt' | 'createdAt' | 'recurring'
  > = { id, cron, prompt, createdAt: Date.now() }
  if (recurring) task.recurring = true

  if (durable) {
    const existing = await readCronTasks()
    existing.push(task)
    await writeCronTasks(existing)
    return id
  }

  // Session-only path: stays in process memory, no file write involved.
  addSessionCronTask(agentId ? { ...task, agentId } : { ...task })
  return id
}
220  
/**
 * Delete tasks by id; ids that match nothing are ignored. Serves both the
 * fire-once cleanup path and explicit deletion.
 *
 * When `dir` is omitted (REPL path) the in-memory session store is swept
 * first, because the caller cannot know which store holds a given id. If
 * that sweep accounts for every id, the file is not read at all. When `dir`
 * is passed explicitly (daemon path) the session store is never touched.
 *
 * The file is rewritten only if at least one file-backed task was removed.
 *
 * @param ids Task ids to remove.
 * @param dir Optional project directory; see getCronFilePath.
 */
export async function removeCronTasks(
  ids: string[],
  dir?: string,
): Promise<void> {
  if (ids.length === 0) return

  if (dir === undefined) {
    const removedFromSession = removeSessionCronTasks(ids)
    if (removedFromSession === ids.length) return
  }

  const doomed = new Set(ids)
  const current = await readCronTasks(dir)
  const kept = current.filter(task => !doomed.has(task.id))
  if (kept.length !== current.length) {
    await writeCronTasks(kept, dir)
  }
}
249  
/**
 * Record `firedAt` as `lastFiredAt` on every file-backed task whose id is
 * listed, using a single read-modify-write for the whole batch. Session
 * tasks are not consulted here; only the file is read and written.
 *
 * Does nothing (no write) when `ids` is empty or none of them are present in
 * the file, e.g. because the task was deleted between firing and this call.
 *
 * NOTE(review): the original comment states that a scheduler lock limits
 * this to one calling process and that the resulting file-change reload
 * recomputes the same next-fire time; neither is visible in this file.
 *
 * @param ids Ids of the tasks that just fired.
 * @param firedAt Fire time in epoch ms.
 * @param dir Optional project directory; see getCronFilePath.
 */
export async function markCronTasksFired(
  ids: string[],
  firedAt: number,
  dir?: string,
): Promise<void> {
  if (ids.length === 0) return
  const firedIds = new Set(ids)
  const tasks = await readCronTasks(dir)
  const matched = tasks.filter(task => firedIds.has(task.id))
  if (matched.length === 0) return
  for (const task of matched) {
    task.lastFiredAt = firedAt
  }
  await writeCronTasks(tasks, dir)
}
279  
/**
 * List every known task: file-backed ones first, then session-only ones.
 *
 * Session tasks are tagged `durable: false` so callers can tell them apart;
 * file tasks come back untouched (`durable` undefined). The session store is
 * only merged in when `dir` is omitted. With an explicit `dir` (daemon
 * callers) the result is just the file contents.
 *
 * @param dir Optional project directory; see getCronFilePath.
 */
export async function listAllCronTasks(dir?: string): Promise<CronTask[]> {
  const persisted = await readCronTasks(dir)
  if (dir !== undefined) return persisted

  const merged: CronTask[] = [...persisted]
  for (const sessionTask of getSessionCronTasks()) {
    merged.push({ ...sessionTask, durable: false })
  }
  return merged
}
297  
/**
 * Compute when `cron` next fires relative to `fromMs`.
 *
 * @param cron Cron expression to evaluate.
 * @param fromMs Reference time in epoch ms.
 * @returns Epoch ms of the next fire, or null when the expression does not
 *   parse or computeNextCronRun finds no match (documented upstream as
 *   strictly after `fromMs`, searching 366 days ahead).
 */
export function nextCronRunMs(cron: string, fromMs: number): number | null {
  const parsed = parseCronExpression(cron)
  if (!parsed) return null
  const nextRun = computeNextCronRun(parsed, new Date(fromMs))
  if (!nextRun) return null
  return nextRun.getTime()
}
308  
/**
 * Tuning knobs for the cron scheduler's jitter and expiry behavior.
 *
 * Per the original note these are sourced at runtime from the
 * `tengu_kairos_cron_config` GrowthBook JSON config (see cronJitterConfig.ts)
 * so they can be adjusted fleet-wide without a client release; the defaults
 * below reproduce the behavior from before the config existed.
 */
export type CronJitterConfig = {
  /** Recurring tasks: forward delay, as a fraction of the gap between fires. */
  recurringFrac: number
  /** Recurring tasks: hard ceiling (ms) on the forward delay. */
  recurringCapMs: number
  /** One-shot tasks: largest lead (ms) by which a task may fire early. */
  oneShotMaxMs: number
  /**
   * One-shot tasks: smallest lead (ms) applied when the minute gate matches.
   * With 0, ids hashing near zero fire right on the mark; a positive value
   * guarantees no task lands exactly on the wall-clock boundary.
   */
  oneShotFloorMs: number
  /**
   * One-shot tasks: only fire times whose local minute satisfies
   * `minute % N === 0` get jittered. 30 → :00/:30, 15 → :00/:15/:30/:45,
   * 1 → every minute.
   */
  oneShotMinuteMod: number
  /**
   * Recurring tasks expire this many ms after creation unless flagged
   * `permanent`. `0` means unlimited (never auto-expire).
   *
   * Rationale from the original note: cron drives multi-day sessions (p99
   * uptime 61min → 53h post-#19931) and unbounded recurrence lets Tier-1
   * heap leaks compound. Seven days covers "check my PRs every hour this
   * week" while bounding worst-case session lifetime. Permanent tasks
   * (assistant mode's catch-up / morning-checkin / dream) are exempt because
   * install.ts's writeIfMissing() would not recreate them once deleted.
   */
  recurringMaxAgeMs: number
}

/** Defaults used whenever a caller does not supply its own config. */
export const DEFAULT_CRON_JITTER_CONFIG: CronJitterConfig = {
  recurringFrac: 0.1,
  recurringCapMs: 900_000, // 15 minutes
  oneShotMaxMs: 90_000, // 90 seconds
  oneShotFloorMs: 0,
  oneShotMinuteMod: 30,
  recurringMaxAgeMs: 604_800_000, // 7 days
}
356  
/**
 * Map a task id to a deterministic fraction in [0, 1).
 *
 * Ids produced by {@link addCronTask} start with 8 hex characters; those are
 * read as an unsigned 32-bit value and divided by 2^32, so the same id
 * always yields the same fraction.
 *
 * Any id whose first 8 characters are not all hex digits (e.g. hand-edited
 * JSON) yields 0, meaning no jitter. The explicit format check matters
 * because `parseInt` alone accepts partial prefixes, a leading sign and
 * `0x`: `"daily-01"` would parse as 0xda, and `"-1234567"` would produce a
 * negative fraction that makes recurring tasks fire early and one-shot tasks
 * fire late.
 *
 * @param taskId Task identifier.
 * @returns Fraction in [0, 1); 0 for ids without an 8-hex-digit prefix.
 */
function jitterFrac(taskId: string): number {
  const prefix = taskId.slice(0, 8)
  if (!/^[0-9a-f]{8}$/i.test(prefix)) return 0
  return parseInt(prefix, 16) / 0x1_0000_0000
}
366  
/**
 * Next fire time for a recurring task: {@link nextCronRunMs} plus a
 * deterministic, per-task forward delay. The delay keeps many sessions that
 * share a cron string (e.g. `0 * * * *`) from all firing at the same instant.
 *
 * The delay is `jitterFrac(taskId) * cfg.recurringFrac * gap`, where `gap`
 * is the distance between the next fire and the one after it, capped at
 * `cfg.recurringCapMs`. At defaults an hourly task spreads over [:00, :06),
 * while a per-minute task moves by only a few seconds.
 *
 * One-shot tasks use {@link oneShotJitteredNextCronRunMs} instead.
 *
 * @param cron Cron expression.
 * @param fromMs Reference time in epoch ms.
 * @param taskId Task id that seeds the jitter.
 * @param cfg Jitter configuration; defaults to DEFAULT_CRON_JITTER_CONFIG.
 * @returns Jittered fire time in epoch ms, or null when there is no next run.
 */
export function jitteredNextCronRunMs(
  cron: string,
  fromMs: number,
  taskId: string,
  cfg: CronJitterConfig = DEFAULT_CRON_JITTER_CONFIG,
): number | null {
  const firstFire = nextCronRunMs(cron, fromMs)
  if (firstFire === null) return null

  const secondFire = nextCronRunMs(cron, firstFire)
  // Without a following fire (e.g. a pinned date) there is no interval to
  // scale by, and a herd is very unlikely, so fire exactly on time.
  if (secondFire === null) return firstFire

  const gap = secondFire - firstFire
  const proportionalDelay = jitterFrac(taskId) * cfg.recurringFrac * gap
  return firstFire + Math.min(proportionalDelay, cfg.recurringCapMs)
}
399  
/**
 * Next fire time for a one-shot task: {@link nextCronRunMs} minus a
 * deterministic, per-task lead, applied only when the fire time's local
 * minute is a multiple of `cfg.oneShotMinuteMod`.
 *
 * One-shots are user-pinned ("remind me at 3pm"), so they are never delayed;
 * firing slightly early is unnoticeable and spreads the load from everyone
 * choosing the same round time. At defaults (mod 30, max 90 s, floor 0) only
 * :00 and :30 fires are jittered.
 *
 * Example from the original note: during an incident, ops can push
 * `{oneShotMinuteMod: 15, oneShotMaxMs: 300000, oneShotFloorMs: 30000}` to
 * spread :00/:15/:30/:45 fires over a [t-5min, t-30s] window.
 *
 * The gate inspects the computed fire time, not the cron text, so
 * `0 15 * * *`, step expressions and `0,30 9 * * *` are all covered. The
 * result is clamped to `fromMs` so a task created inside its own lead window
 * does not fire before it was created.
 *
 * @param cron Cron expression.
 * @param fromMs Reference time in epoch ms (also the lower clamp).
 * @param taskId Task id that seeds the jitter.
 * @param cfg Jitter configuration; defaults to DEFAULT_CRON_JITTER_CONFIG.
 * @returns Fire time in epoch ms, or null when there is no next run.
 */
export function oneShotJitteredNextCronRunMs(
  cron: string,
  fromMs: number,
  taskId: string,
  cfg: CronJitterConfig = DEFAULT_CRON_JITTER_CONFIG,
): number | null {
  const scheduled = nextCronRunMs(cron, fromMs)
  if (scheduled === null) return null

  // Cron has 1-minute resolution, so checking the minute field is enough to
  // spot the hot marks. Local minutes (getMinutes, not getUTCMinutes) are
  // used deliberately: cron runs in local time, and in half-hour-offset
  // zones (e.g. India, UTC+5:30) local :00 is UTC :30.
  const localMinute = new Date(scheduled).getMinutes()
  const onHotMark = localMinute % cfg.oneShotMinuteMod === 0
  if (!onHotMark) return scheduled

  // Uniform over [floor, max): with floor = 0 this is just frac * max; with
  // floor > 0 even an id hashing to 0 still gets `floor` ms of lead.
  const { oneShotFloorMs: floorMs, oneShotMaxMs: maxMs } = cfg
  const lead = floorMs + jitterFrac(taskId) * (maxMs - floorMs)

  // scheduled > fromMs per nextCronRunMs, so the clamp only matters when the
  // task was created within its own lead window.
  return Math.max(scheduled - lead, fromMs)
}
446  
/**
 * Select the tasks whose first scheduled run after `createdAt` is already
 * earlier than `nowMs`. Applies the same rule to one-shot and recurring
 * tasks. Tasks with no computable next run are never reported as missed.
 *
 * @param tasks Tasks to inspect (not mutated).
 * @param nowMs Current time in epoch ms.
 * @returns The missed tasks, in their original order.
 */
export function findMissedTasks(tasks: CronTask[], nowMs: number): CronTask[] {
  const missed: CronTask[] = []
  for (const task of tasks) {
    const due = nextCronRunMs(task.cron, task.createdAt)
    if (due !== null && due < nowMs) {
      missed.push(task)
    }
  }
  return missed
}