/ utils / cronTasksLock.ts
cronTasksLock.ts
  1  // Scheduler lease lock for .claude/scheduled_tasks.json.
  2  //
  3  // When multiple Claude sessions run in the same project directory, only one
  4  // should drive the cron scheduler. The first session to acquire this lock
  5  // becomes the scheduler; others stay passive and periodically probe the lock.
  6  // If the owner dies (PID no longer running), a passive session takes over.
  7  //
  8  // Pattern mirrors computerUseLock.ts: O_EXCL atomic create, PID liveness
  9  // probe, stale-lock recovery, cleanup-on-exit.
 10  
 11  import { mkdir, readFile, unlink, writeFile } from 'fs/promises'
 12  import { dirname, join } from 'path'
 13  import { z } from 'zod/v4'
 14  import { getProjectRoot, getSessionId } from '../bootstrap/state.js'
 15  import { registerCleanup } from './cleanupRegistry.js'
 16  import { logForDebugging } from './debug.js'
 17  import { getErrnoCode } from './errors.js'
 18  import { isProcessRunning } from './genericProcessUtils.js'
 19  import { safeParseJSON } from './json.js'
 20  import { lazySchema } from './lazySchema.js'
 21  import { jsonStringify } from './slowOperations.js'
 22  
 23  const LOCK_FILE_REL = join('.claude', 'scheduled_tasks.lock')
 24  
 25  const schedulerLockSchema = lazySchema(() =>
 26    z.object({
 27      sessionId: z.string(),
 28      pid: z.number(),
 29      acquiredAt: z.number(),
 30    }),
 31  )
 32  type SchedulerLock = z.infer<ReturnType<typeof schedulerLockSchema>>
 33  
 34  /**
 35   * Options for out-of-REPL callers (Agent SDK daemon) that don't have
 36   * bootstrap state. When omitted, falls back to getProjectRoot() +
 37   * getSessionId() as before. lockIdentity should be stable for the lifetime
 38   * of one daemon process (e.g. a randomUUID() captured at startup).
 39   */
 40  export type SchedulerLockOptions = {
 41    dir?: string
 42    lockIdentity?: string
 43  }
 44  
 45  let unregisterCleanup: (() => void) | undefined
 46  // Suppress repeat "held by X" log lines when polling a live owner.
 47  let lastBlockedBy: string | undefined
 48  
 49  function getLockPath(dir?: string): string {
 50    return join(dir ?? getProjectRoot(), LOCK_FILE_REL)
 51  }
 52  
 53  async function readLock(dir?: string): Promise<SchedulerLock | undefined> {
 54    let raw: string
 55    try {
 56      raw = await readFile(getLockPath(dir), 'utf8')
 57    } catch {
 58      return undefined
 59    }
 60    const result = schedulerLockSchema().safeParse(safeParseJSON(raw, false))
 61    return result.success ? result.data : undefined
 62  }
 63  
 64  async function tryCreateExclusive(
 65    lock: SchedulerLock,
 66    dir?: string,
 67  ): Promise<boolean> {
 68    const path = getLockPath(dir)
 69    const body = jsonStringify(lock)
 70    try {
 71      await writeFile(path, body, { flag: 'wx' })
 72      return true
 73    } catch (e: unknown) {
 74      const code = getErrnoCode(e)
 75      if (code === 'EEXIST') return false
 76      if (code === 'ENOENT') {
 77        // .claude/ doesn't exist yet — create it and retry once. In steady
 78        // state the dir already exists (scheduled_tasks.json lives there),
 79        // so this path is hit at most once.
 80        await mkdir(dirname(path), { recursive: true })
 81        try {
 82          await writeFile(path, body, { flag: 'wx' })
 83          return true
 84        } catch (retryErr: unknown) {
 85          if (getErrnoCode(retryErr) === 'EEXIST') return false
 86          throw retryErr
 87        }
 88      }
 89      throw e
 90    }
 91  }
 92  
 93  function registerLockCleanup(opts?: SchedulerLockOptions): void {
 94    unregisterCleanup?.()
 95    unregisterCleanup = registerCleanup(async () => {
 96      await releaseSchedulerLock(opts)
 97    })
 98  }
 99  
/**
 * Try to acquire the scheduler lock for the current session.
 *
 * Uses O_EXCL ('wx') for an atomic test-and-set create. If the file already
 * exists:
 *   - Already ours → true (idempotent re-acquire; the stored PID is
 *     refreshed when the same owner key is now running under a new PID)
 *   - Another live PID → false
 *   - Stale (PID dead) or corrupt/unreadable → unlink and retry the
 *     exclusive create once
 *
 * Stale recovery is unlink-then-create, which is not atomic as a pair: a
 * slow racer could unlink a lock that a faster racer has just re-created.
 * To keep that window small the lock is re-read immediately before the
 * unlink, and recovery is abandoned if the file changed in the meantime.
 * This narrows the race but does not eliminate it.
 *
 * @param opts Optional directory / owner-key overrides for callers without
 *   bootstrap state.
 * @returns true if this process holds the lock when the call returns; false
 *   if a live owner holds it or another session won the recovery race.
 *   Callers that get false are expected to probe again later.
 * @throws Filesystem errors from creating or rewriting the lock file, other
 *   than the EEXIST/ENOENT cases handled by the exclusive create.
 */
export async function tryAcquireSchedulerLock(
  opts?: SchedulerLockOptions,
): Promise<boolean> {
  const dir = opts?.dir
  // "sessionId" in the lock file is really just a stable owner key. REPL
  // uses getSessionId(); daemon callers supply their own UUID. PID remains
  // the liveness signal regardless.
  const sessionId = opts?.lockIdentity ?? getSessionId()
  const lock: SchedulerLock = {
    sessionId,
    pid: process.pid,
    acquiredAt: Date.now(),
  }

  // Fast path: no lock file exists yet.
  if (await tryCreateExclusive(lock, dir)) {
    lastBlockedBy = undefined
    registerLockCleanup(opts)
    logForDebugging(
      `[ScheduledTasks] acquired scheduler lock (PID ${process.pid})`,
    )
    return true
  }

  const existing = await readLock(dir)

  // Already ours (idempotent). After --resume the session ID is restored
  // but the process has a new PID — update the lock file so other sessions
  // see a live PID and don't steal it.
  if (existing?.sessionId === sessionId) {
    if (existing.pid !== process.pid) {
      await writeFile(getLockPath(dir), jsonStringify(lock))
      registerLockCleanup(opts)
    }
    // We are the owner, so any earlier "held by X" suppression is obsolete.
    lastBlockedBy = undefined
    return true
  }

  // Another live session — blocked. Log only when the blocker changes so a
  // polling caller doesn't repeat the same line on every probe.
  if (existing && isProcessRunning(existing.pid)) {
    if (lastBlockedBy !== existing.sessionId) {
      lastBlockedBy = existing.sessionId
      logForDebugging(
        `[ScheduledTasks] scheduler lock held by session ${existing.sessionId} (PID ${existing.pid})`,
      )
    }
    return false
  }

  // From here the lock is either stale (owner PID is dead) or readLock
  // returned undefined (missing, unreadable, or unparseable); both are
  // recovered the same way.
  if (existing) {
    logForDebugging(
      `[ScheduledTasks] recovering stale scheduler lock from PID ${existing.pid}`,
    )
  }

  // Re-check right before unlinking. If the file no longer matches what we
  // judged stale, another session has already recovered it (or a creator
  // has finished writing it) and unlinking now would delete a live lock.
  const current = await readLock(dir)
  if (!isSameLock(existing, current)) {
    return false
  }

  await unlink(getLockPath(dir)).catch(() => {})
  if (await tryCreateExclusive(lock, dir)) {
    lastBlockedBy = undefined
    registerLockCleanup(opts)
    logForDebugging(
      `[ScheduledTasks] acquired scheduler lock (PID ${process.pid})`,
    )
    return true
  }
  // Another session won the recovery race.
  return false
}

/**
 * Whether two lock-file reads describe the same lock: both undefined, or
 * both present with identical sessionId, pid and acquiredAt.
 */
function isSameLock(
  a: SchedulerLock | undefined,
  b: SchedulerLock | undefined,
): boolean {
  if (a === undefined || b === undefined) return a === b
  return (
    a.sessionId === b.sessionId &&
    a.pid === b.pid &&
    a.acquiredAt === b.acquiredAt
  )
}
174  
/**
 * Release the scheduler lock if the current process owns it.
 *
 * Always drops the registered exit-cleanup hook and resets the "held by"
 * log suppression, even when there is no lock to remove. The file is only
 * unlinked when it carries both this caller's owner key and this process's
 * PID: after a same-session takeover by a newer process, the lock file has
 * our owner key but the newer PID, and deleting it would take the lock away
 * from a live owner.
 *
 * Best-effort: never throws. A lock that is already gone is ignored; other
 * unlink failures are only logged.
 *
 * @param opts Same directory / owner-key overrides the lock was acquired
 *   with.
 */
export async function releaseSchedulerLock(
  opts?: SchedulerLockOptions,
): Promise<void> {
  unregisterCleanup?.()
  unregisterCleanup = undefined
  lastBlockedBy = undefined

  const dir = opts?.dir
  const sessionId = opts?.lockIdentity ?? getSessionId()
  const existing = await readLock(dir)
  if (!existing || existing.sessionId !== sessionId) return
  // Same owner key but a different PID: another process took the lock over
  // and is now the holder — leave its lock in place.
  if (existing.pid !== process.pid) return

  try {
    await unlink(getLockPath(dir))
    logForDebugging('[ScheduledTasks] released scheduler lock')
  } catch (e: unknown) {
    const code = getErrnoCode(e)
    // ENOENT means the file is already gone, which is the desired end state.
    if (code !== 'ENOENT') {
      logForDebugging(
        `[ScheduledTasks] failed to release scheduler lock (${code ?? 'unknown error'})`,
      )
    }
  }
}
194    }
195  }