/ utils / listSessionsImpl.ts
listSessionsImpl.ts
  1  /**
  2   * Standalone implementation of listSessions for the Agent SDK.
  3   *
  4   * Dependencies are kept minimal and portable — no bootstrap/state.ts,
  5   * no analytics, no bun:bundle, no module-scope mutable state. This module
  6   * can be imported safely from the SDK entrypoint without triggering CLI
  7   * initialization or pulling in expensive dependency chains.
  8   */
  9  
 10  import type { Dirent } from 'fs'
 11  import { readdir, stat } from 'fs/promises'
 12  import { basename, join } from 'path'
 13  import { getWorktreePathsPortable } from './getWorktreePathsPortable.js'
 14  import type { LiteSessionFile } from './sessionStoragePortable.js'
 15  import {
 16    canonicalizePath,
 17    extractFirstPromptFromHead,
 18    extractJsonStringField,
 19    extractLastJsonStringField,
 20    findProjectDir,
 21    getProjectsDir,
 22    MAX_SANITIZED_LENGTH,
 23    readSessionLite,
 24    sanitizePath,
 25    validateUuid,
 26  } from './sessionStoragePortable.js'
 27  
/**
 * Session metadata returned by listSessions.
 * Contains only data extractable from stat + head/tail reads — no full
 * JSONL parsing required.
 */
export type SessionInfo = {
  /** Session identifier, passed through from the caller (derived from the `.jsonl` file name when listing). */
  sessionId: string
  /**
   * Display text for the session: the title if one exists, otherwise the
   * tail's last `lastPrompt`, then the tail's last `summary`, then
   * `firstPrompt`. Never empty — sessions without any of these are dropped.
   */
  summary: string
  /**
   * Modification time of the session file. Epoch ms when taken from the
   * stat pass; otherwise the lite read's `mtime` (presumably also epoch ms
   * — TODO confirm against readSessionLite).
   */
  lastModified: number
  /** `size` reported by the lite read of the session file. */
  fileSize?: number
  /** Last `customTitle` found (tail first, then head); falls back to the last `aiTitle`. */
  customTitle?: string
  /** First prompt extracted from the head of the file, if any. */
  firstPrompt?: string
  /** Last `gitBranch` in the tail, else the first `gitBranch` in the head. */
  gitBranch?: string
  /** First `cwd` in the head, else the project path supplied by the caller. */
  cwd?: string
  /** `tag` value from the last tail line that starts with `{"type":"tag"`. */
  tag?: string
  /** Epoch ms — from first entry's ISO timestamp. Undefined if unparseable. */
  createdAt?: number
}
 46  
/** Options accepted by listSessionsImpl. Every field is optional. */
export type ListSessionsOptions = {
  /**
   * Directory to list sessions for. When provided, returns sessions for
   * this project directory (and optionally its git worktrees). When omitted,
   * returns sessions across all projects.
   */
  dir?: string
  /**
   * Maximum number of sessions to return. Omitted, `0`, or a negative
   * value all mean "no limit".
   */
  limit?: number
  /**
   * Number of sessions to skip from the start of the sorted result set.
   * Use with `limit` for pagination. Defaults to 0.
   */
  offset?: number
  /**
   * When `dir` is provided and the directory is inside a git repository,
   * include sessions from all git worktree paths. Defaults to `true`.
   */
  includeWorktrees?: boolean
}
 67  
 68  // ---------------------------------------------------------------------------
 69  // Field extraction — shared by listSessionsImpl and getSessionInfoImpl
 70  // ---------------------------------------------------------------------------
 71  
 72  /**
 73   * Parses SessionInfo fields from a lite session read (head/tail/stat).
 74   * Returns null for sidechain sessions or metadata-only sessions with no
 75   * extractable summary.
 76   *
 77   * Exported for reuse by getSessionInfoImpl.
 78   */
 79  export function parseSessionInfoFromLite(
 80    sessionId: string,
 81    lite: LiteSessionFile,
 82    projectPath?: string,
 83  ): SessionInfo | null {
 84    const { head, tail, mtime, size } = lite
 85  
 86    // Check first line for sidechain sessions
 87    const firstNewline = head.indexOf('\n')
 88    const firstLine = firstNewline >= 0 ? head.slice(0, firstNewline) : head
 89    if (
 90      firstLine.includes('"isSidechain":true') ||
 91      firstLine.includes('"isSidechain": true')
 92    ) {
 93      return null
 94    }
 95    // User title (customTitle) wins over AI title (aiTitle); distinct
 96    // field names mean extractLastJsonStringField naturally disambiguates.
 97    const customTitle =
 98      extractLastJsonStringField(tail, 'customTitle') ||
 99      extractLastJsonStringField(head, 'customTitle') ||
100      extractLastJsonStringField(tail, 'aiTitle') ||
101      extractLastJsonStringField(head, 'aiTitle') ||
102      undefined
103    const firstPrompt = extractFirstPromptFromHead(head) || undefined
104    // First entry's ISO timestamp → epoch ms. More reliable than
105    // stat().birthtime which is unsupported on some filesystems.
106    const firstTimestamp = extractJsonStringField(head, 'timestamp')
107    let createdAt: number | undefined
108    if (firstTimestamp) {
109      const parsed = Date.parse(firstTimestamp)
110      if (!Number.isNaN(parsed)) createdAt = parsed
111    }
112    // last-prompt tail entry (captured by extractFirstPrompt at write
113    // time, filtered) shows what the user was most recently doing.
114    // Head scan is fallback for sessions without a last-prompt entry.
115    const summary =
116      customTitle ||
117      extractLastJsonStringField(tail, 'lastPrompt') ||
118      extractLastJsonStringField(tail, 'summary') ||
119      firstPrompt
120  
121    // Skip metadata-only sessions (no title, no summary, no prompt)
122    if (!summary) return null
123    const gitBranch =
124      extractLastJsonStringField(tail, 'gitBranch') ||
125      extractJsonStringField(head, 'gitBranch') ||
126      undefined
127    const sessionCwd =
128      extractJsonStringField(head, 'cwd') || projectPath || undefined
129    // Type-scope tag extraction to the {"type":"tag"} JSONL line to avoid
130    // collision with tool_use inputs containing a `tag` parameter (git tag,
131    // Docker tags, cloud resource tags). Mirrors sessionStorage.ts:608.
132    const tagLine = tail.split('\n').findLast(l => l.startsWith('{"type":"tag"'))
133    const tag = tagLine
134      ? extractLastJsonStringField(tagLine, 'tag') || undefined
135      : undefined
136  
137    return {
138      sessionId,
139      summary,
140      lastModified: mtime,
141      fileSize: size,
142      customTitle,
143      firstPrompt,
144      gitBranch,
145      cwd: sessionCwd,
146      tag,
147      createdAt,
148    }
149  }
150  
151  // ---------------------------------------------------------------------------
152  // Candidate discovery — stat-only pass. Cheap: 1 syscall per file, no
153  // data reads. Lets us sort/filter before doing expensive head/tail reads.
154  // ---------------------------------------------------------------------------
155  
/** A session file found on disk whose contents have not been read yet. */
type Candidate = {
  /** Session id taken from the file name (the part before `.jsonl`). */
  sessionId: string
  /** Path of the `.jsonl` file (project directory joined with the file name). */
  filePath: string
  /** File mtime in epoch ms from the stat pass, or 0 when stat was skipped. */
  mtime: number
  /** Project path for cwd fallback when file lacks a cwd field. */
  projectPath?: string
}
163  
/**
 * Enumerates the session files (`<uuid>.jsonl`) directly inside a directory.
 *
 * @param projectDir - Directory to scan. An unreadable directory yields `[]`.
 * @param doStat - When true, each file is stat'ed for its mtime and files
 *   that fail to stat are dropped. When false, mtime is left at 0 and the
 *   caller has to sort/dedup after reading file contents.
 * @param projectPath - Copied onto every candidate as its cwd fallback.
 * @returns One candidate per file name that ends in `.jsonl` and whose stem
 *   passes `validateUuid`.
 */
export async function listCandidates(
  projectDir: string,
  doStat: boolean,
  projectPath?: string,
): Promise<Candidate[]> {
  const sessionSuffix = '.jsonl'

  let entries: string[]
  try {
    entries = await readdir(projectDir)
  } catch {
    return []
  }

  const toCandidate = async (fileName: string): Promise<Candidate | null> => {
    if (!fileName.endsWith(sessionSuffix)) {
      return null
    }
    const sessionId = validateUuid(fileName.slice(0, -sessionSuffix.length))
    if (!sessionId) {
      return null
    }
    const filePath = join(projectDir, fileName)

    let mtime = 0
    if (doStat) {
      try {
        mtime = (await stat(filePath)).mtime.getTime()
      } catch {
        // The file could not be stat'ed; leave it out.
        return null
      }
    }
    return { sessionId, filePath, mtime, projectPath }
  }

  const resolved = await Promise.all(entries.map(name => toCandidate(name)))

  const found: Candidate[] = []
  for (const candidate of resolved) {
    if (candidate !== null) found.push(candidate)
  }
  return found
}
199  
/**
 * Reads one candidate's file and turns it into a SessionInfo.
 *
 * @returns `null` when the lite read yields nothing or the session is
 *   filtered out by parseSessionInfoFromLite (sidechain, no summary).
 */
async function readCandidate(c: Candidate): Promise<SessionInfo | null> {
  const lite = await readSessionLite(c.filePath)
  const info = lite
    ? parseSessionInfoFromLite(c.sessionId, lite, c.projectPath)
    : null

  // A non-zero candidate mtime came from the stat pass and was used as the
  // sort key, so it overrides the lite read's mtime. Zero is the "not
  // stat'ed" placeholder, in which case the lite mtime stays.
  if (info && c.mtime) {
    info.lastModified = c.mtime
  }
  return info
}
217  
218  // ---------------------------------------------------------------------------
219  // Sort + limit — batch-read candidates in sorted order until `limit`
220  // survivors are collected (some candidates filter out on full read).
221  // ---------------------------------------------------------------------------
222  
/**
 * Batch size for concurrent reads when walking the sorted candidate list:
 * applySortAndLimit reads this many candidate files in parallel per batch.
 */
const READ_BATCH_SIZE = 32
225  
/**
 * Orders candidates newest-first: larger mtime sorts earlier, and equal
 * mtimes are broken by sessionId in descending order so the result does
 * not depend on input order.
 *
 * @returns Negative when `a` sorts before `b`, positive when after, 0 when tied.
 */
function compareDesc(a: Candidate, b: Candidate): number {
  if (a.mtime !== b.mtime) {
    return b.mtime - a.mtime
  }
  if (a.sessionId > b.sessionId) return -1
  if (a.sessionId < b.sessionId) return 1
  return 0
}
234  
/**
 * Sorts candidates newest-first, then reads them in batches until enough
 * sessions have been collected.
 *
 * Note: `candidates` is sorted in place.
 *
 * @param candidates - Stat'ed candidates; may contain several copies of the
 *   same sessionId.
 * @param limit - Maximum sessions to return; undefined, 0, or a negative
 *   value means unlimited.
 * @param offset - Number of surviving (readable, deduplicated) sessions to
 *   skip before collecting.
 */
async function applySortAndLimit(
  candidates: Candidate[],
  limit: number | undefined,
  offset: number,
): Promise<SessionInfo[]> {
  candidates.sort(compareDesc)

  const cap = limit && limit > 0 ? limit : Infinity
  const picked: SessionInfo[] = []
  // Dedup happens after the read, not before: with the list sorted
  // newest-first, the first copy of a sessionId that reads successfully is
  // the newest valid one. Deduping up front would lose a session whose
  // newest copy is unreadable or empty.
  const emittedIds = new Set<string>()
  let skippedSoFar = 0

  for (
    let start = 0;
    start < candidates.length && picked.length < cap;
    start += READ_BATCH_SIZE
  ) {
    const chunk = candidates.slice(start, start + READ_BATCH_SIZE)
    const infos = await Promise.all(chunk.map(readCandidate))

    for (const info of infos) {
      if (picked.length >= cap) break
      if (!info || emittedIds.has(info.sessionId)) continue
      emittedIds.add(info.sessionId)
      if (skippedSoFar < offset) {
        skippedSoFar++
        continue
      }
      picked.push(info)
    }
  }

  return picked
}
272  
/**
 * Unpaginated path: reads every candidate with no prior stat pass, keeps
 * the copy with the greatest `lastModified` per sessionId, and returns the
 * survivors newest-first (ties broken by sessionId, descending).
 */
async function readAllAndSort(candidates: Candidate[]): Promise<SessionInfo[]> {
  const infos = await Promise.all(candidates.map(readCandidate))

  const newestById = new Map<string, SessionInfo>()
  for (const info of infos) {
    if (!info) continue
    const prior = newestById.get(info.sessionId)
    // Keep the earlier entry unless this copy is strictly newer.
    if (prior && !(info.lastModified > prior.lastModified)) continue
    newestById.set(info.sessionId, info)
  }

  const newestFirst = (a: SessionInfo, b: SessionInfo): number => {
    if (a.lastModified !== b.lastModified) {
      return b.lastModified - a.lastModified
    }
    if (a.sessionId > b.sessionId) return -1
    if (a.sessionId < b.sessionId) return 1
    return 0
  }

  return [...newestById.values()].sort(newestFirst)
}
300  
301  // ---------------------------------------------------------------------------
302  // Project directory enumeration (single-project vs all-projects)
303  // ---------------------------------------------------------------------------
304  
/**
 * Collects candidate session files for one project directory and, when
 * requested, for the git worktrees that belong to it.
 *
 * @param dir - Project directory; canonicalized before use.
 * @param includeWorktrees - Whether to look up worktree paths for `dir`.
 *   A failed lookup is treated as "no worktrees".
 * @param doStat - Forwarded to listCandidates.
 * @returns Candidates from the directory's own project dir plus every
 *   projects-dir entry that matches a worktree path; `[]` when nothing is found.
 */
async function gatherProjectCandidates(
  dir: string,
  includeWorktrees: boolean,
  doStat: boolean,
): Promise<Candidate[]> {
  const canonicalDir = await canonicalizePath(dir)

  // Scan only the project dir that belongs to `canonicalDir`.
  const scanOwnDirOnly = async (): Promise<Candidate[]> => {
    const ownDir = await findProjectDir(canonicalDir)
    return ownDir ? listCandidates(ownDir, doStat, canonicalDir) : []
  }

  let worktreePaths: string[] = []
  if (includeWorktrees) {
    try {
      worktreePaths = await getWorktreePathsPortable(canonicalDir)
    } catch {
      worktreePaths = []
    }
  }

  // Zero or one worktree path (git unavailable, scanning disabled, or a
  // plain checkout): there is nothing extra to match.
  if (worktreePaths.length <= 1) {
    return scanOwnDirOnly()
  }

  const projectsDir = getProjectsDir()
  const foldCase = process.platform === 'win32'
  const normalize = (s: string): string => (foldCase ? s.toLowerCase() : s)

  // Longest sanitized prefix first, so the most specific worktree wins
  // when more than one could match a directory name.
  const byPrefixLength = worktreePaths
    .map(wt => ({ path: wt, prefix: normalize(sanitizePath(wt)) }))
    .sort((a, b) => b.prefix.length - a.prefix.length)

  let entries: Dirent[]
  try {
    entries = await readdir(projectsDir, { withFileTypes: true })
  } catch {
    // Projects dir is unreadable: fall back to the single-directory scan.
    return scanOwnDirOnly()
  }

  const collected: Candidate[] = []
  const visited = new Set<string>()

  // The caller's own directory is always included. It may be a
  // subdirectory (e.g. /repo/packages/my-app) that matches no worktree
  // root prefix.
  const ownDir = await findProjectDir(canonicalDir)
  if (ownDir) {
    visited.add(normalize(basename(ownDir)))
    collected.push(...(await listCandidates(ownDir, doStat, canonicalDir)))
  }

  for (const entry of entries) {
    if (!entry.isDirectory()) continue
    const key = normalize(entry.name)
    if (visited.has(key)) continue

    // Exact match in general, so /root/project does not pick up
    // /root/project-foo. Prefix match (prefix + '-') is allowed only when
    // the prefix length is >= MAX_SANITIZED_LENGTH — presumably the
    // truncated-name case where a hash suffix follows; confirm against
    // sanitizePath.
    const owner = byPrefixLength.find(
      ({ prefix }) =>
        key === prefix ||
        (prefix.length >= MAX_SANITIZED_LENGTH &&
          key.startsWith(prefix + '-')),
    )
    if (!owner) continue

    visited.add(key)
    collected.push(
      ...(await listCandidates(
        join(projectsDir, entry.name),
        doStat,
        owner.path,
      )),
    )
  }

  return collected
}
402  
/**
 * Collects candidate session files from every directory directly under
 * the projects dir. Candidates carry no project path. An unreadable
 * projects dir yields `[]`.
 *
 * @param doStat - Forwarded to listCandidates.
 */
async function gatherAllCandidates(doStat: boolean): Promise<Candidate[]> {
  const root = getProjectsDir()

  let entries: Dirent[]
  try {
    entries = await readdir(root, { withFileTypes: true })
  } catch {
    return []
  }

  // Start every per-directory listing before awaiting any, so they run
  // concurrently.
  const pending: Promise<Candidate[]>[] = []
  for (const entry of entries) {
    if (entry.isDirectory()) {
      pending.push(listCandidates(join(root, entry.name), doStat))
    }
  }

  const perProject = await Promise.all(pending)
  return perProject.flat()
}
424  
/**
 * Lists sessions using metadata pulled from stat + head/tail reads.
 *
 * With `dir`, the result covers that project directory and (by default)
 * its git worktrees; without it, every project is covered.
 *
 * `limit`/`offset` apply to the filtered, sorted result set. When a
 * positive `limit` or a positive `offset` is given, candidates are first
 * stat'ed and sorted so that only as many files as needed are read —
 * e.g. `limit: 20` over 1000 sessions costs ~1000 stats plus a few dozen
 * content reads (reads happen in batches) rather than 1000 content reads.
 * Otherwise the stat pass is skipped and every candidate is read, then
 * sorted.
 *
 * @param options - See ListSessionsOptions; may be omitted entirely.
 * @returns Sessions ordered newest-first.
 */
export async function listSessionsImpl(
  options?: ListSessionsOptions,
): Promise<SessionInfo[]> {
  const opts: ListSessionsOptions = options ?? {}
  const skip = opts.offset ?? 0

  // A stat pass only pays off when the full set will not be read anyway.
  // limit: 0 means "no limit", so it counts as unset here.
  const hasLimit = opts.limit !== undefined && opts.limit > 0
  const paginated = hasLimit || skip > 0

  let candidates: Candidate[]
  if (opts.dir) {
    candidates = await gatherProjectCandidates(
      opts.dir,
      opts.includeWorktrees ?? true,
      paginated,
    )
  } else {
    candidates = await gatherAllCandidates(paginated)
  }

  return paginated
    ? applySortAndLimit(candidates, opts.limit, skip)
    : readAllAndSort(candidates)
}