/ libs / typescript / integrations / codex / src / transcript.ts
transcript.ts
  1  /**
  2   * Transcript parsing utilities for Codex CLI rollout JSONL files.
  3   *
  4   * Codex CLI transcripts use a RolloutLine format defined in
  5   * codex-rs/protocol/src/protocol.rs. Each line is:
  6   *   {"timestamp": "...", "type": "<variant>", "payload": {...}}
  7   *
  8   * Turns are delimited by event_msg task_started / task_complete pairs.
  9   *
 10   * References:
 11   * - Protocol types: github.com/openai/codex codex-rs/protocol/src/protocol.rs
 12   * - Rollout recorder: github.com/openai/codex codex-rs/rollout/src/recorder.rs
 13   */
 14  
 15  import { readFileSync, readdirSync, existsSync } from 'node:fs';
 16  import { join } from 'node:path';
 17  import { homedir } from 'node:os';
 18  import type {
 19    RolloutLine,
 20    ResponseItemPayload,
 21    EventMsgPayload,
 22    SessionMetaPayload,
 23    TokenUsage,
 24    ContentBlock,
 25  } from './types.js';
 26  
 27  export const NANOSECONDS_PER_MS = 1e6;
 28  
 29  /**
 30   * Read and parse a Codex JSONL transcript file.
 31   */
 32  export function readTranscript(path: string): RolloutLine[] {
 33    const content = readFileSync(path, 'utf-8');
 34    return content
 35      .split('\n')
 36      .filter((line) => line.trim())
 37      .map((line) => JSON.parse(line) as RolloutLine);
 38  }
 39  
 40  /**
 41   * Parse an ISO timestamp string to nanoseconds since Unix epoch.
 42   */
 43  export function parseTimestampToNs(timestamp: string | undefined | null): number | null {
 44    if (!timestamp) {
 45      return null;
 46    }
 47    try {
 48      const ms = new Date(timestamp).getTime();
 49      if (isNaN(ms)) {
 50        return null;
 51      }
 52      return ms * NANOSECONDS_PER_MS;
 53    } catch {
 54      return null;
 55    }
 56  }
 57  
 58  /**
 59   * Extract text from a response_item content field.
 60   * Content is an array of ContentBlock objects with type "input_text" or "output_text".
 61   */
 62  export function extractTextFromContent(content: ContentBlock[] | string | undefined): string {
 63    if (!content) {
 64      return '';
 65    }
 66    if (typeof content === 'string') {
 67      return content;
 68    }
 69    if (!Array.isArray(content)) {
 70      return '';
 71    }
 72    return content
 73      .filter((block) => block.type === 'input_text' || block.type === 'output_text')
 74      .map((block) => block.text)
 75      .join('\n');
 76  }
 77  
 78  /**
 79   * Find the last user prompt in the transcript.
 80   * User prompts are response_item records with payload.type=message and payload.role=user
 81   * whose content has input_text blocks that aren't system/developer injections.
 82   */
 83  export function findLastUserPrompt(records: RolloutLine[]): { text: string; index: number } | null {
 84    for (let i = records.length - 1; i >= 0; i--) {
 85      const record = records[i];
 86      if (record.type !== 'response_item') {
 87        continue;
 88      }
 89      const payload = record.payload as ResponseItemPayload;
 90      if (payload.type !== 'message' || payload.role !== 'user') {
 91        continue;
 92      }
 93  
 94      const text = extractTextFromContent(payload.content);
 95      // Skip system/developer context injections (start with XML-like tags)
 96      if (text && !text.startsWith('<')) {
 97        return { text, index: i };
 98      }
 99    }
100    return null;
101  }
102  
/**
 * Return the slice of records that makes up the final turn.
 *
 * A turn begins at an `event_msg` whose payload type is `task_started` and
 * ends at one whose payload type is `task_complete`. The slice runs from the
 * last `task_started` through the last `task_complete` (inclusive). When the
 * last `task_complete` precedes the last `task_started`, or none exists, the
 * turn is considered in progress and the slice runs to the end of `records`.
 *
 * @param records - Parsed transcript records.
 * @returns The last turn's records; the original `records` array itself when
 *   no `task_started` event is present.
 */
export function getLastTurnRecords(records: RolloutLine[]): RolloutLine[] {
  // -1 means "not seen yet".
  let startIdx = -1;
  let completeIdx = -1;

  for (const [idx, entry] of records.entries()) {
    if (entry.type !== 'event_msg') {
      continue;
    }
    const eventType = (entry.payload as EventMsgPayload).type;
    if (eventType === 'task_started') {
      startIdx = idx;
    } else if (eventType === 'task_complete') {
      completeIdx = idx;
    }
  }

  if (startIdx < 0) {
    return records;
  }

  // A completion at or after the start closes the turn; otherwise it is still running.
  const turnClosed = completeIdx >= startIdx;
  return records.slice(startIdx, turnClosed ? completeIdx + 1 : records.length);
}
130  
/**
 * Return the `last_token_usage` reported by the most recent `token_count`
 * event that carries one.
 *
 * Only `event_msg` records with payload type `token_count` are inspected;
 * events whose `info` (or `info.last_token_usage`) is absent are ignored, so
 * an earlier event's value is kept if later ones have none.
 *
 * @param records - Parsed transcript records (typically a single turn).
 * @returns The usage object, or `null` when no event provides one.
 */
export function getTokenUsage(records: RolloutLine[]): TokenUsage | null {
  let latest: TokenUsage | null = null;
  for (const entry of records) {
    if (entry.type === 'event_msg') {
      const event = entry.payload as EventMsgPayload;
      if (event.type === 'token_count') {
        const reported = event.info?.last_token_usage;
        if (reported) {
          latest = reported;
        }
      }
    }
  }
  return latest;
}
150  
/**
 * Determine the model name recorded in a transcript.
 *
 * Walks the records in order and returns the `model` field of the first
 * `session_meta` or `turn_context` record whose `model` is a string.
 *
 * @param records - Parsed transcript records.
 * @returns The model name, or the literal `'unknown'` when none is found.
 */
export function getModel(records: RolloutLine[]): string {
  for (const entry of records) {
    const carriesModel = entry.type === 'session_meta' || entry.type === 'turn_context';
    if (!carriesModel) {
      continue;
    }
    const candidate = (entry.payload as Record<string, unknown>).model;
    if (typeof candidate === 'string') {
      return candidate;
    }
  }
  return 'unknown';
}
165  
/**
 * Read the session ID from a transcript.
 *
 * Only the first `session_meta` record is consulted; later ones are never
 * examined, even if the first has no `id`.
 *
 * @param records - Parsed transcript records.
 * @returns The `id` of the first `session_meta` payload, or `null` when there
 *   is no such record or its `id` is null/undefined.
 */
export function getSessionId(records: RolloutLine[]): string | null {
  const firstMeta = records.find((entry) => entry.type === 'session_meta');
  if (firstMeta === undefined) {
    return null;
  }
  return (firstMeta.payload as SessionMetaPayload).id ?? null;
}
177  
/**
 * Index tool outputs by the call that produced them.
 *
 * Every `response_item` record whose payload type is `function_call_output`
 * and that has a non-empty `call_id` contributes one entry. A missing
 * (null/undefined) `output` is stored as the empty string, and a repeated
 * `call_id` is overwritten by the later record.
 *
 * @param records - Parsed transcript records.
 * @returns A plain object mapping `call_id` to `output`.
 */
export function buildToolResultMap(records: RolloutLine[]): Record<string, string> {
  return records.reduce<Record<string, string>>((outputsByCallId, entry) => {
    if (entry.type === 'response_item') {
      const item = entry.payload as ResponseItemPayload;
      if (item.type === 'function_call_output' && item.call_id) {
        outputsByCallId[item.call_id] = item.output ?? '';
      }
    }
    return outputsByCallId;
  }, {});
}
194  
/**
 * List the entries of `dir`, or return an empty list when it cannot be read
 * (missing, not a directory, permission denied, ...).
 */
function readDirEntries(dir: string): string[] {
  try {
    return readdirSync(dir);
  } catch {
    return [];
  }
}

/**
 * Return the path of the first `.jsonl` file in `dir` whose name contains
 * `threadId`, or `null` when there is none (or `dir` is unreadable).
 */
function findRolloutInDir(dir: string, threadId: string): string | null {
  const match = readDirEntries(dir).find((f) => f.endsWith('.jsonl') && f.includes(threadId));
  return match === undefined ? null : join(dir, match);
}

/**
 * Find the transcript rollout file for a given thread ID.
 *
 * Codex stores transcripts at:
 *   ~/.codex/sessions/YYYY/MM/DD/rollout-<timestamp>-<thread-id>.jsonl
 *
 * This is optional enrichment — if not found, tracing still works
 * from the notify payload alone. The lookup is best-effort and never throws.
 *
 * Entries under the sessions tree that are not readable directories (stray
 * files, permission problems) are skipped individually, so one bad entry does
 * not hide transcripts stored in sibling directories.
 *
 * @param threadId - Thread ID to look for in rollout file names. An empty ID
 *   matches nothing (it would otherwise match every file).
 * @returns Absolute path of the matching `.jsonl` file, or `null` when none
 *   is found.
 */
export function findTranscriptForThread(threadId: string): string | null {
  // `name.includes('')` is always true, so an empty ID would return an
  // arbitrary, unrelated transcript.
  if (!threadId) {
    return null;
  }

  try {
    const sessionsDir = join(homedir(), '.codex', 'sessions');
    if (!existsSync(sessionsDir)) {
      return null;
    }

    // Fast path: check today's directory first since the hook fires
    // right after a turn completes — the transcript is almost always
    // from the current (local) date.
    const now = new Date();
    const year = String(now.getFullYear());
    const month = String(now.getMonth() + 1).padStart(2, '0');
    const day = String(now.getDate()).padStart(2, '0');
    const todayMatch = findRolloutInDir(join(sessionsDir, year, month, day), threadId);
    if (todayMatch !== null) {
      return todayMatch;
    }

    // Slow path: walk year/month/day directories, newest first.
    // Only needed if the session started before midnight and the hook
    // fires after, or the clock is off.
    for (const y of readDirEntries(sessionsDir).sort().reverse()) {
      const yearDir = join(sessionsDir, y);
      for (const m of readDirEntries(yearDir).sort().reverse()) {
        const monthDir = join(yearDir, m);
        for (const d of readDirEntries(monthDir).sort().reverse()) {
          // Skip today's dir — already checked above
          if (y === year && m === month && d === day) {
            continue;
          }
          const found = findRolloutInDir(join(monthDir, d), threadId);
          if (found !== null) {
            return found;
          }
        }
      }
    }
  } catch {
    // Transcript lookup is best-effort
  }
  return null;
}