// src/utils/contextAnalysis.ts
  1  import type { BetaContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
  2  import type {
  3    ContentBlock,
  4    ContentBlockParam,
  5  } from '@anthropic-ai/sdk/resources/index.mjs'
  6  import { roughTokenCountEstimation as countTokens } from '../services/tokenEstimation.js'
  7  import type {
  8    AssistantMessage,
  9    Message,
 10    UserMessage,
 11  } from '../types/message.js'
 12  import { normalizeMessagesForAPI } from './messages.js'
 13  import { jsonStringify } from './slowOperations.js'
 14  
 15  type TokenStats = {
 16    toolRequests: Map<string, number>
 17    toolResults: Map<string, number>
 18    humanMessages: number
 19    assistantMessages: number
 20    localCommandOutputs: number
 21    other: number
 22    attachments: Map<string, number>
 23    duplicateFileReads: Map<string, { count: number; tokens: number }>
 24    total: number
 25  }
 26  
 27  export function analyzeContext(messages: Message[]): TokenStats {
 28    const stats: TokenStats = {
 29      toolRequests: new Map(),
 30      toolResults: new Map(),
 31      humanMessages: 0,
 32      assistantMessages: 0,
 33      localCommandOutputs: 0,
 34      other: 0,
 35      attachments: new Map(),
 36      duplicateFileReads: new Map(),
 37      total: 0,
 38    }
 39  
 40    const toolIdsToToolNames = new Map<string, string>()
 41    const readToolIdToFilePath = new Map<string, string>()
 42    const fileReadStats = new Map<
 43      string,
 44      { count: number; totalTokens: number }
 45    >()
 46  
 47    messages.forEach(msg => {
 48      if (msg.type === 'attachment') {
 49        const type = msg.attachment.type || 'unknown'
 50        stats.attachments.set(type, (stats.attachments.get(type) || 0) + 1)
 51      }
 52    })
 53  
 54    const normalizedMessages = normalizeMessagesForAPI(messages)
 55    normalizedMessages.forEach(msg => {
 56      const { content } = msg.message
 57  
 58      // Not sure if this path is still used, but adding as a fallback
 59      if (typeof content === 'string') {
 60        const tokens = countTokens(content)
 61        stats.total += tokens
 62        // Check if this is a local command output
 63        if (msg.type === 'user' && content.includes('local-command-stdout')) {
 64          stats.localCommandOutputs += tokens
 65        } else {
 66          stats[msg.type === 'user' ? 'humanMessages' : 'assistantMessages'] +=
 67            tokens
 68        }
 69      } else {
 70        content.forEach(block =>
 71          processBlock(
 72            block,
 73            msg,
 74            stats,
 75            toolIdsToToolNames,
 76            readToolIdToFilePath,
 77            fileReadStats,
 78          ),
 79        )
 80      }
 81    })
 82  
 83    // Calculate duplicate file reads
 84    fileReadStats.forEach((data, path) => {
 85      if (data.count > 1) {
 86        const averageTokensPerRead = Math.floor(data.totalTokens / data.count)
 87        const duplicateTokens = averageTokensPerRead * (data.count - 1)
 88  
 89        stats.duplicateFileReads.set(path, {
 90          count: data.count,
 91          tokens: duplicateTokens,
 92        })
 93      }
 94    })
 95  
 96    return stats
 97  }
 98  
 99  function processBlock(
100    block: ContentBlockParam | ContentBlock | BetaContentBlock,
101    message: UserMessage | AssistantMessage,
102    stats: TokenStats,
103    toolIds: Map<string, string>,
104    readToolPaths: Map<string, string>,
105    fileReads: Map<string, { count: number; totalTokens: number }>,
106  ): void {
107    const tokens = countTokens(jsonStringify(block))
108    stats.total += tokens
109  
110    switch (block.type) {
111      case 'text':
112        // Check if this is a local command output
113        if (
114          message.type === 'user' &&
115          'text' in block &&
116          block.text.includes('local-command-stdout')
117        ) {
118          stats.localCommandOutputs += tokens
119        } else {
120          stats[
121            message.type === 'user' ? 'humanMessages' : 'assistantMessages'
122          ] += tokens
123        }
124        break
125  
126      case 'tool_use': {
127        if ('name' in block && 'id' in block) {
128          const toolName = block.name || 'unknown'
129          increment(stats.toolRequests, toolName, tokens)
130          toolIds.set(block.id, toolName)
131  
132          // Track Read tool file paths
133          if (
134            toolName === 'Read' &&
135            'input' in block &&
136            block.input &&
137            typeof block.input === 'object' &&
138            'file_path' in block.input
139          ) {
140            const path = String(
141              (block.input as Record<string, unknown>).file_path,
142            )
143            readToolPaths.set(block.id, path)
144          }
145        }
146        break
147      }
148  
149      case 'tool_result': {
150        if ('tool_use_id' in block) {
151          const toolName = toolIds.get(block.tool_use_id) || 'unknown'
152          increment(stats.toolResults, toolName, tokens)
153  
154          // Track file read tokens
155          if (toolName === 'Read') {
156            const path = readToolPaths.get(block.tool_use_id)
157            if (path) {
158              const current = fileReads.get(path) || { count: 0, totalTokens: 0 }
159              fileReads.set(path, {
160                count: current.count + 1,
161                totalTokens: current.totalTokens + tokens,
162              })
163            }
164          }
165        }
166        break
167      }
168  
169      case 'image':
170      case 'server_tool_use':
171      case 'web_search_tool_result':
172      case 'search_result':
173      case 'document':
174      case 'thinking':
175      case 'redacted_thinking':
176      case 'code_execution_tool_result':
177      case 'mcp_tool_use':
178      case 'mcp_tool_result':
179      case 'container_upload':
180      case 'web_fetch_tool_result':
181      case 'bash_code_execution_tool_result':
182      case 'text_editor_code_execution_tool_result':
183      case 'tool_search_tool_result':
184      case 'compaction':
185        // Don't care about these for now..
186        stats['other'] += tokens
187        break
188    }
189  }
190  
191  function increment(map: Map<string, number>, key: string, value: number): void {
192    map.set(key, (map.get(key) || 0) + value)
193  }
194  
195  export function tokenStatsToStatsigMetrics(
196    stats: TokenStats,
197  ): Record<string, number> {
198    const metrics: Record<string, number> = {
199      total_tokens: stats.total,
200      human_message_tokens: stats.humanMessages,
201      assistant_message_tokens: stats.assistantMessages,
202      local_command_output_tokens: stats.localCommandOutputs,
203      other_tokens: stats.other,
204    }
205  
206    stats.attachments.forEach((count, type) => {
207      metrics[`attachment_${type}_count`] = count
208    })
209  
210    stats.toolRequests.forEach((tokens, tool) => {
211      metrics[`tool_request_${tool}_tokens`] = tokens
212    })
213  
214    stats.toolResults.forEach((tokens, tool) => {
215      metrics[`tool_result_${tool}_tokens`] = tokens
216    })
217  
218    const duplicateTotal = [...stats.duplicateFileReads.values()].reduce(
219      (sum, d) => sum + d.tokens,
220      0,
221    )
222  
223    metrics.duplicate_read_tokens = duplicateTotal
224    metrics.duplicate_read_file_count = stats.duplicateFileReads.size
225  
226    if (stats.total > 0) {
227      metrics.human_message_percent = Math.round(
228        (stats.humanMessages / stats.total) * 100,
229      )
230      metrics.assistant_message_percent = Math.round(
231        (stats.assistantMessages / stats.total) * 100,
232      )
233      metrics.local_command_output_percent = Math.round(
234        (stats.localCommandOutputs / stats.total) * 100,
235      )
236      metrics.duplicate_read_percent = Math.round(
237        (duplicateTotal / stats.total) * 100,
238      )
239  
240      const toolRequestTotal = [...stats.toolRequests.values()].reduce(
241        (sum, v) => sum + v,
242        0,
243      )
244      const toolResultTotal = [...stats.toolResults.values()].reduce(
245        (sum, v) => sum + v,
246        0,
247      )
248  
249      metrics.tool_request_percent = Math.round(
250        (toolRequestTotal / stats.total) * 100,
251      )
252      metrics.tool_result_percent = Math.round(
253        (toolResultTotal / stats.total) * 100,
254      )
255  
256      // Add individual tool request percentages
257      stats.toolRequests.forEach((tokens, tool) => {
258        metrics[`tool_request_${tool}_percent`] = Math.round(
259          (tokens / stats.total) * 100,
260        )
261      })
262  
263      // Add individual tool result percentages
264      stats.toolResults.forEach((tokens, tool) => {
265        metrics[`tool_result_${tool}_percent`] = Math.round(
266          (tokens / stats.total) * 100,
267        )
268      })
269    }
270  
271    return metrics
272  }