/ src / utils / codeIndexing.ts
codeIndexing.ts
/**
 * Utility functions for detecting code indexing tool usage.
 *
 * Recognizes common code indexing solutions (Sourcegraph, Cody, etc.) from
 * two sources: CLI commands and MCP server integrations.
 */
  7  
  8  /**
  9   * Known code indexing tool identifiers.
 10   * These are the normalized names used in analytics events.
 11   */
 12  export type CodeIndexingTool =
 13    // Code search engines
 14    | 'sourcegraph'
 15    | 'hound'
 16    | 'seagoat'
 17    | 'bloop'
 18    | 'gitloop'
 19    // AI coding assistants with indexing
 20    | 'cody'
 21    | 'aider'
 22    | 'continue'
 23    | 'github-copilot'
 24    | 'cursor'
 25    | 'tabby'
 26    | 'codeium'
 27    | 'tabnine'
 28    | 'augment'
 29    | 'windsurf'
 30    | 'aide'
 31    | 'pieces'
 32    | 'qodo'
 33    | 'amazon-q'
 34    | 'gemini'
 35    // MCP code indexing servers
 36    | 'claude-context'
 37    | 'code-index-mcp'
 38    | 'local-code-search'
 39    | 'autodev-codebase'
 40    // Context providers
 41    | 'openctx'
 42  
 43  /**
 44   * Mapping of CLI command prefixes to code indexing tools.
 45   * The key is the command name (first word of the command).
 46   */
 47  const CLI_COMMAND_MAPPING: Record<string, CodeIndexingTool> = {
 48    // Sourcegraph ecosystem
 49    src: 'sourcegraph',
 50    cody: 'cody',
 51    // AI coding assistants
 52    aider: 'aider',
 53    tabby: 'tabby',
 54    tabnine: 'tabnine',
 55    augment: 'augment',
 56    pieces: 'pieces',
 57    qodo: 'qodo',
 58    aide: 'aide',
 59    // Code search tools
 60    hound: 'hound',
 61    seagoat: 'seagoat',
 62    bloop: 'bloop',
 63    gitloop: 'gitloop',
 64    // Cloud provider AI assistants
 65    q: 'amazon-q',
 66    gemini: 'gemini',
 67  }
 68  
 69  /**
 70   * Mapping of MCP server name patterns to code indexing tools.
 71   * Patterns are matched case-insensitively against the server name.
 72   */
 73  const MCP_SERVER_PATTERNS: Array<{
 74    pattern: RegExp
 75    tool: CodeIndexingTool
 76  }> = [
 77    // Sourcegraph ecosystem
 78    { pattern: /^sourcegraph$/i, tool: 'sourcegraph' },
 79    { pattern: /^cody$/i, tool: 'cody' },
 80    { pattern: /^openctx$/i, tool: 'openctx' },
 81    // AI coding assistants
 82    { pattern: /^aider$/i, tool: 'aider' },
 83    { pattern: /^continue$/i, tool: 'continue' },
 84    { pattern: /^github[-_]?copilot$/i, tool: 'github-copilot' },
 85    { pattern: /^copilot$/i, tool: 'github-copilot' },
 86    { pattern: /^cursor$/i, tool: 'cursor' },
 87    { pattern: /^tabby$/i, tool: 'tabby' },
 88    { pattern: /^codeium$/i, tool: 'codeium' },
 89    { pattern: /^tabnine$/i, tool: 'tabnine' },
 90    { pattern: /^augment[-_]?code$/i, tool: 'augment' },
 91    { pattern: /^augment$/i, tool: 'augment' },
 92    { pattern: /^windsurf$/i, tool: 'windsurf' },
 93    { pattern: /^aide$/i, tool: 'aide' },
 94    { pattern: /^codestory$/i, tool: 'aide' },
 95    { pattern: /^pieces$/i, tool: 'pieces' },
 96    { pattern: /^qodo$/i, tool: 'qodo' },
 97    { pattern: /^amazon[-_]?q$/i, tool: 'amazon-q' },
 98    { pattern: /^gemini[-_]?code[-_]?assist$/i, tool: 'gemini' },
 99    { pattern: /^gemini$/i, tool: 'gemini' },
100    // Code search tools
101    { pattern: /^hound$/i, tool: 'hound' },
102    { pattern: /^seagoat$/i, tool: 'seagoat' },
103    { pattern: /^bloop$/i, tool: 'bloop' },
104    { pattern: /^gitloop$/i, tool: 'gitloop' },
105    // MCP code indexing servers
106    { pattern: /^claude[-_]?context$/i, tool: 'claude-context' },
107    { pattern: /^code[-_]?index[-_]?mcp$/i, tool: 'code-index-mcp' },
108    { pattern: /^code[-_]?index$/i, tool: 'code-index-mcp' },
109    { pattern: /^local[-_]?code[-_]?search$/i, tool: 'local-code-search' },
110    { pattern: /^codebase$/i, tool: 'autodev-codebase' },
111    { pattern: /^autodev[-_]?codebase$/i, tool: 'autodev-codebase' },
112    { pattern: /^code[-_]?context$/i, tool: 'claude-context' },
113  ]
114  
/**
 * Detects if a bash command is using a code indexing CLI tool.
 *
 * The first whitespace-separated word is lowercased and looked up in
 * `CLI_COMMAND_MAPPING`. When that word is `npx` or `bunx`, the word right
 * after it is tried first.
 *
 * @param command - The full bash command string
 * @returns The code indexing tool identifier, or undefined if not a code indexing command
 *
 * @example
 * detectCodeIndexingFromCommand('src search "pattern"') // returns 'sourcegraph'
 * detectCodeIndexingFromCommand('npx cody chat') // returns 'cody'
 * detectCodeIndexingFromCommand('ls -la') // returns undefined
 * detectCodeIndexingFromCommand('constructor --help') // returns undefined
 */
export function detectCodeIndexingFromCommand(
  command: string,
): CodeIndexingTool | undefined {
  // Only the table's own keys count. The word comes from arbitrary command
  // text, so names like `constructor` or `__proto__` must not resolve to
  // members inherited from Object.prototype.
  const lookup = (name: string): CodeIndexingTool | undefined =>
    Object.prototype.hasOwnProperty.call(CLI_COMMAND_MAPPING, name)
      ? CLI_COMMAND_MAPPING[name]
      : undefined

  // Tokenize once; a blank command yields [''], whose first word is falsy.
  const words = command.trim().split(/\s+/)
  const commandName = words[0]?.toLowerCase()
  if (!commandName) {
    return undefined
  }

  // For npx/bunx the interesting name is the package being run.
  if (commandName === 'npx' || commandName === 'bunx') {
    const packageName = words[1]?.toLowerCase()
    const viaRunner = packageName ? lookup(packageName) : undefined
    if (viaRunner !== undefined) {
      return viaRunner
    }
  }

  return lookup(commandName)
}
147  
/**
 * Resolves an MCP tool name to the code indexing tool behind its server.
 *
 * A qualifying name starts with `mcp__` and splits on `__` into at least
 * three segments; the second segment is taken as the server name and matched
 * against the known server patterns.
 *
 * @param toolName - The MCP tool name (format: mcp__serverName__toolName)
 * @returns The code indexing tool identifier, or undefined if not a code indexing tool
 *
 * @example
 * detectCodeIndexingFromMcpTool('mcp__sourcegraph__search') // returns 'sourcegraph'
 * detectCodeIndexingFromMcpTool('mcp__cody__chat') // returns 'cody'
 * detectCodeIndexingFromMcpTool('mcp__filesystem__read') // returns undefined
 */
export function detectCodeIndexingFromMcpTool(
  toolName: string,
): CodeIndexingTool | undefined {
  // Names without the MCP prefix are treated as having no segments at all.
  const segments = toolName.startsWith('mcp__') ? toolName.split('__') : []

  // Need prefix, server and tool: fewer than three segments is not a tool name.
  if (segments.length < 3) {
    return undefined
  }

  // An empty server segment (e.g. 'mcp____tool') cannot identify a server.
  const server = segments[1]
  return server ? detectCodeIndexingFromMcpServerName(server) : undefined
}
185  
/**
 * Resolves an MCP server name to a code indexing tool.
 *
 * The name is tested against each entry of `MCP_SERVER_PATTERNS` in order and
 * the tool of the first matching entry is returned.
 *
 * @param serverName - The MCP server name
 * @returns The code indexing tool identifier, or undefined if not a code indexing server
 *
 * @example
 * detectCodeIndexingFromMcpServerName('sourcegraph') // returns 'sourcegraph'
 * detectCodeIndexingFromMcpServerName('filesystem') // returns undefined
 */
export function detectCodeIndexingFromMcpServerName(
  serverName: string,
): CodeIndexingTool | undefined {
  const firstMatch = MCP_SERVER_PATTERNS.find(entry =>
    entry.pattern.test(serverName),
  )
  return firstMatch?.tool
}
206  }