/ tools / shared / gitOperationTracking.ts
gitOperationTracking.ts
  1  /**
  2   * Shell-agnostic git operation tracking for usage metrics.
  3   *
  4   * Detects `git commit`, `git push`, `gh pr create`, `glab mr create`, and
  5   * curl-based PR creation in command strings, then increments OTLP counters
  6   * and fires analytics events. The regexes operate on raw command text so they
  7   * work identically for Bash and PowerShell (both invoke git/gh/glab/curl as
  8   * external binaries with the same argv syntax).
  9   */
 10  
 11  import { getCommitCounter, getPrCounter } from '../../bootstrap/state.js'
 12  import {
 13    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 14    logEvent,
 15  } from '../../services/analytics/index.js'
 16  
 17  /**
 18   * Build a regex that matches `git <subcmd>` while tolerating git's global
 19   * options between `git` and the subcommand (e.g. `-c key=val`, `-C path`,
 20   * `--git-dir=path`). Common when the model retries with
 21   * `git -c commit.gpgsign=false commit` after a signing failure.
 22   */
 23  function gitCmdRe(subcmd: string, suffix = ''): RegExp {
 24    return new RegExp(
 25      `\\bgit(?:\\s+-[cC]\\s+\\S+|\\s+--\\S+=\\S+)*\\s+${subcmd}\\b${suffix}`,
 26    )
 27  }
 28  
 29  const GIT_COMMIT_RE = gitCmdRe('commit')
 30  const GIT_PUSH_RE = gitCmdRe('push')
 31  const GIT_CHERRY_PICK_RE = gitCmdRe('cherry-pick')
 32  const GIT_MERGE_RE = gitCmdRe('merge', '(?!-)')
 33  const GIT_REBASE_RE = gitCmdRe('rebase')
 34  
 35  export type CommitKind = 'committed' | 'amended' | 'cherry-picked'
 36  export type BranchAction = 'merged' | 'rebased'
 37  export type PrAction =
 38    | 'created'
 39    | 'edited'
 40    | 'merged'
 41    | 'commented'
 42    | 'closed'
 43    | 'ready'
 44  
 45  const GH_PR_ACTIONS: readonly { re: RegExp; action: PrAction; op: string }[] = [
 46    { re: /\bgh\s+pr\s+create\b/, action: 'created', op: 'pr_create' },
 47    { re: /\bgh\s+pr\s+edit\b/, action: 'edited', op: 'pr_edit' },
 48    { re: /\bgh\s+pr\s+merge\b/, action: 'merged', op: 'pr_merge' },
 49    { re: /\bgh\s+pr\s+comment\b/, action: 'commented', op: 'pr_comment' },
 50    { re: /\bgh\s+pr\s+close\b/, action: 'closed', op: 'pr_close' },
 51    { re: /\bgh\s+pr\s+ready\b/, action: 'ready', op: 'pr_ready' },
 52  ]
 53  
 54  /**
 55   * Parse PR info from a GitHub PR URL.
 56   * Returns { prNumber, prUrl, prRepository } or null if not a valid PR URL.
 57   */
 58  function parsePrUrl(
 59    url: string,
 60  ): { prNumber: number; prUrl: string; prRepository: string } | null {
 61    const match = url.match(/https:\/\/github\.com\/([^/]+\/[^/]+)\/pull\/(\d+)/)
 62    if (match?.[1] && match?.[2]) {
 63      return {
 64        prNumber: parseInt(match[2], 10),
 65        prUrl: url,
 66        prRepository: match[1],
 67      }
 68    }
 69    return null
 70  }
 71  
 72  /** Find a GitHub PR URL embedded anywhere in stdout and parse it. */
 73  function findPrInStdout(stdout: string): ReturnType<typeof parsePrUrl> {
 74    const m = stdout.match(/https:\/\/github\.com\/[^/\s]+\/[^/\s]+\/pull\/\d+/)
 75    return m ? parsePrUrl(m[0]) : null
 76  }
 77  
 78  // Exported for testing purposes
 79  export function parseGitCommitId(stdout: string): string | undefined {
 80    // git commit output: [branch abc1234] message
 81    // or for root commit: [branch (root-commit) abc1234] message
 82    const match = stdout.match(/\[[\w./-]+(?: \(root-commit\))? ([0-9a-f]+)\]/)
 83    return match?.[1]
 84  }
 85  
 86  /**
 87   * Parse branch name from git push output. Push writes progress to stderr but
 88   * the ref update line ("abc..def  branch -> branch", "* [new branch]
 89   * branch -> branch", or " + abc...def  branch -> branch (forced update)") is
 90   * the signal. Works on either stdout or stderr. Git prefixes each ref line
 91   * with a status flag (space, +, -, *, !, =); the char class tolerates any.
 92   */
 93  function parseGitPushBranch(output: string): string | undefined {
 94    const match = output.match(
 95      /^\s*[+\-*!= ]?\s*(?:\[new branch\]|\S+\.\.+\S+)\s+\S+\s*->\s*(\S+)/m,
 96    )
 97    return match?.[1]
 98  }
 99  
/**
 * Recover a PR number from plain-text `gh` output. `gh pr merge/close/ready`
 * print "✓ <Verb> pull request owner/repo#1234" and no URL, so the number
 * has to be read from the text following "pull request".
 *
 * @returns The parsed number, or `undefined` if the phrase is absent.
 */
function parsePrNumberFromText(stdout: string): number | undefined {
  const digits = /[Pp]ull request (?:\S+#)?#?(\d+)/.exec(stdout)?.[1]
  if (!digits) {
    return undefined
  }
  return parseInt(digits, 10)
}
108  
/**
 * Extract the target ref from a `git merge <ref>` / `git rebase <ref>`
 * command: the first argument after the subcommand that is not a flag.
 *
 * Scanning stops at the end of the git invocation. A shell operator
 * (`&`, `|`, `;`, `>`, `<`) ends the command even when it is glued to the
 * preceding word, so `git merge foo; git push` yields `foo` and
 * `git rebase --continue; git status` yields `undefined` rather than `git`.
 *
 * NOTE: flags that take a separate value (e.g. `-m "msg"`) are not
 * understood; when one precedes the ref, its value is returned instead.
 *
 * @param command - Full shell command text.
 * @param verb - Git subcommand to look for (`'merge'` or `'rebase'`).
 * @returns The ref, or `undefined` if none can be found.
 */
function parseRefFromCommand(
  command: string,
  verb: string,
): string | undefined {
  // `(?!-)` mirrors GIT_MERGE_RE so `git merge-base …` is never split on.
  const after = command.split(gitCmdRe(verb, '(?!-)'))[1]
  if (!after) return undefined
  for (const token of after.trim().split(/\s+/)) {
    // Redirections such as `>out`, `2>&1` or `<in` end the argument list.
    if (/^\d*[<>]/.test(token)) break
    const opIndex = token.search(/[&|;><]/)
    const word = opIndex === -1 ? token : token.slice(0, opIndex)
    if (word && !word.startsWith('-')) return word
    // An operator inside the token means the git command ended here.
    if (opIndex !== -1) break
  }
  return undefined
}
126  
/**
 * Inspect a shell command and its output for git operations worth showing in
 * the collapsed tool-use summary ("committed a1b2c3, created PR #42, ran 3
 * bash commands").
 *
 * Each operation is only looked for when the command itself invokes it, so
 * SHAs or URLs that merely appear in unrelated output (e.g. `git log`) are
 * not reported.
 *
 * @param command - The command text that was executed.
 * @param output - stdout and stderr concatenated; git push writes its ref
 *   update line to stderr.
 * @returns An object with one optional entry per detected operation:
 *   `commit` (SHA cut to 6 chars, plus kind), `push` (branch), `branch`
 *   (merge/rebase target ref) and `pr` (number, optional URL, action).
 */
export function detectGitOperation(
  command: string,
  output: string,
): {
  commit?: { sha: string; kind: CommitKind }
  push?: { branch: string }
  branch?: { ref: string; action: BranchAction }
  pr?: { number: number; url?: string; action: PrAction }
} {
  const detected: ReturnType<typeof detectGitOperation> = {}

  // Commit and cherry-pick print the same "[branch sha] msg" summary line.
  const cherryPicked = GIT_CHERRY_PICK_RE.test(command)
  if (cherryPicked || GIT_COMMIT_RE.test(command)) {
    const fullSha = parseGitCommitId(output)
    if (fullSha) {
      let kind: CommitKind = 'committed'
      if (cherryPicked) {
        kind = 'cherry-picked'
      } else if (/--amend\b/.test(command)) {
        kind = 'amended'
      }
      detected.commit = { sha: fullSha.slice(0, 6), kind }
    }
  }

  if (GIT_PUSH_RE.test(command)) {
    const pushedBranch = parseGitPushBranch(output)
    if (pushedBranch) {
      detected.push = { branch: pushedBranch }
    }
  }

  // Only report a merge when git's output confirms that it happened.
  const mergeSucceeded =
    GIT_MERGE_RE.test(command) && /(Fast-forward|Merge made by)/.test(output)
  if (mergeSucceeded) {
    const mergedRef = parseRefFromCommand(command, 'merge')
    if (mergedRef) {
      detected.branch = { ref: mergedRef, action: 'merged' }
    }
  }

  // Checked after merge, so a rebase overrides a merge in the same command.
  const rebaseSucceeded =
    GIT_REBASE_RE.test(command) && /Successfully rebased/.test(output)
  if (rebaseSucceeded) {
    const rebasedRef = parseRefFromCommand(command, 'rebase')
    if (rebasedRef) {
      detected.branch = { ref: rebasedRef, action: 'rebased' }
    }
  }

  const ghMatch = GH_PR_ACTIONS.find(({ re }) => re.test(command))
  if (ghMatch) {
    const { action } = ghMatch
    // Prefer a full PR URL; otherwise fall back to "pull request #N" text.
    const fromUrl = findPrInStdout(output)
    if (fromUrl) {
      detected.pr = { number: fromUrl.prNumber, url: fromUrl.prUrl, action }
    } else {
      const fromText = parsePrNumberFromText(output)
      if (fromText) {
        detected.pr = { number: fromText, action }
      }
    }
  }

  return detected
}
187  
/**
 * Heuristically decide whether `command` is a curl call that creates a
 * pull/merge request through a REST API (Bitbucket, GitHub API, GitLab API).
 *
 * The POST method and the PR endpoint are checked separately so argument
 * order does not matter. POST is either explicit (`-X POST`,
 * `--request POST`) or implied by a request-body option: `-d`, `--data`,
 * `--data-raw`, `--data-binary`, `--data-urlencode`, `--data-ascii`, `--json`.
 */
function isCurlPrCreation(command: string): boolean {
  if (!/\bcurl\b/.test(command)) return false
  const isPost =
    /-X\s*POST\b/i.test(command) ||
    /--request\s*=?\s*POST\b/i.test(command) ||
    /\s-d\s/.test(command) ||
    /\s--(?:data(?:-(?:raw|binary|urlencode|ascii))?|json)[\s=]/.test(command)
  if (!isPost) return false
  // Match PR endpoints in URLs, but not sub-resources like /pulls/123/comments.
  // Require an http(s):// prefix so text in the POST body is not matched.
  return /https?:\/\/[^\s'"]*\/(pulls|pull-requests|merge[-_]requests)(?!\/\d)/i.test(
    command,
  )
}

// Exported for testing purposes
/**
 * Record usage metrics for git operations found in a finished shell command.
 *
 * Nothing is recorded unless `exitCode` is 0. For a successful command this
 * logs a `tengu_git_operation` analytics event per detected operation
 * (commit, commit_amend, push, gh pr actions, glab/curl PR creation),
 * increments the commit and PR counters, and — when `gh pr create` printed a
 * PR URL — links the current session to that PR in the background.
 *
 * @param command - The command text that was executed.
 * @param exitCode - Process exit code of the command.
 * @param stdout - Command output, used only to find the created PR's URL.
 */
export function trackGitOperations(
  command: string,
  exitCode: number,
  stdout?: string,
): void {
  if (exitCode !== 0) {
    return
  }

  if (GIT_COMMIT_RE.test(command)) {
    logEvent('tengu_git_operation', {
      operation:
        'commit' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    // An amend is reported as a commit plus a separate commit_amend event.
    if (/--amend\b/.test(command)) {
      logEvent('tengu_git_operation', {
        operation:
          'commit_amend' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
    }
    getCommitCounter()?.add(1)
  }

  if (GIT_PUSH_RE.test(command)) {
    logEvent('tengu_git_operation', {
      operation:
        'push' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
  }

  const prHit = GH_PR_ACTIONS.find(a => a.re.test(command))
  if (prHit) {
    logEvent('tengu_git_operation', {
      operation:
        prHit.op as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
  }
  if (prHit?.action === 'created') {
    getPrCounter()?.add(1)
    // Auto-link session to PR if we can extract PR URL from stdout
    if (stdout) {
      const prInfo = findPrInStdout(stdout)
      if (prInfo) {
        // Import is done dynamically to avoid circular dependency
        void import('../../utils/sessionStorage.js').then(
          ({ linkSessionToPR }) => {
            void import('../../bootstrap/state.js').then(({ getSessionId }) => {
              const sessionId = getSessionId()
              if (sessionId) {
                void linkSessionToPR(
                  sessionId as `${string}-${string}-${string}-${string}-${string}`,
                  prInfo.prNumber,
                  prInfo.prUrl,
                  prInfo.prRepository,
                )
              }
            })
          },
        )
      }
    }
  }

  if (/\bglab\s+mr\s+create\b/.test(command)) {
    logEvent('tengu_git_operation', {
      operation:
        'pr_create' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    getPrCounter()?.add(1)
  }

  if (isCurlPrCreation(command)) {
    logEvent('tengu_git_operation', {
      operation:
        'pr_create' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    getPrCounter()?.add(1)
  }
}