/ utils / plugins / fetchTelemetry.ts
fetchTelemetry.ts
  1  /**
  2   * Telemetry for plugin/marketplace fetches that hit the network.
  3   *
  4   * Added for inc-5046 (GitHub complained about claude-plugins-official load).
  5   * Before this, fetch operations only had logForDebugging — no way to measure
  6   * actual network volume. This surfaces what's hitting GitHub vs GCS vs
  7   * user-hosted so we can see the GCS migration take effect and catch future
  8   * hot-path regressions before GitHub emails us again.
  9   *
 10   * Volume: these fire at startup (install-counts 24h-TTL)
 11   * and on explicit user action (install/update). NOT per-interaction. Similar
 12   * envelope to tengu_binary_download_*.
 13   */
 14  
 15  import {
 16    logEvent,
 17    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS as SafeString,
 18  } from '../../services/analytics/index.js'
 19  import { OFFICIAL_MARKETPLACE_NAME } from './officialMarketplace.js'
 20  
 21  export type PluginFetchSource =
 22    | 'install_counts'
 23    | 'marketplace_clone'
 24    | 'marketplace_pull'
 25    | 'marketplace_url'
 26    | 'plugin_clone'
 27    | 'mcpb'
 28  
 29  export type PluginFetchOutcome = 'success' | 'failure' | 'cache_hit'
 30  
 31  // Allowlist of public hosts we report by name. Anything else (enterprise
 32  // git, self-hosted, internal) is bucketed as 'other' — we don't want
 33  // internal hostnames (git.mycorp.internal) landing in telemetry. Bounded
 34  // cardinality also keeps the dashboard host-breakdown tractable.
 35  const KNOWN_PUBLIC_HOSTS = new Set([
 36    'github.com',
 37    'raw.githubusercontent.com',
 38    'objects.githubusercontent.com',
 39    'gist.githubusercontent.com',
 40    'gitlab.com',
 41    'bitbucket.org',
 42    'codeberg.org',
 43    'dev.azure.com',
 44    'ssh.dev.azure.com',
 45    'storage.googleapis.com', // GCS — where Dickson's migration points
 46  ])
 47  
 48  /**
 49   * Extract hostname from a URL or git spec and bucket to the allowlist.
 50   * Handles `https://host/...`, `git@host:path`, `ssh://host/...`.
 51   * Returns a known public host, 'other' (parseable but not allowlisted —
 52   * don't leak private hostnames), or 'unknown' (unparseable / local path).
 53   */
 54  function extractHost(urlOrSpec: string): string {
 55    let host: string
 56    const scpMatch = /^[^@/]+@([^:/]+):/.exec(urlOrSpec)
 57    if (scpMatch) {
 58      host = scpMatch[1]!
 59    } else {
 60      try {
 61        host = new URL(urlOrSpec).hostname
 62      } catch {
 63        return 'unknown'
 64      }
 65    }
 66    const normalized = host.toLowerCase()
 67    return KNOWN_PUBLIC_HOSTS.has(normalized) ? normalized : 'other'
 68  }
 69  
 70  /**
 71   * True if the URL/spec points at anthropics/claude-plugins-official — the
 72   * repo GitHub complained about. Lets the dashboard separate "our problem"
 73   * traffic from user-configured marketplaces.
 74   */
 75  function isOfficialRepo(urlOrSpec: string): boolean {
 76    return urlOrSpec.includes(`anthropics/${OFFICIAL_MARKETPLACE_NAME}`)
 77  }
 78  
 79  export function logPluginFetch(
 80    source: PluginFetchSource,
 81    urlOrSpec: string | undefined,
 82    outcome: PluginFetchOutcome,
 83    durationMs: number,
 84    errorKind?: string,
 85  ): void {
 86    // String values are bounded enums / hostname-only — no code, no paths,
 87    // no raw error messages. Same privacy envelope as tengu_web_fetch_host.
 88    logEvent('tengu_plugin_remote_fetch', {
 89      source: source as SafeString,
 90      host: (urlOrSpec ? extractHost(urlOrSpec) : 'unknown') as SafeString,
 91      is_official: urlOrSpec ? isOfficialRepo(urlOrSpec) : false,
 92      outcome: outcome as SafeString,
 93      duration_ms: Math.round(durationMs),
 94      ...(errorKind && { error_kind: errorKind as SafeString }),
 95    })
 96  }
 97  
 98  /**
 99   * Classify an error into a stable bucket for the error_kind field. Keeps
100   * cardinality bounded — raw error messages would explode dashboard grouping.
101   *
102   * Handles both axios Error objects (Node.js error codes like ENOTFOUND) and
103   * git stderr strings (human phrases like "Could not resolve host"). DNS
104   * checked BEFORE timeout because gitClone's error enhancement at
105   * marketplaceManager.ts:~950 rewrites DNS failures to include the word
106   * "timeout" — ordering the other way would misclassify git DNS as timeout.
107   */
108  export function classifyFetchError(error: unknown): string {
109    const msg = String((error as { message?: unknown })?.message ?? error)
110    if (
111      /ENOTFOUND|ECONNREFUSED|EAI_AGAIN|Could not resolve host|Connection refused/i.test(
112        msg,
113      )
114    ) {
115      return 'dns_or_refused'
116    }
117    if (/ETIMEDOUT|timed out|timeout/i.test(msg)) return 'timeout'
118    if (
119      /ECONNRESET|socket hang up|Connection reset by peer|remote end hung up/i.test(
120        msg,
121      )
122    ) {
123      return 'conn_reset'
124    }
125    if (/403|401|authentication|permission denied/i.test(msg)) return 'auth'
126    if (/404|not found|repository not found/i.test(msg)) return 'not_found'
127    if (/certificate|SSL|TLS|unable to get local issuer/i.test(msg)) return 'tls'
128    // Schema validation throws "Invalid response format" (install_counts) —
129    // distinguish from true unknowns so the dashboard can
130    // see "server sent garbage" separately.
131    if (/Invalid response format|Invalid marketplace schema/i.test(msg)) {
132      return 'invalid_schema'
133    }
134    return 'other'
135  }