// utils/plugins/officialMarketplaceGcs.ts
  1  /**
  2   * inc-5046: fetch the official marketplace from a GCS mirror instead of
  3   * git-cloning GitHub on every startup.
  4   *
  5   * Backend (anthropic#317037) publishes a marketplace-only zip alongside the
  6   * titanium squashfs, keyed by base repo SHA. This module fetches the `latest`
  7   * pointer, compares against a local sentinel, and downloads+extracts the zip
  8   * when there's a new SHA. Callers decide fallback behavior on failure.
  9   */
 10  
 11  import axios from 'axios'
 12  import { chmod, mkdir, readFile, rename, rm, writeFile } from 'fs/promises'
 13  import { dirname, join, resolve, sep } from 'path'
 14  import { waitForScrollIdle } from '../../bootstrap/state.js'
 15  import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../../services/analytics/index.js'
 16  import { logEvent } from '../../services/analytics/index.js'
 17  import { logForDebugging } from '../debug.js'
 18  import { parseZipModes, unzipFile } from '../dxt/zip.js'
 19  import { errorMessage, getErrnoCode } from '../errors.js'
 20  
// Local alias for the analytics "safe string" brand — shortens the casts in
// the logEvent payload below. The long name is the attestation itself.
type SafeString = AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS

// CDN-fronted domain for the public GCS bucket (same bucket the native
// binary ships from — nativeInstaller/download.ts:24 uses the raw GCS URL).
// `{sha}.zip` is content-addressed so CDN can cache it indefinitely;
// `latest` has Cache-Control: max-age=300 so CDN staleness is bounded.
// Backend (anthropic#317037) populates this prefix.
const GCS_BASE =
  'https://downloads.claude.ai/claude-code-releases/plugins/claude-plugins-official'

// Zip arc paths are seed-dir-relative (marketplaces/claude-plugins-official/…)
// so the titanium seed machinery can use the same zip. Strip this prefix when
// extracting for a laptop install.
const ARC_PREFIX = 'marketplaces/claude-plugins-official/'
 35  
 36  /**
 37   * Fetch the official marketplace from GCS and extract to installLocation.
 38   * Idempotent — checks a `.gcs-sha` sentinel before downloading the ~3.5MB zip.
 39   *
 40   * @param installLocation where to extract (must be inside marketplacesCacheDir)
 41   * @param marketplacesCacheDir the plugins marketplace cache root — passed in
 42   *   by callers (rather than imported from pluginDirectories) to break a
 43   *   circular-dep edge through marketplaceManager
 44   * @returns the fetched SHA on success (including no-op), null on any failure
 45   *   (network, 404, zip parse). Caller decides whether to fall through to git.
 46   */
 47  export async function fetchOfficialMarketplaceFromGcs(
 48    installLocation: string,
 49    marketplacesCacheDir: string,
 50  ): Promise<string | null> {
 51    // Defense in depth: this function does `rm(installLocation, {recursive})`
 52    // during the atomic swap. A corrupted known_marketplaces.json (gh-32793 —
 53    // Windows path read on WSL, literal tilde, manual edit) could point at the
 54    // user's project. Refuse any path outside the marketplaces cache dir.
 55    // Same guard as refreshMarketplace() at marketplaceManager.ts:~2392 but
 56    // inside the function so ALL callers are covered.
 57    const cacheDir = resolve(marketplacesCacheDir)
 58    const resolvedLoc = resolve(installLocation)
 59    if (resolvedLoc !== cacheDir && !resolvedLoc.startsWith(cacheDir + sep)) {
 60      logForDebugging(
 61        `fetchOfficialMarketplaceFromGcs: refusing path outside cache dir: ${installLocation}`,
 62        { level: 'error' },
 63      )
 64      return null
 65    }
 66  
 67    // Network + zip extraction competes for the event loop with scroll frames.
 68    // This is a fire-and-forget startup call — delaying by a few hundred ms
 69    // until scroll settles is invisible to the user.
 70    await waitForScrollIdle()
 71  
 72    const start = performance.now()
 73    let outcome: 'noop' | 'updated' | 'failed' = 'failed'
 74    let sha: string | undefined
 75    let bytes: number | undefined
 76    let errKind: string | undefined
 77  
 78    try {
 79      // 1. Latest pointer — ~40 bytes, backend sets Cache-Control: no-cache,
 80      //    max-age=300. Cheap enough to hit every startup.
 81      const latest = await axios.get(`${GCS_BASE}/latest`, {
 82        responseType: 'text',
 83        timeout: 10_000,
 84      })
 85      sha = String(latest.data).trim()
 86      if (!sha) {
 87        // Empty /latest body — backend misconfigured. Bail (null), don't
 88        // lock into a permanently-broken empty-sentinel state.
 89        throw new Error('latest pointer returned empty body')
 90      }
 91  
 92      // 2. Sentinel check — `.gcs-sha` at the install root holds the last
 93      //    extracted SHA. Matching means we already have this content.
 94      const sentinelPath = join(installLocation, '.gcs-sha')
 95      const currentSha = await readFile(sentinelPath, 'utf8').then(
 96        s => s.trim(),
 97        () => null, // ENOENT — first fetch, proceed to download
 98      )
 99      if (currentSha === sha) {
100        outcome = 'noop'
101        return sha
102      }
103  
104      // 3. Download zip and extract to a staging dir, then atomic-swap into
105      //    place. Crash mid-extract leaves a .staging dir (next run rm's it)
106      //    rather than a half-written installLocation.
107      const zipResp = await axios.get(`${GCS_BASE}/${sha}.zip`, {
108        responseType: 'arraybuffer',
109        timeout: 60_000,
110      })
111      const zipBuf = Buffer.from(zipResp.data)
112      bytes = zipBuf.length
113      const files = await unzipFile(zipBuf)
114      // fflate doesn't surface external_attr, so parse the central directory
115      // ourselves to recover exec bits. Without this, hooks/scripts extract as
116      // 0644 and `sh -c "/path/script.sh"` (hooks.ts:~1002) fails with EACCES
117      // on Unix. Git-clone preserves +x natively; this keeps GCS at parity.
118      const modes = parseZipModes(zipBuf)
119  
120      const staging = `${installLocation}.staging`
121      await rm(staging, { recursive: true, force: true })
122      await mkdir(staging, { recursive: true })
123      for (const [arcPath, data] of Object.entries(files)) {
124        if (!arcPath.startsWith(ARC_PREFIX)) continue
125        const rel = arcPath.slice(ARC_PREFIX.length)
126        if (!rel || rel.endsWith('/')) continue // prefix dir entry or subdir entry
127        const dest = join(staging, rel)
128        await mkdir(dirname(dest), { recursive: true })
129        await writeFile(dest, data)
130        const mode = modes[arcPath]
131        if (mode && mode & 0o111) {
132          // Only chmod when an exec bit is set — skip plain files to save syscalls.
133          // Swallow EPERM/ENOTSUP (NFS root_squash, some FUSE mounts) — losing +x
134          // is the pre-PR behavior and better than aborting mid-extraction.
135          await chmod(dest, mode & 0o777).catch(() => {})
136        }
137      }
138      await writeFile(join(staging, '.gcs-sha'), sha)
139  
140      // Atomic swap: rm old, rename staging. Brief window where installLocation
141      // doesn't exist — acceptable for a background refresh (caller retries next
142      // startup if it crashes here).
143      await rm(installLocation, { recursive: true, force: true })
144      await rename(staging, installLocation)
145  
146      outcome = 'updated'
147      return sha
148    } catch (e) {
149      errKind = classifyGcsError(e)
150      logForDebugging(
151        `Official marketplace GCS fetch failed: ${errorMessage(e)}`,
152        { level: 'warn' },
153      )
154      return null
155    } finally {
156      // tengu_plugin_remote_fetch schema shared with the telemetry PR
157      // (.daisy/inc-5046/index.md) — adds source:'marketplace_gcs'. All string
158      // values below are static enums or a git SHA — not code/filepaths/PII.
159      logEvent('tengu_plugin_remote_fetch', {
160        source: 'marketplace_gcs' as SafeString,
161        host: 'downloads.claude.ai' as SafeString,
162        is_official: true,
163        outcome: outcome as SafeString,
164        duration_ms: Math.round(performance.now() - start),
165        ...(bytes !== undefined && { bytes }),
166        ...(sha && { sha: sha as SafeString }),
167        ...(errKind && { error_kind: errKind as SafeString }),
168      })
169    }
170  }
171  
172  // Bounded set of errno codes we report by name. Anything else buckets as
173  // fs_other to keep dashboard cardinality tractable.
174  const KNOWN_FS_CODES = new Set([
175    'ENOSPC',
176    'EACCES',
177    'EPERM',
178    'EXDEV',
179    'EBUSY',
180    'ENOENT',
181    'ENOTDIR',
182    'EROFS',
183    'EMFILE',
184    'ENAMETOOLONG',
185  ])
186  
187  /**
188   * Classify a GCS fetch error into a stable telemetry bucket.
189   *
190   * Telemetry from v2.1.83+ showed 50% of failures landing in 'other' — and
191   * 99.99% of those had both sha+bytes set, meaning download succeeded but
192   * extraction/fs failed. This splits that bucket so we can see whether the
193   * failures are fixable (wrong staging dir, cross-device rename) or inherent
194   * (disk full, permission denied) before flipping the git-fallback kill switch.
195   */
196  export function classifyGcsError(e: unknown): string {
197    if (axios.isAxiosError(e)) {
198      if (e.code === 'ECONNABORTED') return 'timeout'
199      if (e.response) return `http_${e.response.status}`
200      return 'network'
201    }
202    const code = getErrnoCode(e)
203    // Node fs errno codes are E<UPPERCASE> (ENOSPC, EACCES). Axios also sets
204    // .code (ERR_NETWORK, ERR_BAD_OPTION, EPROTO) — don't bucket those as fs.
205    if (code && /^E[A-Z]+$/.test(code) && !code.startsWith('ERR_')) {
206      return KNOWN_FS_CODES.has(code) ? `fs_${code}` : 'fs_other'
207    }
208    // fflate sets numeric .code (0-14) on inflate/unzip errors — catches
209    // deflate-level corruption ("unexpected EOF", "invalid block type") that
210    // the message regex misses.
211    if (typeof (e as { code?: unknown })?.code === 'number') return 'zip_parse'
212    const msg = errorMessage(e)
213    if (/unzip|invalid zip|central directory/i.test(msg)) return 'zip_parse'
214    if (/empty body/.test(msg)) return 'empty_latest'
215    return 'other'
216  }