/ utils / imagePaste.ts
imagePaste.ts
  1  import { feature } from 'bun:bundle'
  2  import { randomBytes } from 'crypto'
  3  import { execa } from 'execa'
  4  import { basename, extname, isAbsolute, join } from 'path'
  5  import {
  6    IMAGE_MAX_HEIGHT,
  7    IMAGE_MAX_WIDTH,
  8    IMAGE_TARGET_RAW_SIZE,
  9  } from '../constants/apiLimits.js'
 10  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 11  import { getImageProcessor } from '../tools/FileReadTool/imageProcessor.js'
 12  import { logForDebugging } from './debug.js'
 13  import { execFileNoThrowWithCwd } from './execFileNoThrow.js'
 14  import { getFsImplementation } from './fsOperations.js'
 15  import {
 16    detectImageFormatFromBase64,
 17    type ImageDimensions,
 18    maybeResizeAndDownsampleImageBuffer,
 19  } from './imageResizer.js'
 20  import { logError } from './log.js'
 21  
 22  // Native NSPasteboard reader. GrowthBook gate tengu_collage_kaleidoscope is
 23  // a kill switch (default on). Falls through to osascript when off.
 24  // The gate string is inlined at each callsite INSIDE the feature() condition
 25  // — module-scope helpers are NOT tree-shaken (see docs/feature-gating.md).
 26  
/** Platforms for which this module defines dedicated clipboard commands. */
type SupportedPlatform = 'darwin' | 'linux' | 'win32'

// Threshold in characters for when to consider text a "large paste"
export const PASTE_THRESHOLD = 800
 31  function getClipboardCommands() {
 32    const platform = process.platform as SupportedPlatform
 33  
 34    // Platform-specific temporary file paths
 35    // Use CLAUDE_CODE_TMPDIR if set, otherwise fall back to platform defaults
 36    const baseTmpDir =
 37      process.env.CLAUDE_CODE_TMPDIR ||
 38      (platform === 'win32' ? process.env.TEMP || 'C:\\Temp' : '/tmp')
 39    const screenshotFilename = 'claude_cli_latest_screenshot.png'
 40    const tempPaths: Record<SupportedPlatform, string> = {
 41      darwin: join(baseTmpDir, screenshotFilename),
 42      linux: join(baseTmpDir, screenshotFilename),
 43      win32: join(baseTmpDir, screenshotFilename),
 44    }
 45  
 46    const screenshotPath = tempPaths[platform] || tempPaths.linux
 47  
 48    // Platform-specific clipboard commands
 49    const commands: Record<
 50      SupportedPlatform,
 51      {
 52        checkImage: string
 53        saveImage: string
 54        getPath: string
 55        deleteFile: string
 56      }
 57    > = {
 58      darwin: {
 59        checkImage: `osascript -e 'the clipboard as «class PNGf»'`,
 60        saveImage: `osascript -e 'set png_data to (the clipboard as «class PNGf»)' -e 'set fp to open for access POSIX file "${screenshotPath}" with write permission' -e 'write png_data to fp' -e 'close access fp'`,
 61        getPath: `osascript -e 'get POSIX path of (the clipboard as «class furl»)'`,
 62        deleteFile: `rm -f "${screenshotPath}"`,
 63      },
 64      linux: {
 65        checkImage:
 66          'xclip -selection clipboard -t TARGETS -o 2>/dev/null | grep -E "image/(png|jpeg|jpg|gif|webp|bmp)" || wl-paste -l 2>/dev/null | grep -E "image/(png|jpeg|jpg|gif|webp|bmp)"',
 67        saveImage: `xclip -selection clipboard -t image/png -o > "${screenshotPath}" 2>/dev/null || wl-paste --type image/png > "${screenshotPath}" 2>/dev/null || xclip -selection clipboard -t image/bmp -o > "${screenshotPath}" 2>/dev/null || wl-paste --type image/bmp > "${screenshotPath}"`,
 68        getPath:
 69          'xclip -selection clipboard -t text/plain -o 2>/dev/null || wl-paste 2>/dev/null',
 70        deleteFile: `rm -f "${screenshotPath}"`,
 71      },
 72      win32: {
 73        checkImage:
 74          'powershell -NoProfile -Command "(Get-Clipboard -Format Image) -ne $null"',
 75        saveImage: `powershell -NoProfile -Command "$img = Get-Clipboard -Format Image; if ($img) { $img.Save('${screenshotPath.replace(/\\/g, '\\\\')}', [System.Drawing.Imaging.ImageFormat]::Png) }"`,
 76        getPath: 'powershell -NoProfile -Command "Get-Clipboard"',
 77        deleteFile: `del /f "${screenshotPath}"`,
 78      },
 79    }
 80  
 81    return {
 82      commands: commands[platform] || commands.linux,
 83      screenshotPath,
 84    }
 85  }
 86  
/** An image payload as returned by the clipboard/path readers in this module. */
export type ImageWithDimensions = {
  // Image bytes encoded as a base64 string.
  base64: string
  // Media type string, e.g. `image/png`.
  mediaType: string
  // Dimension info for the image, when available.
  dimensions?: ImageDimensions
}
 92  
 93  /**
 94   * Check if clipboard contains an image without retrieving it.
 95   */
 96  export async function hasImageInClipboard(): Promise<boolean> {
 97    if (process.platform !== 'darwin') {
 98      return false
 99    }
100    if (
101      feature('NATIVE_CLIPBOARD_IMAGE') &&
102      getFeatureValue_CACHED_MAY_BE_STALE('tengu_collage_kaleidoscope', true)
103    ) {
104      // Native NSPasteboard check (~0.03ms warm). Fall through to osascript
105      // when the module/export is missing. Catch a throw too: it would surface
106      // as an unhandled rejection in useClipboardImageHint's setTimeout.
107      try {
108        const { getNativeModule } = await import('image-processor-napi')
109        const hasImage = getNativeModule()?.hasClipboardImage
110        if (hasImage) {
111          return hasImage()
112        }
113      } catch (e) {
114        logError(e as Error)
115      }
116    }
117    const result = await execFileNoThrowWithCwd('osascript', [
118      '-e',
119      'the clipboard as «class PNGf»',
120    ])
121    return result.code === 0
122  }
123  
/**
 * Read an image from the system clipboard.
 *
 * On macOS, when the native reader is enabled, the image is read in-process.
 * Otherwise (or if the native reader throws) the platform's shell commands
 * check the clipboard, dump the image to a temp file, and the file is read
 * back, converted from BMP if necessary, and size-capped.
 *
 * @returns The image as base64 with its media type and dimensions, or `null`
 *   when the clipboard has no image or the shell fallback fails at any step.
 */
export async function getImageFromClipboard(): Promise<ImageWithDimensions | null> {
  // Fast path: native NSPasteboard reader (macOS only). Reads PNG bytes
  // directly in-process and downsamples via CoreGraphics if over the
  // dimension cap. ~5ms cold, sub-ms warm — vs. ~1.5s for the osascript
  // path below. Throws if the native module is unavailable, in which case
  // the catch block falls through to osascript. A `null` return from the
  // native call is authoritative (clipboard has no image).
  if (
    feature('NATIVE_CLIPBOARD_IMAGE') &&
    process.platform === 'darwin' &&
    getFeatureValue_CACHED_MAY_BE_STALE('tengu_collage_kaleidoscope', true)
  ) {
    try {
      const { getNativeModule } = await import('image-processor-napi')
      const readClipboard = getNativeModule()?.readClipboardImage
      if (!readClipboard) {
        throw new Error('native clipboard reader unavailable')
      }
      const native = readClipboard(IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT)
      if (!native) {
        return null
      }
      // The native path caps dimensions but not file size. A complex
      // 2000×2000 PNG can still exceed the 3.75MB raw / 5MB base64 API
      // limit — for that edge case, run through the same size-cap that
      // the osascript path uses (degrades to JPEG if needed). Cheap if
      // already under: just a sharp metadata read.
      const buffer: Buffer = native.png
      if (buffer.length > IMAGE_TARGET_RAW_SIZE) {
        const resized = await maybeResizeAndDownsampleImageBuffer(
          buffer,
          buffer.length,
          'png',
        )
        return {
          base64: resized.buffer.toString('base64'),
          mediaType: `image/${resized.mediaType}`,
          // resized.dimensions sees the already-downsampled buffer; native knows the true originals.
          dimensions: {
            originalWidth: native.originalWidth,
            originalHeight: native.originalHeight,
            displayWidth: resized.dimensions?.displayWidth ?? native.width,
            displayHeight: resized.dimensions?.displayHeight ?? native.height,
          },
        }
      }
      return {
        base64: buffer.toString('base64'),
        mediaType: 'image/png',
        dimensions: {
          originalWidth: native.originalWidth,
          originalHeight: native.originalHeight,
          displayWidth: native.width,
          displayHeight: native.height,
        },
      }
    } catch (e) {
      logError(e as Error)
      // Fall through to osascript fallback.
    }
  }

  const { commands, screenshotPath } = getClipboardCommands()

  // Flipped just before the save command runs. From then on the temp file may
  // exist (even partially written), so it must be removed on EVERY exit path —
  // otherwise a later call could read a stale screenshot left by a failed run.
  let tempFileMayExist = false

  try {
    // Check if clipboard has image
    const checkResult = await execa(commands.checkImage, {
      shell: true,
      reject: false,
    })
    if (checkResult.exitCode !== 0) {
      return null
    }

    // Save the image
    tempFileMayExist = true
    const saveResult = await execa(commands.saveImage, {
      shell: true,
      reject: false,
    })
    if (saveResult.exitCode !== 0) {
      return null
    }

    // Read the image back from the temp file
    let imageBuffer = getFsImplementation().readFileBytesSync(screenshotPath)
    if (imageBuffer.length === 0) {
      // Shell redirection can leave a zero-byte file behind; treat it as
      // "no image" instead of handing an empty buffer to the image pipeline.
      logForDebugging(`Clipboard image file is empty: ${screenshotPath}`, {
        level: 'warn',
      })
      return null
    }

    // BMP is not supported by the API — convert to PNG via Sharp.
    // This handles WSL2 where Windows copies images as BMP by default.
    // 0x42 0x4d is the "BM" magic number at the start of a BMP file.
    if (
      imageBuffer.length >= 2 &&
      imageBuffer[0] === 0x42 &&
      imageBuffer[1] === 0x4d
    ) {
      const sharp = await getImageProcessor()
      imageBuffer = await sharp(imageBuffer).png().toBuffer()
    }

    // Resize if needed to stay under 5MB API limit
    const resized = await maybeResizeAndDownsampleImageBuffer(
      imageBuffer,
      imageBuffer.length,
      'png',
    )
    const base64Image = resized.buffer.toString('base64')

    // Detect format from magic bytes
    const mediaType = detectImageFormatFromBase64(base64Image)

    return {
      base64: base64Image,
      mediaType,
      dimensions: resized.dimensions,
    }
  } catch {
    // Deliberately silent best-effort: any failure means "no usable image".
    // NOTE(review): on win32 the save command writes nothing when the
    // clipboard holds no image, so a missing-file read error here looks like
    // an expected path rather than an exceptional one — confirm before
    // adding error-level logging.
    return null
  } finally {
    if (tempFileMayExist) {
      try {
        // Cleanup (fire-and-forget, don't await)
        void execa(commands.deleteFile, { shell: true, reject: false })
      } catch {
        // Cleanup is best-effort and must never mask the function's result.
      }
    }
  }
}
243  
/**
 * Run the platform's `getPath` clipboard command and return what it printed.
 *
 * @returns The command's stdout with surrounding whitespace trimmed, or
 *   `null` when the command exits non-zero, prints nothing, or throws
 *   (a throw is logged).
 */
export async function getImagePathFromClipboard(): Promise<string | null> {
  const { commands } = getClipboardCommands()

  try {
    const { exitCode, stdout } = await execa(commands.getPath, {
      shell: true,
      reject: false,
    })
    const producedOutput = exitCode === 0 && Boolean(stdout)
    return producedOutput ? stdout.trim() : null
  } catch (err) {
    logError(err as Error)
    return null
  }
}
262  
/**
 * Regex pattern to match supported image file extensions. Kept in sync with
 * MIME_BY_EXT in BriefTool/upload.ts — attachments.ts uses this to set isImage
 * on the wire, and remote viewers fetch /preview iff isImage is true. An ext
 * here but not in MIME_BY_EXT (e.g. bmp) uploads as octet-stream and has no
 * /preview variant → broken thumbnail.
 *
 * Matching is case-insensitive and anchored to the end of the string; the
 * regex has no `g` flag, so `.test()` carries no state between calls.
 */
export const IMAGE_EXTENSION_REGEX = /\.(png|jpe?g|gif|webp)$/i
271  
/**
 * Remove one pair of matching outer quotes (single or double) from a string.
 *
 * The string must be at least two characters long and start and end with the
 * SAME quote character; a lone `"` or `'` is not a quoted string and is
 * returned unchanged rather than being collapsed to the empty string.
 *
 * @param text Text to clean
 * @returns Text without its outer quote pair, or `text` unchanged when it is
 *   not wrapped in a matching pair
 */
function removeOuterQuotes(text: string): string {
  // A single character cannot be both the opening and the closing quote.
  if (text.length < 2) {
    return text
  }

  const opening = text.charAt(0)
  const isQuote = opening === '"' || opening === "'"
  if (isQuote && text.endsWith(opening)) {
    return text.slice(1, -1)
  }
  return text
}
286  
/**
 * Remove shell escape backslashes from a path (for macOS/Linux/WSL).
 * On Windows, backslashes are path separators, so the path is returned
 * unchanged.
 *
 * A backslash followed by any character (other than a line terminator) is
 * replaced by that character alone. Because the scan is left-to-right and
 * non-overlapping, an escaped backslash (`\\`) collapses to one literal
 * backslash and is never re-read as the start of another escape, so
 * `name\ \(15\).png` becomes `name (15).png` and `a\\b` becomes `a\b`.
 * A trailing lone backslash is left in place.
 *
 * @param path Path that might contain shell-escaped characters
 * @returns Path with escape backslashes removed (on macOS/Linux/WSL only)
 */
function stripBackslashEscapes(path: string): string {
  // On Windows, don't remove backslashes as they're part of the path
  if (process.platform === 'win32') {
    return path
  }

  // One pass handles both cases: `.` also matches a backslash, so `\\` -> `\`
  // and `\x` -> `x` without needing a temporary placeholder for `\\`.
  return path.replace(/\\(.)/g, '$1')
}
318  
/**
 * Tell whether a piece of text looks like a path to a supported image file.
 *
 * Applies the same normalization as `asImageFilePath` (trim, outer quotes
 * removed, shell escapes stripped) and checks the result's extension.
 *
 * @param text Text to check
 * @returns `true` when the normalized text ends in a supported image extension
 */
export function isImageFilePath(text: string): boolean {
  return asImageFilePath(text) !== null
}
329  
/**
 * Normalize text that might be an image file path.
 *
 * The text is trimmed, one pair of outer quotes is removed, and shell escape
 * backslashes are stripped; the result is kept only if it ends in a
 * supported image extension.
 *
 * @param text Text to process
 * @returns The normalized path, or `null` if it is not an image path
 */
export function asImageFilePath(text: string): string | null {
  const candidate = stripBackslashEscapes(removeOuterQuotes(text.trim()))
  return IMAGE_EXTENSION_REGEX.test(candidate) ? candidate : null
}
345  
/**
 * Try to find and read an image file, falling back to clipboard search
 *
 * An absolute path is read directly. A bare filename is only accepted when
 * it equals the basename of the path currently on the clipboard, in which
 * case the clipboard's path is the one read. BMP content is converted to
 * PNG, and the image is resized if needed before being base64-encoded.
 *
 * Read failures are logged and yield `null`; errors thrown by the BMP
 * conversion or the resize step are not caught here and propagate to the
 * caller.
 *
 * @param text Pasted text that might be an image filename or path
 * @returns Object containing the image path and base64 data, or null if not found
 */
export async function tryReadImageFromPath(
  text: string,
): Promise<(ImageWithDimensions & { path: string }) | null> {
  // Strip terminal added spaces or quotes to dragged in paths
  const imagePath = asImageFilePath(text)
  if (!imagePath) {
    return null
  }

  let imageBuffer

  try {
    if (isAbsolute(imagePath)) {
      imageBuffer = getFsImplementation().readFileBytesSync(imagePath)
    } else {
      // VSCode Terminal just grabs the text content which is the filename
      // instead of getting the full path of the file pasted with cmd-v. So
      // we check if it matches the filename of the image in the clipboard.
      const clipboardPath = await getImagePathFromClipboard()
      if (clipboardPath && imagePath === basename(clipboardPath)) {
        imageBuffer = getFsImplementation().readFileBytesSync(clipboardPath)
      }
    }
  } catch (e) {
    logError(e as Error)
    return null
  }
  if (!imageBuffer) {
    return null
  }
  if (imageBuffer.length === 0) {
    logForDebugging(`Image file is empty: ${imagePath}`, { level: 'warn' })
    return null
  }

  // Format hint for the resizer, taken from the file extension.
  let formatHint = extname(imagePath).slice(1).toLowerCase() || 'png'

  // BMP is not supported by the API — convert to PNG via Sharp.
  // 0x42 0x4d is the "BM" magic number at the start of a BMP file.
  if (
    imageBuffer.length >= 2 &&
    imageBuffer[0] === 0x42 &&
    imageBuffer[1] === 0x4d
  ) {
    const sharp = await getImageProcessor()
    imageBuffer = await sharp(imageBuffer).png().toBuffer()
    // The buffer is PNG now regardless of what the extension claimed, so the
    // hint must describe the converted data, not the original file name.
    formatHint = 'png'
  }

  // Resize if needed to stay under 5MB API limit
  const resized = await maybeResizeAndDownsampleImageBuffer(
    imageBuffer,
    imageBuffer.length,
    formatHint,
  )
  const base64Image = resized.buffer.toString('base64')

  // Detect format from the actual file contents using magic bytes
  const mediaType = detectImageFormatFromBase64(base64Image)
  return {
    path: imagePath,
    base64: base64Image,
    mediaType,
    dimensions: resized.dimensions,
  }
}