/ utils / dxt / zip.ts
zip.ts
  1  import { isAbsolute, normalize } from 'path'
  2  import { logForDebugging } from '../debug.js'
  3  import { isENOENT } from '../errors.js'
  4  import { getFsImplementation } from '../fsOperations.js'
  5  import { containsPathTraversal } from '../path.js'
  6  
  7  const LIMITS = {
  8    MAX_FILE_SIZE: 512 * 1024 * 1024, // 512MB per file
  9    MAX_TOTAL_SIZE: 1024 * 1024 * 1024, // 1024MB total uncompressed
 10    MAX_FILE_COUNT: 100000, // Maximum number of files
 11    MAX_COMPRESSION_RATIO: 50, // Anything above 50:1 is suspicious
 12    MIN_COMPRESSION_RATIO: 0.5, // Below 0.5:1 might indicate already compressed malicious content
 13  }
 14  
/**
 * State tracker for zip file validation during extraction.
 *
 * A single instance is shared across every entry of one archive and is
 * mutated by `validateZipFile` as each entry is checked.
 */
type ZipValidationState = {
  /** Number of entries seen so far; incremented once per `validateZipFile` call. */
  fileCount: number
  /** Running sum, in bytes, of the declared uncompressed size of every entry seen so far. */
  totalUncompressedSize: number
  /**
   * Denominator of the compression-ratio check. `unzipFile` sets it to the
   * byte length of the whole archive buffer.
   */
  compressedSize: number
  /** Initialized to an empty array by `unzipFile`; nothing in the visible code appends to it. */
  errors: string[]
}
 24  
/**
 * File metadata from fflate filter.
 *
 * Only the fields that `validateZipFile` reads are declared here.
 */
type ZipFileMetadata = {
  /** Entry path as stored in the archive; checked with `isPathSafe`. */
  name: string
  /** Declared uncompressed size in bytes; treated as 0 by `validateZipFile` when absent. */
  originalSize?: number
}
 32  
/**
 * Result of validating a single file in a zip archive.
 */
type FileValidationResult = {
  /** `true` when the entry passed every check. */
  isValid: boolean
  /** Human-readable reason for rejection; `validateZipFile` sets it only when `isValid` is `false`. */
  error?: string
}
 40  
 41  /**
 42   * Validates a file path to prevent path traversal attacks
 43   */
 44  export function isPathSafe(filePath: string): boolean {
 45    if (containsPathTraversal(filePath)) {
 46      return false
 47    }
 48  
 49    // Normalize the path to resolve any '.' segments
 50    const normalized = normalize(filePath)
 51  
 52    // Check for absolute paths (we only want relative paths in archives)
 53    if (isAbsolute(normalized)) {
 54      return false
 55    }
 56  
 57    return true
 58  }
 59  
 60  /**
 61   * Validates a single file during zip extraction
 62   */
 63  export function validateZipFile(
 64    file: ZipFileMetadata,
 65    state: ZipValidationState,
 66  ): FileValidationResult {
 67    state.fileCount++
 68  
 69    let error: string | undefined
 70  
 71    // Check file count
 72    if (state.fileCount > LIMITS.MAX_FILE_COUNT) {
 73      error = `Archive contains too many files: ${state.fileCount} (max: ${LIMITS.MAX_FILE_COUNT})`
 74    }
 75  
 76    // Validate path safety
 77    if (!isPathSafe(file.name)) {
 78      error = `Unsafe file path detected: "${file.name}". Path traversal or absolute paths are not allowed.`
 79    }
 80  
 81    // Check individual file size
 82    const fileSize = file.originalSize || 0
 83    if (fileSize > LIMITS.MAX_FILE_SIZE) {
 84      error = `File "${file.name}" is too large: ${Math.round(fileSize / 1024 / 1024)}MB (max: ${Math.round(LIMITS.MAX_FILE_SIZE / 1024 / 1024)}MB)`
 85    }
 86  
 87    // Track total uncompressed size
 88    state.totalUncompressedSize += fileSize
 89  
 90    // Check total size
 91    if (state.totalUncompressedSize > LIMITS.MAX_TOTAL_SIZE) {
 92      error = `Archive total size is too large: ${Math.round(state.totalUncompressedSize / 1024 / 1024)}MB (max: ${Math.round(LIMITS.MAX_TOTAL_SIZE / 1024 / 1024)}MB)`
 93    }
 94  
 95    // Check compression ratio for zip bomb detection
 96    const currentRatio = state.totalUncompressedSize / state.compressedSize
 97    if (currentRatio > LIMITS.MAX_COMPRESSION_RATIO) {
 98      error = `Suspicious compression ratio detected: ${currentRatio.toFixed(1)}:1 (max: ${LIMITS.MAX_COMPRESSION_RATIO}:1). This may be a zip bomb.`
 99    }
100  
101    return error ? { isValid: false, error } : { isValid: true }
102  }
103  
/**
 * Extracts an in-memory zip archive into a map of entry path → file bytes.
 *
 * Takes the raw archive bytes (rather than a path) so the caller can do the
 * disk read asynchronously. Extraction itself uses fflate's synchronous
 * `unzipSync` to avoid fflate worker termination crashes in bun.
 *
 * fflate is imported lazily so that its ~196KB of top-level lookup tables
 * (revfd Int32Array(32769), rev Uint16Array(32768), etc.) are not allocated at
 * startup when this module is reached via the plugin loader chain.
 *
 * Every entry is passed through `validateZipFile` before it is extracted; the
 * first entry that fails validation aborts the whole extraction.
 *
 * @param zipData - Raw bytes of the zip archive.
 * @returns Record mapping each entry path to its uncompressed contents.
 * @throws Error carrying the validation message when an entry is rejected.
 */
export async function unzipFile(
  zipData: Buffer,
): Promise<Record<string, Uint8Array>> {
  const { unzipSync } = await import('fflate')

  // Accumulator shared by all entries; the archive's byte length is the
  // denominator for the compression-ratio check.
  const state: ZipValidationState = {
    fileCount: 0,
    totalUncompressedSize: 0,
    compressedSize: zipData.length,
    errors: [],
  }

  // Filter callback: accept every entry that validates, throw on the first
  // one that does not.
  const acceptEntry = (entry: ZipFileMetadata): boolean => {
    const verdict = validateZipFile(entry, state)
    if (verdict.isValid) {
      return true
    }
    throw new Error(verdict.error!)
  }

  const extracted = unzipSync(new Uint8Array(zipData), { filter: acceptEntry })

  logForDebugging(
    `Zip extraction completed: ${state.fileCount} files, ${Math.round(state.totalUncompressedSize / 1024)}KB uncompressed`,
  )

  return extracted
}
142  
/**
 * Parse Unix file modes from a zip's central directory.
 *
 * fflate's `unzipSync` returns only `Record<string, Uint8Array>` — it does not
 * surface the external file attributes stored in the central directory. This
 * means executable bits are lost during extraction (everything becomes 0644).
 * The git-clone path preserves +x natively, but the GCS/zip path needs this
 * helper to keep parity.
 *
 * Returns `name → mode` for entries created on a Unix host (`versionMadeBy`
 * high byte === 3). Entries from other hosts, or with no mode bits set, are
 * omitted. Callers should treat a missing key as "use default mode".
 *
 * Entry names are decoded the same way fflate decodes them — UTF-8 when
 * general-purpose flag bit 11 is set, Latin-1 otherwise — so the returned keys
 * line up with the keys of the `unzipSync` result even for non-ASCII names.
 *
 * Format per PKZIP APPNOTE.TXT §4.3.12 (central directory) and §4.3.16 (EOCD).
 * ZIP64 is not handled — returns `{}` on archives >4GB or >65535 entries,
 * which is fine for marketplace zips (~3.5MB) and MCPB bundles.
 *
 * Malformed input never throws: parsing stops at the first entry that has a
 * bad signature or whose header/name would run past the EOCD record, and the
 * modes collected up to that point are returned.
 *
 * @param data - Raw bytes of the zip archive.
 * @returns Map of entry name to `st_mode` (file type + permission bits).
 */
export function parseZipModes(data: Uint8Array): Record<string, number> {
  // Buffer view for readUInt* methods — shares memory, no copy.
  const buf = Buffer.from(data.buffer, data.byteOffset, data.byteLength)
  const modes: Record<string, number> = {}

  // 1. Find the End of Central Directory record (sig 0x06054b50). It lives in
  //    the trailing 22 + 65535 bytes (fixed EOCD size + max comment length).
  //    Scan backwards — the EOCD is typically the last 22 bytes.
  const minEocd = Math.max(0, buf.length - 22 - 0xffff)
  let eocd = -1
  for (let i = buf.length - 22; i >= minEocd; i--) {
    if (buf.readUInt32LE(i) === 0x06054b50) {
      eocd = i
      break
    }
  }
  if (eocd < 0) return modes // malformed — let fflate's error surface elsewhere

  const entryCount = buf.readUInt16LE(eocd + 10)
  let off = buf.readUInt32LE(eocd + 16) // central directory start offset

  // 2. Walk central directory entries (sig 0x02014b50). Each entry has a
  //    46-byte fixed header followed by variable-length name/extra/comment.
  //    The central directory precedes the EOCD, so `eocd` is the upper bound
  //    for every read.
  for (let i = 0; i < entryCount; i++) {
    if (off + 46 > eocd || buf.readUInt32LE(off) !== 0x02014b50) break
    const versionMadeBy = buf.readUInt16LE(off + 4)
    const flags = buf.readUInt16LE(off + 8)
    const nameLen = buf.readUInt16LE(off + 28)
    const extraLen = buf.readUInt16LE(off + 30)
    const commentLen = buf.readUInt16LE(off + 32)
    const externalAttr = buf.readUInt32LE(off + 38)

    // A name length that overruns the directory means the entry is corrupt.
    // `buf.toString` would silently clamp and yield a garbage key, so stop.
    const nameStart = off + 46
    const nameEnd = nameStart + nameLen
    if (nameEnd > eocd) break

    // Bit 11 (0x0800) marks a UTF-8 name; without it, decode byte-for-byte as
    // Latin-1 to match fflate's key for the same entry.
    const name = buf.toString(
      flags & 0x0800 ? 'utf8' : 'latin1',
      nameStart,
      nameEnd,
    )

    // versionMadeBy high byte = host OS. 3 = Unix. For Unix zips, the high
    // 16 bits of externalAttr hold st_mode (file type + permission bits).
    if (versionMadeBy >> 8 === 3) {
      const mode = (externalAttr >>> 16) & 0xffff
      if (mode) modes[name] = mode
    }

    off = nameEnd + extraLen + commentLen
  }

  return modes
}
204  
/**
 * Loads a zip archive from disk (asynchronously) and extracts it.
 *
 * @param filePath - Location of the zip archive.
 * @returns Record mapping each entry path to its uncompressed contents.
 * @throws The original error, unchanged, when `isENOENT` recognizes it; any
 *   other failure is rethrown as a new `Error` whose message is prefixed with
 *   "Failed to read or unzip file: ".
 */
export async function readAndUnzipFile(
  filePath: string,
): Promise<Record<string, Uint8Array>> {
  const fsImpl = getFsImplementation()

  try {
    const archiveBytes = await fsImpl.readFileBytes(filePath)
    // Keep the `await`: unzipFile() is async, and returning its promise
    // directly would let a rejection skip the catch block below.
    return await unzipFile(archiveBytes)
  } catch (caught) {
    if (!isENOENT(caught)) {
      const detail = caught instanceof Error ? caught.message : String(caught)
      throw new Error(`Failed to read or unzip file: ${detail}`)
    }
    // Missing-file errors pass through untouched.
    throw caught
  }
}