#!/usr/bin/env node

/**
 * dedupe-uncropped-screenshots.js
 *
 * Deduplicate Uncropped Screenshots
 *
 * Compares cropped and uncropped screenshot versions to detect cases where
 * cropping had minimal effect. Deletes uncropped versions that are essentially
 * identical to their cropped counterparts (after resizing to match dimensions).
 *
 * This recovers disk space from screenshots captured before the optimization
 * that skips storing identical uncropped versions.
 *
 * Flags:
 *   --dry-run   Report what would be deleted without deleting anything.
 *
 * Environment:
 *   SCREENSHOT_BASE_PATH   Directory containing one sub-folder per screenshot
 *                          set; defaults to ../screenshots next to this script.
 */

import { readdir, stat, unlink } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import sharp from 'sharp';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// `||` (not `??`) on purpose: an empty SCREENSHOT_BASE_PATH also falls back.
const SCREENSHOTS_DIR = process.env.SCREENSHOT_BASE_PATH || join(__dirname, '..', 'screenshots');

// Parse command line args
const dryRun = process.argv.includes('--dry-run');

// Screenshot pairs to check (cropped -> uncropped)
const SCREENSHOT_PAIRS = [
  { cropped: 'desktop_above.jpg', uncropped: 'desktop_above_uncropped.jpg' },
  { cropped: 'desktop_below.jpg', uncropped: 'desktop_below_uncropped.jpg' },
  { cropped: 'mobile_above.jpg', uncropped: 'mobile_above_uncropped.jpg' },
];

// Thresholds for considering images identical (same as image-optimizer.js).
// Both must hold for a pair to count as identical.
const SIZE_DIFF_THRESHOLD_BYTES = 5120; // 5KB
const SIZE_DIFF_THRESHOLD_PERCENT = 1; // 1%

/**
 * Format a byte count as a human readable string (base 1024, 2 decimals max).
 *
 * @param {number} bytes - Number of bytes
 * @returns {string} e.g. "0 Bytes", "1.5 KB", "2 TB"
 */
function formatBytes(bytes) {
  // Zero, negative and non-finite inputs have no meaningful logarithm.
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp so sub-byte fractions and values beyond the largest unit still
  // resolve to a real unit instead of `undefined`.
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

  return `${Math.round((bytes / Math.pow(k, i)) * 100) / 100} ${sizes[i]}`;
}

/**
 * Compare cropped and uncropped screenshots.
 *
 * The uncropped image is resized to the cropped image's dimensions and encoded
 * as JPEG; the two resulting buffers are then compared by byte length only
 * (this is a size heuristic, not a pixel comparison).
 *
 * @param {string} croppedPath - Path to cropped screenshot
 * @param {string} uncroppedPath - Path to uncropped screenshot
 * @returns {Promise<Object>} On success:
 *   `{ isIdentical, croppedSize, uncroppedSize, sizeDiff, sizeDiffPercent }`.
 *   On failure: `{ error }` with a non-empty message (this function never rejects).
 */
async function compareScreenshots(croppedPath, uncroppedPath) {
  try {
    // The buffer and the metadata are independent reads of the same file.
    const [croppedBuffer, croppedMetadata] = await Promise.all([
      sharp(croppedPath).toBuffer(),
      sharp(croppedPath).metadata(),
    ]);

    // Resize uncropped to match cropped dimensions
    const resizedUncroppedBuffer = await sharp(uncroppedPath)
      .resize(croppedMetadata.width, croppedMetadata.height, {
        fit: 'cover',
        position: 'entropy',
      })
      .jpeg({
        quality: 85,
        mozjpeg: true,
      })
      .toBuffer();

    // Compare encoded sizes
    const croppedSize = croppedBuffer.length;
    const uncroppedSize = resizedUncroppedBuffer.length;
    const sizeDiff = Math.abs(croppedSize - uncroppedSize);
    const sizeDiffPercent = (sizeDiff / uncroppedSize) * 100;

    const isIdentical =
      sizeDiff < SIZE_DIFF_THRESHOLD_BYTES && sizeDiffPercent < SIZE_DIFF_THRESHOLD_PERCENT;

    return {
      isIdentical,
      croppedSize,
      uncroppedSize,
      sizeDiff,
      sizeDiffPercent,
    };
  } catch (error) {
    // Callers test `comparison.error` for truthiness, so never return an
    // empty message: that would make a failure look like a "different" result.
    return {
      error: error.message || String(error),
    };
  }
}

/**
 * Get file size in bytes.
 *
 * @param {string} filePath - Path of the file to measure
 * @returns {Promise<number>} Size in bytes, or 0 if the file cannot be stat'ed
 */
async function getFileSize(filePath) {
  try {
    const stats = await stat(filePath);
    return stats.size;
  } catch {
    // Best effort: the size is only used for reporting.
    return 0;
  }
}

/**
 * Delete the given files one by one, logging progress and per-file failures.
 *
 * @param {Array<{path: string, size: number}>} files - Files to remove
 * @returns {Promise<{deleted: number, deleteErrors: number, freedBytes: number}>}
 *   Counts of successes/failures and the summed size of the files that were
 *   actually removed.
 */
async function deleteFiles(files) {
  let deleted = 0;
  let deleteErrors = 0;
  let freedBytes = 0;

  for (const file of files) {
    try {
      await unlink(file.path);
      deleted++;
      // Only count space for files that were really deleted.
      freedBytes += file.size;
      if (deleted % 100 === 0) {
        console.log(` Deleted ${deleted}/${files.length} files...`);
      }
    } catch (error) {
      deleteErrors++;
      console.error(` ā Error deleting ${file.path}: ${error.message}`);
    }
  }

  return { deleted, deleteErrors, freedBytes };
}

/**
 * Entry point: scan every folder under SCREENSHOTS_DIR, compare each known
 * cropped/uncropped pair, print a summary, then either list (with --dry-run)
 * or delete the uncropped files judged identical to their cropped version.
 *
 * Exits the process with code 1 if the screenshots directory cannot be read.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('š Analyzing screenshot folders for duplicate uncropped versions...\n');

  // Get all screenshot folders
  let screenshotFolders;
  try {
    screenshotFolders = await readdir(SCREENSHOTS_DIR);
  } catch (error) {
    console.error('ā Error reading screenshots directory:', error.message);
    process.exit(1);
  }

  console.log(`Found ${screenshotFolders.length} screenshot folders\n`);

  const stats = {
    foldersChecked: 0,
    pairsChecked: 0,
    identicalFound: 0,
    differentFound: 0,
    errors: 0,
    spaceToFree: 0,
    filesToDelete: [],
  };

  // Check each folder
  for (const folder of screenshotFolders) {
    const folderPath = join(SCREENSHOTS_DIR, folder);

    // Skip if not a directory (or if it cannot be stat'ed)
    try {
      const folderStats = await stat(folderPath);
      if (!folderStats.isDirectory()) {
        continue;
      }
    } catch {
      continue;
    }

    stats.foldersChecked++;

    // Check each screenshot pair
    for (const pair of SCREENSHOT_PAIRS) {
      const croppedPath = join(folderPath, pair.cropped);
      const uncroppedPath = join(folderPath, pair.uncropped);

      // Skip if either file doesn't exist
      try {
        await Promise.all([stat(croppedPath), stat(uncroppedPath)]);
      } catch {
        continue;
      }

      stats.pairsChecked++;

      // Compare screenshots
      const comparison = await compareScreenshots(croppedPath, uncroppedPath);

      if (comparison.error) {
        stats.errors++;
        // Surface the reason so failed pairs can be investigated.
        console.error(` Error comparing ${folder}/${pair.cropped}: ${comparison.error}`);
        continue;
      }

      if (comparison.isIdentical) {
        stats.identicalFound++;
        const fileSize = await getFileSize(uncroppedPath);
        stats.spaceToFree += fileSize;
        stats.filesToDelete.push({
          path: uncroppedPath,
          size: fileSize,
          folder,
          filename: pair.uncropped,
          sizeDiff: comparison.sizeDiff,
          sizeDiffPercent: comparison.sizeDiffPercent,
        });
      } else {
        stats.differentFound++;
      }
    }

    // Progress indicator every 100 folders
    if (stats.foldersChecked % 100 === 0) {
      console.log(` Checked ${stats.foldersChecked}/${screenshotFolders.length} folders...`);
    }
  }

  console.log('\nš Analysis Results:');
  console.log(` Folders checked: ${stats.foldersChecked}`);
  console.log(` Screenshot pairs checked: ${stats.pairsChecked}`);
  console.log(` Identical uncropped found: ${stats.identicalFound}`);
  console.log(` Different uncropped found: ${stats.differentFound}`);
  console.log(` Errors: ${stats.errors}`);
  console.log(`\nš¾ Space to be freed: ${formatBytes(stats.spaceToFree)}`);

  if (stats.identicalFound === 0) {
    console.log('\nā No duplicate uncropped versions found!');
    return;
  }

  if (dryRun) {
    console.log('\nš DRY RUN MODE - No files will be deleted\n');
    console.log('Sample files that would be deleted:');
    for (const file of stats.filesToDelete.slice(0, 10)) {
      console.log(
        ` - ${file.folder}/${file.filename} (${formatBytes(file.size)}, diff: ${Math.round(file.sizeDiff / 1024)}KB / ${file.sizeDiffPercent.toFixed(1)}%)`
      );
    }
    if (stats.filesToDelete.length > 10) {
      console.log(` ... and ${stats.filesToDelete.length - 10} more`);
    }
    console.log('\nRun without --dry-run to actually delete these files');
    return;
  }

  console.log('\nšļø Deleting duplicate uncropped screenshots...');

  const { deleted, deleteErrors, freedBytes } = await deleteFiles(stats.filesToDelete);

  console.log('\nā Cleanup complete!');
  console.log(` Files deleted: ${deleted}`);
  console.log(` Errors: ${deleteErrors}`);
  // Report what was really freed, not the pre-deletion estimate.
  console.log(` Space freed: ${formatBytes(freedBytes)}`);
}

main().catch(error => {
  console.error('ā Fatal error:', error);
  process.exit(1);
});