/ scripts / dedupe-uncropped-screenshots.js
dedupe-uncropped-screenshots.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Deduplicate Uncropped Screenshots
  5   *
  6   * Compares cropped and uncropped screenshot versions to detect cases where
  7   * cropping had minimal effect. Deletes uncropped versions that are essentially
  8   * identical to their cropped counterparts (after resizing to match dimensions).
  9   *
 10   * This recovers disk space from screenshots captured before the optimization
 11   * that skips storing identical uncropped versions.
 12   */
 13  
 14  import { readdir, stat, unlink } from 'fs/promises';
 15  import { join, dirname } from 'path';
 16  import { fileURLToPath } from 'url';
 17  import sharp from 'sharp';
 18  
 19  const __filename = fileURLToPath(import.meta.url);
 20  const __dirname = dirname(__filename);
 21  
 22  const SCREENSHOTS_DIR = process.env.SCREENSHOT_BASE_PATH || join(__dirname, '..', 'screenshots');
 23  
 24  // Parse command line args
 25  const dryRun = process.argv.includes('--dry-run');
 26  
 27  // Screenshot pairs to check (cropped -> uncropped)
 28  const SCREENSHOT_PAIRS = [
 29    { cropped: 'desktop_above.jpg', uncropped: 'desktop_above_uncropped.jpg' },
 30    { cropped: 'desktop_below.jpg', uncropped: 'desktop_below_uncropped.jpg' },
 31    { cropped: 'mobile_above.jpg', uncropped: 'mobile_above_uncropped.jpg' },
 32  ];
 33  
 34  // Thresholds for considering images identical (same as image-optimizer.js)
 35  const SIZE_DIFF_THRESHOLD_BYTES = 5120; // 5KB
 36  const SIZE_DIFF_THRESHOLD_PERCENT = 1; // 1%
 37  
 38  /**
 39   * Format bytes to human readable format
 40   */
 41  function formatBytes(bytes) {
 42    if (bytes === 0) return '0 Bytes';
 43    const k = 1024;
 44    const sizes = ['Bytes', 'KB', 'MB', 'GB'];
 45    const i = Math.floor(Math.log(bytes) / Math.log(k));
 46    return `${Math.round((bytes / Math.pow(k, i)) * 100) / 100} ${sizes[i]}`;
 47  }
 48  
 49  /**
 50   * Compare cropped and uncropped screenshots
 51   * Resizes uncropped to match cropped dimensions before comparison
 52   * @param {string} croppedPath - Path to cropped screenshot
 53   * @param {string} uncroppedPath - Path to uncropped screenshot
 54   * @returns {Promise<Object>} - Comparison result
 55   */
 56  async function compareScreenshots(croppedPath, uncroppedPath) {
 57    try {
 58      // Load cropped image and get dimensions
 59      const croppedBuffer = await sharp(croppedPath).toBuffer();
 60      const croppedMetadata = await sharp(croppedPath).metadata();
 61  
 62      // Resize uncropped to match cropped dimensions
 63      const resizedUncroppedBuffer = await sharp(uncroppedPath)
 64        .resize(croppedMetadata.width, croppedMetadata.height, {
 65          fit: 'cover',
 66          position: 'entropy',
 67        })
 68        .jpeg({
 69          quality: 85,
 70          mozjpeg: true,
 71        })
 72        .toBuffer();
 73  
 74      // Compare file sizes
 75      const croppedSize = croppedBuffer.length;
 76      const uncroppedSize = resizedUncroppedBuffer.length;
 77      const sizeDiff = Math.abs(croppedSize - uncroppedSize);
 78      const sizeDiffPercent = (sizeDiff / uncroppedSize) * 100;
 79  
 80      const isIdentical =
 81        sizeDiff < SIZE_DIFF_THRESHOLD_BYTES && sizeDiffPercent < SIZE_DIFF_THRESHOLD_PERCENT;
 82  
 83      return {
 84        isIdentical,
 85        croppedSize,
 86        uncroppedSize,
 87        sizeDiff,
 88        sizeDiffPercent,
 89      };
 90    } catch (error) {
 91      return {
 92        error: error.message,
 93      };
 94    }
 95  }
 96  
 97  /**
 98   * Get file size in bytes
 99   */
100  async function getFileSize(filePath) {
101    try {
102      const stats = await stat(filePath);
103      return stats.size;
104    } catch {
105      return 0;
106    }
107  }
108  
/**
 * Scan the given entries of SCREENSHOTS_DIR and collect every uncropped
 * screenshot that compareScreenshots() reports as identical to its cropped
 * counterpart.
 *
 * Entries that are not directories (or cannot be stat'ed) are skipped, as are
 * pairs where either file is missing. Comparison failures are logged and
 * counted, but do not stop the scan.
 *
 * @param {string[]} screenshotFolders - Entry names read from SCREENSHOTS_DIR
 * @returns {Promise<Object>} - Counters (foldersChecked, pairsChecked,
 *   identicalFound, differentFound, errors, spaceToFree) and filesToDelete
 */
async function findDuplicateUncropped(screenshotFolders) {
  const stats = {
    foldersChecked: 0,
    pairsChecked: 0,
    identicalFound: 0,
    differentFound: 0,
    errors: 0,
    spaceToFree: 0,
    filesToDelete: [],
  };

  for (const folder of screenshotFolders) {
    const folderPath = join(SCREENSHOTS_DIR, folder);

    // Skip if not a directory
    try {
      const folderStats = await stat(folderPath);
      if (!folderStats.isDirectory()) {
        continue;
      }
    } catch {
      continue;
    }

    stats.foldersChecked++;

    for (const pair of SCREENSHOT_PAIRS) {
      const croppedPath = join(folderPath, pair.cropped);
      const uncroppedPath = join(folderPath, pair.uncropped);

      // Skip if either file doesn't exist
      try {
        await stat(croppedPath);
        await stat(uncroppedPath);
      } catch {
        continue;
      }

      stats.pairsChecked++;

      const comparison = await compareScreenshots(croppedPath, uncroppedPath);

      if (comparison.error) {
        // Surface the reason instead of only bumping a counter
        stats.errors++;
        console.error(`  ❌ Error comparing ${folder}/${pair.uncropped}: ${comparison.error}`);
        continue;
      }

      if (!comparison.isIdentical) {
        stats.differentFound++;
        continue;
      }

      stats.identicalFound++;
      const fileSize = await getFileSize(uncroppedPath);
      stats.spaceToFree += fileSize;
      stats.filesToDelete.push({
        path: uncroppedPath,
        size: fileSize,
        folder,
        filename: pair.uncropped,
        sizeDiff: comparison.sizeDiff,
        sizeDiffPercent: comparison.sizeDiffPercent,
      });
    }

    // Progress indicator every 100 folders
    if (stats.foldersChecked % 100 === 0) {
      console.log(`  Checked ${stats.foldersChecked}/${screenshotFolders.length} folders...`);
    }
  }

  return stats;
}

/**
 * Delete the given files one by one, logging progress every 100 deletions.
 *
 * A failed deletion is logged and counted; it does not abort the run, and its
 * size is not included in the freed total.
 *
 * @param {Object[]} filesToDelete - Entries with at least `path` and `size`
 * @returns {Promise<Object>} - { deleted, deleteErrors, spaceFreed }
 */
async function deleteDuplicateFiles(filesToDelete) {
  let deleted = 0;
  let deleteErrors = 0;
  let spaceFreed = 0;

  for (const file of filesToDelete) {
    try {
      await unlink(file.path);
      deleted++;
      // Only bytes that were really removed count as freed
      spaceFreed += file.size;
      if (deleted % 100 === 0) {
        console.log(`  Deleted ${deleted}/${filesToDelete.length} files...`);
      }
    } catch (error) {
      deleteErrors++;
      console.error(`  ❌ Error deleting ${file.path}: ${error.message}`);
    }
  }

  return { deleted, deleteErrors, spaceFreed };
}

/**
 * Script entry point: scan SCREENSHOTS_DIR for uncropped screenshots that
 * duplicate their cropped version, print a summary, then either list a sample
 * of the candidates (--dry-run) or delete them.
 *
 * Exits the process with code 1 when the screenshots directory cannot be read.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔍 Analyzing screenshot folders for duplicate uncropped versions...\n');

  // Get all screenshot folders
  let screenshotFolders;
  try {
    screenshotFolders = await readdir(SCREENSHOTS_DIR);
  } catch (error) {
    console.error('❌ Error reading screenshots directory:', error.message);
    process.exit(1);
  }

  console.log(`Found ${screenshotFolders.length} screenshot folders\n`);

  const stats = await findDuplicateUncropped(screenshotFolders);

  console.log('\n📊 Analysis Results:');
  console.log(`   Folders checked: ${stats.foldersChecked}`);
  console.log(`   Screenshot pairs checked: ${stats.pairsChecked}`);
  console.log(`   Identical uncropped found: ${stats.identicalFound}`);
  console.log(`   Different uncropped found: ${stats.differentFound}`);
  console.log(`   Errors: ${stats.errors}`);
  console.log(`\n💾 Space to be freed: ${formatBytes(stats.spaceToFree)}`);

  if (stats.identicalFound === 0) {
    console.log('\n✅ No duplicate uncropped versions found!');
    return;
  }

  if (dryRun) {
    console.log('\n🔍 DRY RUN MODE - No files will be deleted\n');
    console.log('Sample files that would be deleted:');
    for (const file of stats.filesToDelete.slice(0, 10)) {
      console.log(
        `  - ${file.folder}/${file.filename} (${formatBytes(file.size)}, diff: ${Math.round(file.sizeDiff / 1024)}KB / ${file.sizeDiffPercent.toFixed(1)}%)`
      );
    }
    if (stats.filesToDelete.length > 10) {
      console.log(`  ... and ${stats.filesToDelete.length - 10} more`);
    }
    console.log('\nRun without --dry-run to actually delete these files');
    return;
  }

  console.log('\n🗑️  Deleting duplicate uncropped screenshots...');

  const { deleted, deleteErrors, spaceFreed } = await deleteDuplicateFiles(stats.filesToDelete);

  console.log('\n✅ Cleanup complete!');
  console.log(`   Files deleted: ${deleted}`);
  console.log(`   Errors: ${deleteErrors}`);
  console.log(`   Space freed: ${formatBytes(spaceFreed)}`);
}

main().catch(error => {
  console.error('❌ Fatal error:', error);
  process.exit(1);
});