#!/usr/bin/env node

/**
 * test-sharp-autocrop.js
 *
 * Test Sharp Autocrop Methods
 *
 * Compares different Sharp autocrop strategies on existing screenshots:
 * 1. Entropy strategy (focus on complex/interesting content)
 * 2. Attention strategy (focus on salient features)
 * 3. Trim method (remove uniform borders)
 * 4. Center crop (baseline)
 * 5. Current DOM-based cropping (for comparison)
 *
 * Usage: node test-sharp-autocrop.js [siteId] [limit]
 *   siteId - test only this site folder; when omitted, the screenshots
 *            directory is scanned for sites that have uncropped screenshots
 *   limit  - maximum number of sites to pick up when scanning (default 5)
 *
 * Generates test output in screenshots/autocrop-tests/{siteId}/
 */

import sharp from 'sharp';
import { copyFile, readdir, stat, mkdir } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const SCREENSHOTS_DIR = process.env.SCREENSHOT_BASE_PATH || join(__dirname, '..', 'screenshots');
const OUTPUT_DIR = join(SCREENSHOTS_DIR, 'autocrop-tests');

/** Number of sites scanned when no usable limit is given on the command line. */
const DEFAULT_SITE_LIMIT = 5;

/** JPEG encoder settings shared by every generated test image. */
const JPEG_OPTIONS = { quality: 85, mozjpeg: true };

// Parse command line args
const siteId = process.argv[2];
const parsedLimit = Number.parseInt(process.argv[3], 10);
// Missing, non-numeric, zero and negative limits all fall back to the default.
const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : DEFAULT_SITE_LIMIT;

/**
 * Target dimensions for cropped screenshots (same as current system)
 */
const TARGET_DIMENSIONS = {
  desktop_above: { width: 1024, height: 768 },
  desktop_below: { width: 1024, height: 768 },
  mobile_above: { width: 375, height: 667 },
};

/**
 * Report whether a filesystem path can be stat'ed.
 *
 * @param {string} path - Path to probe.
 * @returns {Promise<boolean>} true when stat succeeds, false on any stat error.
 */
async function pathExists(path) {
  try {
    await stat(path);
    return true;
  } catch {
    return false;
  }
}

/**
 * Crop an image to the target box with the given sharp position/strategy and
 * write it as a JPEG.
 *
 * sharp only honours `position` (including the entropy/attention strategies)
 * when both dimensions are supplied and the fit is `cover`, so that is what is
 * used here. `cover` scales uniformly and crops the overflow, so the picture is
 * never distorted.
 *
 * The pipeline writes straight to disk: going through an intermediate JPEG
 * buffer and re-saving it would re-encode the image with default settings and
 * discard JPEG_OPTIONS.
 *
 * @param {string} method - Label stored in the returned record.
 * @param {string|number} position - sharp position, gravity or strategy.
 * @param {string} inputPath - Source image.
 * @param {string} outputPath - Destination JPEG.
 * @param {number} targetWidth - Width of the crop box in pixels.
 * @param {number} targetHeight - Height of the crop box in pixels.
 * @returns {Promise<{method: string, originalSize: string, croppedSize: string, fileSize: number}>}
 */
async function cropToTarget(method, position, inputPath, outputPath, targetWidth, targetHeight) {
  const metadata = await sharp(inputPath).metadata();

  // toFile resolves with the written image's width, height and byte size, so
  // no follow-up metadata()/stat() round trip is needed.
  const info = await sharp(inputPath)
    .resize(targetWidth, targetHeight, {
      fit: 'cover',
      position,
      withoutEnlargement: true, // never upscale small screenshots
    })
    .jpeg(JPEG_OPTIONS)
    .toFile(outputPath);

  return {
    method,
    originalSize: `${metadata.width}x${metadata.height}`,
    croppedSize: `${info.width}x${info.height}`,
    fileSize: info.size,
  };
}

/**
 * Apply entropy-based cropping (keeps the most complex region).
 *
 * @param {string} inputPath - Source image.
 * @param {string} outputPath - Destination JPEG.
 * @param {number} targetWidth - Crop width in pixels.
 * @param {number} targetHeight - Crop height in pixels.
 * @returns {Promise<object>} Result record with method, sizes and file size.
 */
async function cropWithEntropy(inputPath, outputPath, targetWidth, targetHeight) {
  return cropToTarget(
    'entropy',
    sharp.strategy.entropy,
    inputPath,
    outputPath,
    targetWidth,
    targetHeight
  );
}

/**
 * Apply attention-based cropping (keeps the most salient region).
 *
 * @param {string} inputPath - Source image.
 * @param {string} outputPath - Destination JPEG.
 * @param {number} targetWidth - Crop width in pixels.
 * @param {number} targetHeight - Crop height in pixels.
 * @returns {Promise<object>} Result record with method, sizes and file size.
 */
async function cropWithAttention(inputPath, outputPath, targetWidth, targetHeight) {
  return cropToTarget(
    'attention',
    sharp.strategy.attention,
    inputPath,
    outputPath,
    targetWidth,
    targetHeight
  );
}

/**
 * Apply trim-based autocrop (removes uniform borders), then scale down to the
 * target width.
 *
 * The trimmed image is handed to the second pipeline as raw pixels so that the
 * only lossy encode is the final JPEG. The intermediate step exists solely to
 * learn the trimmed dimensions for the report.
 *
 * @param {string} inputPath - Source image.
 * @param {string} outputPath - Destination JPEG.
 * @param {number} targetWidth - Maximum output width in pixels.
 * @param {number} targetHeight - Accepted for signature parity with the other
 *   crop functions; trimming is the only crop here, so height is not enforced.
 * @returns {Promise<object>} Result record including `trimmedSize`.
 */
async function cropWithTrim(inputPath, outputPath, targetWidth, targetHeight) {
  const metadata = await sharp(inputPath).metadata();

  // First trim uniform borders...
  const { data, info: trimmedInfo } = await sharp(inputPath)
    .trim({
      threshold: 10, // Trim pixels within 10 units of border color
    })
    .raw()
    .toBuffer({ resolveWithObject: true });

  // ...then resize the trimmed pixels to the target width.
  const info = await sharp(data, {
    raw: {
      width: trimmedInfo.width,
      height: trimmedInfo.height,
      channels: trimmedInfo.channels,
    },
  })
    .resize(targetWidth, null, {
      fit: 'inside',
      withoutEnlargement: true,
    })
    .jpeg(JPEG_OPTIONS)
    .toFile(outputPath);

  return {
    method: 'trim',
    originalSize: `${metadata.width}x${metadata.height}`,
    trimmedSize: `${trimmedInfo.width}x${trimmedInfo.height}`,
    croppedSize: `${info.width}x${info.height}`,
    fileSize: info.size,
  };
}

/**
 * Simple center crop (baseline)
 *
 * @param {string} inputPath - Source image.
 * @param {string} outputPath - Destination JPEG.
 * @param {number} targetWidth - Crop width in pixels.
 * @param {number} targetHeight - Crop height in pixels.
 * @returns {Promise<object>} Result record with method, sizes and file size.
 */
async function cropWithCenter(inputPath, outputPath, targetWidth, targetHeight) {
  return cropToTarget('center', 'centre', inputPath, outputPath, targetWidth, targetHeight);
}

/**
 * Crop methods exercised for every screenshot, in reporting order.
 * `suffix` is appended to the output file name, `label` is used in log lines.
 */
const CROP_METHODS = [
  { label: 'Entropy', suffix: 'entropy', crop: cropWithEntropy },
  { label: 'Attention', suffix: 'attention', crop: cropWithAttention },
  { label: 'Trim', suffix: 'trim', crop: cropWithTrim },
  { label: 'Center', suffix: 'center', crop: cropWithCenter },
];

/**
 * Pick the target dimensions whose key appears in the file name.
 *
 * @param {string} filename - Screenshot file name.
 * @returns {{width: number, height: number}|null} Matching dimensions, or null
 *   when the file name matches none of the known screenshot kinds.
 */
function resolveTargetDimensions(filename) {
  const key = Object.keys(TARGET_DIMENSIONS).find((name) => filename.includes(name));
  return key ? TARGET_DIMENSIONS[key] : null;
}

/**
 * Test all autocrop methods on a single screenshot
 *
 * A failing method is logged and skipped so the remaining methods still run.
 *
 * @param {string} siteId - Site folder name under the screenshots directory.
 * @param {string} filename - Uncropped screenshot file name inside that folder.
 * @returns {Promise<{filename: string, results: object[]}|null>} Per-method
 *   results, or null when the file is missing or is not a known screenshot kind.
 */
async function testScreenshot(siteId, filename) {
  // Determine target dimensions based on filename; skip unknown kinds
  const targetDims = resolveTargetDimensions(filename);
  if (!targetDims) {
    return null;
  }

  // Skip if file doesn't exist
  const inputPath = join(SCREENSHOTS_DIR, siteId, filename);
  if (!(await pathExists(inputPath))) {
    return null;
  }

  // Create output directory
  const outputDir = join(OUTPUT_DIR, siteId);
  await mkdir(outputDir, { recursive: true });

  const baseName = filename.replace(/\.jpg$/, '');

  console.log(`\n Testing ${filename}...`);

  const results = [];

  for (const { label, suffix, crop } of CROP_METHODS) {
    try {
      const outputPath = join(outputDir, `${baseName}_${suffix}.jpg`);
      const result = await crop(inputPath, outputPath, targetDims.width, targetDims.height);
      results.push(result);
      // Only the trim method reports an intermediate (trimmed) size.
      const detail = result.trimmedSize ? ` (trimmed to ${result.trimmedSize})` : '';
      console.log(` ✓ ${label}: ${result.fileSize} bytes${detail}`);
    } catch (error) {
      console.log(` ✗ ${label} failed: ${error.message}`);
    }
  }

  // Copy current cropped version for comparison (if exists)
  const currentCroppedPath = join(
    SCREENSHOTS_DIR,
    siteId,
    `${baseName.replace('_uncropped', '')}.jpg`
  );
  try {
    const currentStats = await stat(currentCroppedPath);
    const currentMeta = await sharp(currentCroppedPath).metadata();
    results.push({
      method: 'current_dom',
      originalSize: 'N/A',
      croppedSize: `${currentMeta.width}x${currentMeta.height}`,
      fileSize: currentStats.size,
    });
    console.log(` ✓ Current DOM-based: ${currentStats.size} bytes`);

    // Copy to output dir for visual comparison. A byte-for-byte copy keeps the
    // comparison honest; saving through sharp would re-encode the image.
    await copyFile(currentCroppedPath, join(outputDir, `${baseName}_current_dom.jpg`));
  } catch (error) {
    // A missing cropped version is expected; anything else is worth reporting.
    if (error.code !== 'ENOENT') {
      console.log(` ✗ Current DOM-based failed: ${error.message}`);
    }
  }

  return {
    filename,
    results,
  };
}

/**
 * Test all screenshots for a site
 *
 * @param {string} siteId - Site folder name under the screenshots directory.
 * @returns {Promise<object[]>} One entry per screenshot that was processed.
 */
async function testSite(siteId) {
  console.log(`\nTesting site ${siteId}...`);

  const uncroppedFiles = [
    'desktop_above_uncropped.jpg',
    'desktop_below_uncropped.jpg',
    'mobile_above_uncropped.jpg',
  ];

  const results = [];

  // Sequential on purpose: keeps the log output grouped per screenshot.
  for (const filename of uncroppedFiles) {
    const result = await testScreenshot(siteId, filename);
    if (result) {
      results.push(result);
    }
  }

  return results;
}

/**
 * Collect site folders that contain an uncropped desktop screenshot.
 *
 * Folders are taken in the order readdir returns them (not randomly) until
 * `maxSites` have been found.
 *
 * @param {number} maxSites - Maximum number of site folders to return.
 * @returns {Promise<string[]>} Matching folder names.
 */
async function findSitesWithUncropped(maxSites) {
  const folders = await readdir(SCREENSHOTS_DIR);
  const sites = [];

  for (const folder of folders) {
    if (sites.length >= maxSites) break;

    const folderPath = join(SCREENSHOTS_DIR, folder);

    let folderStats;
    try {
      folderStats = await stat(folderPath);
    } catch {
      // Entry disappeared or is unreadable; it cannot be tested anyway.
      continue;
    }
    if (!folderStats.isDirectory()) continue;

    // Check if has uncropped files
    if (await pathExists(join(folderPath, 'desktop_above_uncropped.jpg'))) {
      sites.push(folder);
    }
  }

  return sites;
}

/**
 * Entry point: choose the sites to test, run every crop method on their
 * screenshots and print a summary with guidance for the visual comparison.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔬 Testing Sharp Autocrop Methods\n');
  console.log('Methods to test:');
  console.log(' 1. Entropy - Focus on most complex/interesting content');
  console.log(' 2. Attention - Focus on salient features (faces, high contrast)');
  console.log(' 3. Trim - Remove uniform borders, then crop');
  console.log(' 4. Center - Simple center crop (baseline)');
  console.log(' 5. Current DOM-based - Our existing approach (for comparison)\n');

  let sitesToTest = [];

  if (siteId) {
    // Test specific site
    sitesToTest = [siteId];
  } else {
    // Test the first sites found that have uncropped screenshots
    console.log(`Scanning for sites with uncropped screenshots (limit: ${limit})...`);
    sitesToTest = await findSitesWithUncropped(limit);
  }

  console.log(`\nTesting ${sitesToTest.length} sites...\n`);

  const allResults = [];

  for (const site of sitesToTest) {
    const results = await testSite(site);
    allResults.push({ siteId: site, results });
  }

  const screenshotCount = allResults.reduce((total, { results }) => total + results.length, 0);

  // Print summary
  console.log('\n\n📊 Summary\n');
  console.log(`Tested ${sitesToTest.length} sites`);
  console.log(`Screenshots processed: ${screenshotCount}`);
  console.log(`Output directory: ${OUTPUT_DIR}\n`);
  console.log('Next steps:');
  console.log(' 1. Browse to output directory and visually compare results');
  console.log(' 2. Look for which method best preserves important content (CTAs, trust signals)');
  console.log(' 3. Compare file sizes and visual quality');
  console.log(' 4. Decide if Sharp autocrop can replace DOM-based cropping\n');
  console.log('Visual comparison tips:');
  console.log(' - Entropy tends to focus on detailed/textured areas');
  console.log(' - Attention focuses on high-contrast salient features');
  console.log(' - Trim removes borders but may not crop intelligently');
  console.log(' - Center is simple but may miss important content');
  console.log(' - Current DOM-based preserves headers, CTAs, and trust signals\n');
}

main().catch((error) => {
  console.error('❌ Fatal error:', error);
  process.exit(1);
});