error-page-detector.test.js
/**
 * Tests for Error Page Detector
 *
 * Covers the three helpers exported by the detector module:
 *   - extractVisibleText(html): text of the page without <script>/<style> content
 *   - countWords(text): number of whitespace-separated words
 *   - detectErrorPage(html, statusCode): result object exposing
 *     { isErrorPage, reason, wordCount, httpStatusCode } as asserted below
 */

import { test } from 'node:test';
import assert from 'node:assert';
import {
  detectErrorPage,
  extractVisibleText,
  countWords,
} from '../../src/utils/error-page-detector.js';

// ---------------------------------------------------------------------------
// extractVisibleText
// ---------------------------------------------------------------------------

test('extractVisibleText - removes scripts and styles', () => {
  const html = `
    <html>
      <head>
        <style>body { color: red; }</style>
        <script>console.log('test');</script>
      </head>
      <body>
        <h1>HTTP ERROR 403</h1>
        <p>Permission Denied</p>
        <script>alert('hidden');</script>
      </body>
    </html>
  `;

  const text = extractVisibleText(html);
  // Visible markup text must survive...
  assert.ok(text.includes('HTTP ERROR 403'));
  assert.ok(text.includes('Permission Denied'));
  // ...while script and style bodies must not leak into the result.
  assert.ok(!text.includes('console.log'));
  assert.ok(!text.includes('alert'));
  assert.ok(!text.includes('color: red'));
});

test('extractVisibleText - normalizes whitespace', () => {
  // The fixture deliberately contains a blank line and runs of spaces so the
  // assertions below actually exercise whitespace collapsing.
  const html = `
    <html>
      <body>
        <h1>Title</h1>

        <p>Paragraph   with   spaces</p>
      </body>
    </html>
  `;

  const text = extractVisibleText(html);
  // Multiple spaces/newlines should be collapsed to single space, so no two
  // consecutive spaces may remain (single spaces between words are expected).
  assert.ok(!text.includes('  '));
  assert.ok(text.includes('Title'));
  assert.ok(text.includes('Paragraph with spaces'));
});

// ---------------------------------------------------------------------------
// countWords
// ---------------------------------------------------------------------------

test('countWords - counts words correctly', () => {
  assert.strictEqual(countWords('Hello world'), 2);
  assert.strictEqual(countWords('One two three four five'), 5);
  assert.strictEqual(countWords(''), 0);
  // Whitespace-only input contains no words.
  assert.strictEqual(countWords('   '), 0);
  assert.strictEqual(countWords('Single'), 1);
  // Runs of spaces between words must not produce empty "words".
  assert.strictEqual(countWords('Multiple   spaces   between'), 3);
});

// ---------------------------------------------------------------------------
// detectErrorPage
// ---------------------------------------------------------------------------

test('detectErrorPage - detects HTTP ERROR 403', () => {
  const html = `
    <html>
      <body>
        <h1>HTTP ERROR 403</h1>
        <p>Access Denied</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  assert.ok(result.reason.includes('HTTP ERROR'));
  assert.ok(result.wordCount < 200);
  // The status code passed in is echoed back on the result.
  assert.strictEqual(result.httpStatusCode, 200);
});

test('detectErrorPage - detects Permission Denied', () => {
  const html = `
    <html>
      <body>
        <h1>Permission Denied</h1>
        <p>You do not have access to this resource.</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  assert.ok(result.reason.includes('Permission Denied'));
});

test('detectErrorPage - detects "You don\'t have authorisation to view this page"', () => {
  const html = `
    <html>
      <body>
        <h1>Error</h1>
        <p>You don't have authorisation to view this page.</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  assert.ok(result.reason.includes("You don't have authorisation"));
});

test('detectErrorPage - detects 403 Forbidden', () => {
  const html = `
    <html>
      <body>
        <h1>403 Forbidden</h1>
        <p>Access to this resource is denied.</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  // Either wording of the matched indicator is acceptable in the reason.
  assert.ok(result.reason.includes('Forbidden') || result.reason.includes('403'));
});

test('detectErrorPage - does not flag pages with 200+ words', () => {
  // Generate HTML with lots of content (>200 words): word0 word1 ... word249
  const words = Array.from({ length: 250 }, (_, i) => `word${i}`).join(' ');
  const html = `
    <html>
      <body>
        <h1>HTTP ERROR</h1>
        <p>${words}</p>
      </body>
    </html>
  `;

  // An error phrase is present, but the page is long enough to be exempt.
  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, false);
  assert.ok(result.reason.includes('>= 200 word threshold'));
  assert.ok(result.wordCount >= 200);
});

test('detectErrorPage - does not flag legitimate pages with few words', () => {
  const html = `
    <html>
      <body>
        <h1>Welcome</h1>
        <p>Contact us today for a free quote.</p>
        <button>Get Started</button>
      </body>
    </html>
  `;

  // Short page, but none of its text is an error indicator.
  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, false);
  assert.ok(result.reason.includes('No error indicators found'));
});

test('detectErrorPage - only checks 2xx status codes', () => {
  const html = `
    <html>
      <body>
        <h1>HTTP ERROR 403</h1>
        <p>Permission Denied</p>
      </body>
    </html>
  `;

  // Should not check 4xx status codes (not false positives)
  const result403 = detectErrorPage(html, 403);
  assert.strictEqual(result403.isErrorPage, false);
  assert.ok(result403.reason.includes('Non-2xx status code'));

  // Should not check 5xx status codes
  const result500 = detectErrorPage(html, 500);
  assert.strictEqual(result500.isErrorPage, false);
  assert.ok(result500.reason.includes('Non-2xx status code'));

  // Should check 2xx status codes
  const result200 = detectErrorPage(html, 200);
  assert.strictEqual(result200.isErrorPage, true);
});

test('detectErrorPage - case insensitive detection', () => {
  // Same indicators as the tests above, written entirely in lower case.
  const html = `
    <html>
      <body>
        <h1>http error</h1>
        <p>permission denied</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
});

test('detectErrorPage - detects Access Denied', () => {
  const html = `
    <html>
      <body>
        <h1>Access Denied</h1>
        <p>You do not have permission to view this page.</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  assert.ok(result.reason.includes('Access Denied') || result.reason.includes('Access denied'));
});

test('detectErrorPage - detects Service Unavailable', () => {
  const html = `
    <html>
      <body>
        <h1>Service Unavailable</h1>
        <p>Please try again later.</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  assert.ok(result.reason.includes('Service Unavailable'));
});

test('detectErrorPage - detects 404 Not Found', () => {
  const html = `
    <html>
      <body>
        <h1>404 Not Found</h1>
        <p>The page you are looking for does not exist.</p>
      </body>
    </html>
  `;

  const result = detectErrorPage(html, 200);
  assert.strictEqual(result.isErrorPage, true);
  assert.ok(result.reason.includes('404 Not Found') || result.reason.includes('Not Found'));
});

// ---------------------------------------------------------------------------
// Edge cases
// ---------------------------------------------------------------------------

test('countWords - handles null and undefined', () => {
  assert.strictEqual(countWords(null), 0);
  assert.strictEqual(countWords(undefined), 0);
  assert.strictEqual(countWords(''), 0);
});

test('extractVisibleText - handles empty HTML', () => {
  assert.strictEqual(extractVisibleText(''), '');
  assert.strictEqual(extractVisibleText('<html></html>'), '');
  assert.strictEqual(extractVisibleText('<html><body></body></html>'), '');
});

test('extractVisibleText - handles malformed HTML gracefully', () => {
  const text = extractVisibleText('<div>Unclosed div');
  // JSDOM should handle this gracefully
  // Only the type is pinned here: the call must not throw and must return a string.
  assert.strictEqual(typeof text, 'string');
});