/**
 * error-page-detector.test.js
 *
 * Tests for src/utils/error-page-detector.js
 *
 * Covers: extractVisibleText, countWords, detectErrorPage,
 * detectUnderConstruction, and the exported ERROR_INDICATORS list.
 */

import { test, describe } from 'node:test';
import assert from 'node:assert/strict';

import {
  detectErrorPage,
  detectUnderConstruction,
  extractVisibleText,
  countWords,
  ERROR_INDICATORS,
} from '../../src/utils/error-page-detector.js';

// ─── extractVisibleText ───────────────────────────────────────────────────────

describe('extractVisibleText', () => {
  test('strips script tags', () => {
    const html = '<html><script>alert("x");</script><p>Hello world</p></html>';
    const text = extractVisibleText(html);
    assert.ok(!text.includes('alert'));
    assert.ok(text.includes('Hello world'));
  });

  test('strips style tags', () => {
    const html = '<html><style>body { color: red; }</style><p>Visible</p></html>';
    const text = extractVisibleText(html);
    assert.ok(!text.includes('color'));
    assert.ok(text.includes('Visible'));
  });

  test('strips noscript tags', () => {
    const html = '<html><noscript>JS required</noscript><p>Content</p></html>';
    const text = extractVisibleText(html);
    assert.ok(!text.includes('JS required'));
    assert.ok(text.includes('Content'));
  });

  test('strips iframe tags', () => {
    const html = '<html><iframe src="x.html">Frame content</iframe><p>Real</p></html>';
    const text = extractVisibleText(html);
    assert.ok(!text.includes('Frame content'));
    assert.ok(text.includes('Real'));
  });

  test('strips all HTML tags', () => {
    const html = '<div><p>Hello <strong>world</strong></p></div>';
    const text = extractVisibleText(html);
    assert.ok(!text.includes('<'));
    assert.ok(text.includes('Hello'));
    assert.ok(text.includes('world'));
  });

  test('normalizes whitespace', () => {
    // The input must contain runs of spaces, and the check must look for a
    // run (two spaces) rather than a single space: a single-space check would
    // fail on any output that keeps the words separated.
    const html = '<p>  too   many    spaces  </p>';
    const text = extractVisibleText(html);
    assert.ok(!text.includes('  '));
    assert.ok(text.includes('too many spaces'));
  });

  test('returns empty string for empty html', () => {
    const text = extractVisibleText('');
    assert.equal(text, '');
  });
});

// ─── countWords ───────────────────────────────────────────────────────────────

describe('countWords', () => {
  test('counts simple words', () => {
    assert.equal(countWords('hello world foo'), 3);
  });

  test('returns 0 for empty string', () => {
    assert.equal(countWords(''), 0);
  });

  test('returns 0 for null', () => {
    assert.equal(countWords(null), 0);
  });

  test('returns 0 for undefined', () => {
    assert.equal(countWords(undefined), 0);
  });

  test('handles multiple spaces between words', () => {
    // Separators are deliberately longer than one space so that empty tokens
    // between words would inflate the count if they were not discarded.
    assert.equal(countWords('one   two    three'), 3);
  });

  test('returns 0 for whitespace-only string', () => {
    assert.equal(countWords('   '), 0);
  });

  test('counts single word', () => {
    assert.equal(countWords('hello'), 1);
  });
});

// ─── detectErrorPage ─────────────────────────────────────────────────────────

describe('detectErrorPage', () => {
  test('returns isErrorPage=false for non-2xx status codes', () => {
    // The body is one that IS flagged under a 200 status (see the
    // "403 Forbidden on sparse page" test), so this can only pass if the
    // status code itself suppresses detection.
    const html = '<html><body><p>403 Forbidden</p></body></html>';
    const result = detectErrorPage(html, 301);
    assert.equal(result.isErrorPage, false);
  });

  test('returns isErrorPage=false for 404 status (non-2xx)', () => {
    // Same idea: the body text is one of the exported ERROR_INDICATORS.
    const html = '<html><body><p>404 Not Found</p></body></html>';
    const result = detectErrorPage(html, 404);
    assert.equal(result.isErrorPage, false);
  });

  test('returns isErrorPage=false for pages with >=200 words', () => {
    // An indicator is embedded in the long page so the assertion exercises
    // the word-count rule rather than merely the absence of an indicator.
    const manyWords = Array(250).fill('word').join(' ');
    const html = `<p>403 Forbidden ${manyWords}</p>`;
    const result = detectErrorPage(html, 200);
    assert.equal(result.isErrorPage, false);
    assert.ok(result.wordCount >= 200);
  });

  test('detects 403 Forbidden on sparse page', () => {
    const html = '<html><body><p>403 Forbidden</p></body></html>';
    const result = detectErrorPage(html, 200);
    assert.equal(result.isErrorPage, true);
    assert.ok(result.indicator.includes('403'));
  });

  test('detects Access Denied on sparse page', () => {
    const html = '<html><body><p>Access Denied</p></body></html>';
    const result = detectErrorPage(html, 200);
    assert.equal(result.isErrorPage, true);
  });

  test('detects HTTP ERROR on sparse page', () => {
    const html = '<html><body><p>HTTP ERROR 500</p></body></html>';
    const result = detectErrorPage(html, 200);
    assert.equal(result.isErrorPage, true);
  });

  test('returns isErrorPage=false for normal sparse page without error indicators', () => {
    const html = '<html><body><p>Welcome to our site</p></body></html>';
    const result = detectErrorPage(html, 200);
    assert.equal(result.isErrorPage, false);
  });

  test('result includes wordCount and httpStatusCode when error detected', () => {
    const html = '<html><body><p>Access Denied</p></body></html>';
    const result = detectErrorPage(html, 200);
    // Confirm we are really on the "error detected" path before inspecting
    // the shape of the result.
    assert.equal(result.isErrorPage, true);
    assert.ok('wordCount' in result);
    assert.ok('httpStatusCode' in result);
  });

  test('works with 201 status code (2xx)', () => {
    const html = '<html><body><p>403 Forbidden</p></body></html>';
    const result = detectErrorPage(html, 201);
    assert.equal(result.isErrorPage, true);
  });
});

// ─── detectUnderConstruction ──────────────────────────────────────────────────

describe('detectUnderConstruction', () => {
  test('detects "under construction" phrase', () => {
    const html = '<html><body><h1>Under Construction</h1></body></html>';
    const result = detectUnderConstruction(html);
    assert.equal(result.isUnderConstruction, true);
    assert.ok(result.phrase.toLowerCase().includes('under construction'));
  });

  test('detects "coming soon" phrase', () => {
    const html = '<html><body><h1>Coming Soon</h1><p>We are launching soon.</p></body></html>';
    const result = detectUnderConstruction(html);
    assert.equal(result.isUnderConstruction, true);
  });

  test('detects "parked domain"', () => {
    const html = '<html><body><p>This is a parked domain</p></body></html>';
    const result = detectUnderConstruction(html);
    assert.equal(result.isUnderConstruction, true);
  });

  test('returns false for normal page', () => {
    const html =
      '<html><body><h1>Welcome to Our Plumbing Service</h1><p>We fix pipes.</p></body></html>';
    const result = detectUnderConstruction(html);
    assert.equal(result.isUnderConstruction, false);
  });

  test('ignores "coming soon" when inside parentheses in nav', () => {
    const html =
      '<html><nav><ul><li>New Service (coming soon)</li></ul></nav><p>We are open.</p></html>';
    const result = detectUnderConstruction(html);
    // "(coming soon)" inside nav should be stripped, so NOT under construction
    assert.equal(result.isUnderConstruction, false);
  });

  test('detects phrase from pageTitle', () => {
    // The body is innocuous; only the second argument carries the phrase.
    const html = '<html><body><p>Welcome.</p></body></html>';
    const result = detectUnderConstruction(html, 'Coming Soon');
    assert.equal(result.isUnderConstruction, true);
  });

  test('detects "domain is for sale"', () => {
    const html = '<html><body><p>This domain is for sale</p></body></html>';
    const result = detectUnderConstruction(html);
    assert.equal(result.isUnderConstruction, true);
  });

  test('result includes reason string', () => {
    const html = '<html><body><p>normal page</p></body></html>';
    const result = detectUnderConstruction(html);
    // assert.equal reports the actual type on failure, unlike assert.ok(bool).
    assert.equal(typeof result.reason, 'string');
  });
});

// ─── ERROR_INDICATORS ─────────────────────────────────────────────────────────

describe('ERROR_INDICATORS', () => {
  test('is an array', () => {
    assert.ok(Array.isArray(ERROR_INDICATORS));
  });

  test('contains common HTTP error strings', () => {
    assert.ok(ERROR_INDICATORS.includes('403 Forbidden'));
    assert.ok(ERROR_INDICATORS.includes('404 Not Found'));
    assert.ok(ERROR_INDICATORS.includes('Access Denied'));
  });
});