// tests/utils/error-page-detector.test.js
/**
 * Tests for src/utils/error-page-detector.js
 *
 * Covers: extractVisibleText, countWords, detectErrorPage, detectUnderConstruction,
 * and the exported ERROR_INDICATORS array.
 */
  6  
  7  import { test, describe } from 'node:test';
  8  import assert from 'node:assert/strict';
  9  
 10  import {
 11    detectErrorPage,
 12    detectUnderConstruction,
 13    extractVisibleText,
 14    countWords,
 15    ERROR_INDICATORS,
 16  } from '../../src/utils/error-page-detector.js';
 17  
 18  // ─── extractVisibleText ───────────────────────────────────────────────────────
 19  
 20  describe('extractVisibleText', () => {
 21    test('strips script tags', () => {
 22      const html = '<html><script>alert("x");</script><p>Hello world</p></html>';
 23      const text = extractVisibleText(html);
 24      assert.ok(!text.includes('alert'));
 25      assert.ok(text.includes('Hello world'));
 26    });
 27  
 28    test('strips style tags', () => {
 29      const html = '<html><style>body { color: red; }</style><p>Visible</p></html>';
 30      const text = extractVisibleText(html);
 31      assert.ok(!text.includes('color'));
 32      assert.ok(text.includes('Visible'));
 33    });
 34  
 35    test('strips noscript tags', () => {
 36      const html = '<html><noscript>JS required</noscript><p>Content</p></html>';
 37      const text = extractVisibleText(html);
 38      assert.ok(!text.includes('JS required'));
 39      assert.ok(text.includes('Content'));
 40    });
 41  
 42    test('strips iframe tags', () => {
 43      const html = '<html><iframe src="x.html">Frame content</iframe><p>Real</p></html>';
 44      const text = extractVisibleText(html);
 45      assert.ok(!text.includes('Frame content'));
 46      assert.ok(text.includes('Real'));
 47    });
 48  
 49    test('strips all HTML tags', () => {
 50      const html = '<div><p>Hello <strong>world</strong></p></div>';
 51      const text = extractVisibleText(html);
 52      assert.ok(!text.includes('<'));
 53      assert.ok(text.includes('Hello'));
 54      assert.ok(text.includes('world'));
 55    });
 56  
 57    test('normalizes whitespace', () => {
 58      const html = '<p>  too   many   spaces  </p>';
 59      const text = extractVisibleText(html);
 60      assert.ok(!text.includes('   '));
 61    });
 62  
 63    test('returns empty string for empty html', () => {
 64      const text = extractVisibleText('');
 65      assert.equal(text, '');
 66    });
 67  });
 68  
 69  // ─── countWords ───────────────────────────────────────────────────────────────
 70  
 71  describe('countWords', () => {
 72    test('counts simple words', () => {
 73      assert.equal(countWords('hello world foo'), 3);
 74    });
 75  
 76    test('returns 0 for empty string', () => {
 77      assert.equal(countWords(''), 0);
 78    });
 79  
 80    test('returns 0 for null', () => {
 81      assert.equal(countWords(null), 0);
 82    });
 83  
 84    test('returns 0 for undefined', () => {
 85      assert.equal(countWords(undefined), 0);
 86    });
 87  
 88    test('handles multiple spaces between words', () => {
 89      assert.equal(countWords('one   two   three'), 3);
 90    });
 91  
 92    test('returns 0 for whitespace-only string', () => {
 93      assert.equal(countWords('   '), 0);
 94    });
 95  
 96    test('counts single word', () => {
 97      assert.equal(countWords('hello'), 1);
 98    });
 99  });
100  
101  // ─── detectErrorPage ─────────────────────────────────────────────────────────
102  
/**
 * Suite for detectErrorPage(html, httpStatusCode).
 *
 * Expectations checked here: non-2xx statuses and pages with 200+ words are
 * never reported as error pages; sparse 2xx pages containing an error
 * indicator are; the result exposes wordCount and httpStatusCode.
 */
describe('detectErrorPage', () => {
  // Builds the minimal single-paragraph page used by the sparse-page fixtures.
  const sparsePage = (text) => `<html><body><p>${text}</p></body></html>`;

  test('returns isErrorPage=false for non-2xx status codes', () => {
    const { isErrorPage } = detectErrorPage('<html>error</html>', 301);
    assert.strictEqual(isErrorPage, false);
  });

  test('returns isErrorPage=false for 404 status (non-2xx)', () => {
    const { isErrorPage } = detectErrorPage('<html>not found</html>', 404);
    assert.strictEqual(isErrorPage, false);
  });

  test('returns isErrorPage=false for pages with >=200 words', () => {
    // 250 copies of "word" separated by single spaces.
    const filler = Array.from({ length: 250 }, () => 'word').join(' ');
    const outcome = detectErrorPage(`<p>${filler}</p>`, 200);
    assert.strictEqual(outcome.isErrorPage, false);
    assert.ok(outcome.wordCount >= 200);
  });

  test('detects 403 Forbidden on sparse page', () => {
    const outcome = detectErrorPage(sparsePage('403 Forbidden'), 200);
    assert.strictEqual(outcome.isErrorPage, true);
    assert.ok(outcome.indicator.includes('403'));
  });

  test('detects Access Denied on sparse page', () => {
    const { isErrorPage } = detectErrorPage(sparsePage('Access Denied'), 200);
    assert.strictEqual(isErrorPage, true);
  });

  test('detects HTTP ERROR on sparse page', () => {
    const { isErrorPage } = detectErrorPage(sparsePage('HTTP ERROR 500'), 200);
    assert.strictEqual(isErrorPage, true);
  });

  test('returns isErrorPage=false for normal sparse page without error indicators', () => {
    const { isErrorPage } = detectErrorPage(sparsePage('Welcome to our site'), 200);
    assert.strictEqual(isErrorPage, false);
  });

  test('result includes wordCount and httpStatusCode when error detected', () => {
    const outcome = detectErrorPage(sparsePage('Access Denied'), 200);
    // Only the presence of the keys is checked, not their values.
    for (const key of ['wordCount', 'httpStatusCode']) {
      assert.ok(key in outcome);
    }
  });

  test('works with 201 status code (2xx)', () => {
    const { isErrorPage } = detectErrorPage(sparsePage('403 Forbidden'), 201);
    assert.strictEqual(isErrorPage, true);
  });
});
160  
161  // ─── detectUnderConstruction ──────────────────────────────────────────────────
162  
/**
 * Suite for detectUnderConstruction(html, pageTitle?).
 *
 * Expectations checked here: placeholder phrases ("under construction",
 * "coming soon", "parked domain", "domain is for sale") in the body or in the
 * page title set isUnderConstruction; ordinary pages and a parenthesised
 * "(coming soon)" inside a nav do not; the result carries a string reason.
 */
describe('detectUnderConstruction', () => {
  // Registers a test that only checks the isUnderConstruction flag.
  // The detector arguments are forwarded exactly as given, so a call with a
  // single argument stays a single-argument call.
  const registerFlagTest = (title, expectedFlag, ...detectorArgs) => {
    test(title, () => {
      const { isUnderConstruction } = detectUnderConstruction(...detectorArgs);
      assert.strictEqual(isUnderConstruction, expectedFlag);
    });
  };

  test('detects "under construction" phrase', () => {
    const outcome = detectUnderConstruction(
      '<html><body><h1>Under Construction</h1></body></html>',
    );
    assert.strictEqual(outcome.isUnderConstruction, true);
    assert.ok(outcome.phrase.toLowerCase().includes('under construction'));
  });

  registerFlagTest(
    'detects "coming soon" phrase',
    true,
    '<html><body><h1>Coming Soon</h1><p>We are launching soon.</p></body></html>',
  );

  registerFlagTest(
    'detects "parked domain"',
    true,
    '<html><body><p>This is a parked domain</p></body></html>',
  );

  registerFlagTest(
    'returns false for normal page',
    false,
    '<html><body><h1>Welcome to Our Plumbing Service</h1><p>We fix pipes.</p></body></html>',
  );

  // "(coming soon)" inside nav should be stripped, so NOT under construction
  registerFlagTest(
    'ignores "coming soon" when inside parentheses in nav',
    false,
    '<html><nav><ul><li>New Service (coming soon)</li></ul></nav><p>We are open.</p></html>',
  );

  registerFlagTest(
    'detects phrase from pageTitle',
    true,
    '<html><body><p>Welcome.</p></body></html>',
    'Coming Soon',
  );

  registerFlagTest(
    'detects "domain is for sale"',
    true,
    '<html><body><p>This domain is for sale</p></body></html>',
  );

  test('result includes reason string', () => {
    const { reason } = detectUnderConstruction('<html><body><p>normal page</p></body></html>');
    assert.ok(typeof reason === 'string');
  });
});
216  
217  // ─── ERROR_INDICATORS ─────────────────────────────────────────────────────────
218  
/**
 * Suite for the exported ERROR_INDICATORS constant: it must be an array and
 * must contain a few well-known error strings.
 */
describe('ERROR_INDICATORS', () => {
  test('is an array', () => {
    const isArray = Array.isArray(ERROR_INDICATORS);
    assert.ok(isArray);
  });

  test('contains common HTTP error strings', () => {
    const requiredEntries = ['403 Forbidden', '404 Not Found', 'Access Denied'];
    for (const entry of requiredEntries) {
      assert.ok(ERROR_INDICATORS.includes(entry));
    }
  });
});