/ tests / pipeline / error-page-detector.test.js
error-page-detector.test.js
  1  /**
  2   * Tests for Error Page Detector
  3   */
  4  
  5  import { test } from 'node:test';
  6  import assert from 'node:assert';
  7  import {
  8    detectErrorPage,
  9    extractVisibleText,
 10    countWords,
 11  } from '../../src/utils/error-page-detector.js';
 12  
 13  test('extractVisibleText - removes scripts and styles', () => {
 14    const html = `
 15      <html>
 16        <head>
 17          <style>body { color: red; }</style>
 18          <script>console.log('test');</script>
 19        </head>
 20        <body>
 21          <h1>HTTP ERROR 403</h1>
 22          <p>Permission Denied</p>
 23          <script>alert('hidden');</script>
 24        </body>
 25      </html>
 26    `;
 27  
 28    const text = extractVisibleText(html);
 29    assert.ok(text.includes('HTTP ERROR 403'));
 30    assert.ok(text.includes('Permission Denied'));
 31    assert.ok(!text.includes('console.log'));
 32    assert.ok(!text.includes('alert'));
 33    assert.ok(!text.includes('color: red'));
 34  });
 35  
 36  test('extractVisibleText - normalizes whitespace', () => {
 37    const html = `
 38      <html>
 39        <body>
 40          <h1>Title</h1>
 41  
 42          <p>Paragraph   with   spaces</p>
 43        </body>
 44      </html>
 45    `;
 46  
 47    const text = extractVisibleText(html);
 48    // Multiple spaces/newlines should be collapsed to single space
 49    assert.ok(!text.includes('  '));
 50    assert.ok(text.includes('Title'));
 51    assert.ok(text.includes('Paragraph with spaces'));
 52  });
 53  
 54  test('countWords - counts words correctly', () => {
 55    assert.strictEqual(countWords('Hello world'), 2);
 56    assert.strictEqual(countWords('One two three four five'), 5);
 57    assert.strictEqual(countWords(''), 0);
 58    assert.strictEqual(countWords('   '), 0);
 59    assert.strictEqual(countWords('Single'), 1);
 60    assert.strictEqual(countWords('Multiple   spaces   between'), 3);
 61  });
 62  
 63  test('detectErrorPage - detects HTTP ERROR 403', () => {
 64    const html = `
 65      <html>
 66        <body>
 67          <h1>HTTP ERROR 403</h1>
 68          <p>Access Denied</p>
 69        </body>
 70      </html>
 71    `;
 72  
 73    const result = detectErrorPage(html, 200);
 74    assert.strictEqual(result.isErrorPage, true);
 75    assert.ok(result.reason.includes('HTTP ERROR'));
 76    assert.ok(result.wordCount < 200);
 77    assert.strictEqual(result.httpStatusCode, 200);
 78  });
 79  
 80  test('detectErrorPage - detects Permission Denied', () => {
 81    const html = `
 82      <html>
 83        <body>
 84          <h1>Permission Denied</h1>
 85          <p>You do not have access to this resource.</p>
 86        </body>
 87      </html>
 88    `;
 89  
 90    const result = detectErrorPage(html, 200);
 91    assert.strictEqual(result.isErrorPage, true);
 92    assert.ok(result.reason.includes('Permission Denied'));
 93  });
 94  
 95  test('detectErrorPage - detects "You don\'t have authorisation to view this page"', () => {
 96    const html = `
 97      <html>
 98        <body>
 99          <h1>Error</h1>
100          <p>You don't have authorisation to view this page.</p>
101        </body>
102      </html>
103    `;
104  
105    const result = detectErrorPage(html, 200);
106    assert.strictEqual(result.isErrorPage, true);
107    assert.ok(result.reason.includes("You don't have authorisation"));
108  });
109  
// "403 Forbidden" in a 200 response should be flagged; the reason may mention
// either the word or the numeric code.
test('detectErrorPage - detects 403 Forbidden', () => {
  const markup = `
    <html>
      <body>
        <h1>403 Forbidden</h1>
        <p>Access to this resource is denied.</p>
      </body>
    </html>
  `;

  const { isErrorPage, reason } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, true);
  assert.ok(['Forbidden', '403'].some((needle) => reason.includes(needle)));
});
124  
// A page with error wording but 250 body words should not be flagged, and the
// reason should cite the 200-word threshold.
test('detectErrorPage - does not flag pages with 200+ words', () => {
  // Build 250 distinct filler tokens: word0 word1 ... word249.
  const filler = Array.from({ length: 250 }, (_, index) => `word${index}`).join(' ');
  const markup = `
    <html>
      <body>
        <h1>HTTP ERROR</h1>
        <p>${filler}</p>
      </body>
    </html>
  `;

  const { isErrorPage, reason, wordCount } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, false);
  assert.ok(reason.includes('>= 200 word threshold'));
  assert.ok(wordCount >= 200);
});
145  
// A short page without any error wording must not be flagged just for brevity.
test('detectErrorPage - does not flag legitimate pages with few words', () => {
  const markup = `
    <html>
      <body>
        <h1>Welcome</h1>
        <p>Contact us today for a free quote.</p>
        <button>Get Started</button>
      </body>
    </html>
  `;

  const { isErrorPage, reason } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, false);
  assert.ok(reason.includes('No error indicators found'));
});
161  
// The same error-looking body is only flagged when the response claimed
// success; 4xx/5xx responses are skipped with a "Non-2xx status code" reason.
test('detectErrorPage - only checks 2xx status codes', () => {
  const markup = `
    <html>
      <body>
        <h1>HTTP ERROR 403</h1>
        <p>Permission Denied</p>
      </body>
    </html>
  `;

  // 4xx and 5xx responses are already errors, so they are not false positives.
  for (const status of [403, 500]) {
    const skipped = detectErrorPage(markup, status);
    assert.strictEqual(skipped.isErrorPage, false);
    assert.ok(skipped.reason.includes('Non-2xx status code'));
  }

  // A 2xx response with the same body is inspected and flagged.
  const inspected = detectErrorPage(markup, 200);
  assert.strictEqual(inspected.isErrorPage, true);
});
186  
// Lower-case error wording should be flagged just like the capitalised forms.
test('detectErrorPage - case insensitive detection', () => {
  const markup = `
    <html>
      <body>
        <h1>http error</h1>
        <p>permission denied</p>
      </body>
    </html>
  `;

  const { isErrorPage } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, true);
});
200  
// "Access Denied" in a 200 response should be flagged; either capitalisation
// of the phrase is accepted in the reason.
test('detectErrorPage - detects Access Denied', () => {
  const markup = `
    <html>
      <body>
        <h1>Access Denied</h1>
        <p>You do not have permission to view this page.</p>
      </body>
    </html>
  `;

  const { isErrorPage, reason } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, true);
  assert.ok(['Access Denied', 'Access denied'].some((needle) => reason.includes(needle)));
});
215  
// "Service Unavailable" in a 200 response should be flagged and named in the reason.
test('detectErrorPage - detects Service Unavailable', () => {
  const markup = `
    <html>
      <body>
        <h1>Service Unavailable</h1>
        <p>Please try again later.</p>
      </body>
    </html>
  `;

  const { isErrorPage, reason } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, true);
  assert.ok(reason.includes('Service Unavailable'));
});
230  
// A "soft 404" (status 200, body says 404 Not Found) should be flagged; the
// reason may carry the phrase with or without the numeric code.
test('detectErrorPage - detects 404 Not Found', () => {
  const markup = `
    <html>
      <body>
        <h1>404 Not Found</h1>
        <p>The page you are looking for does not exist.</p>
      </body>
    </html>
  `;

  const { isErrorPage, reason } = detectErrorPage(markup, 200);

  assert.strictEqual(isErrorPage, true);
  assert.ok(['404 Not Found', 'Not Found'].some((needle) => reason.includes(needle)));
});
245  
// Missing or empty input is expected to count as zero words.
test('countWords - handles null and undefined', () => {
  for (const emptyish of [null, undefined, '']) {
    assert.strictEqual(countWords(emptyish), 0);
  }
});
251  
// Documents with no text content are expected to yield the empty string.
test('extractVisibleText - handles empty HTML', () => {
  const blankDocuments = ['', '<html></html>', '<html><body></body></html>'];

  for (const markup of blankDocuments) {
    assert.strictEqual(extractVisibleText(markup), '');
  }
});
257  
// Markup with an unclosed tag should still produce a string result.
test('extractVisibleText - handles malformed HTML gracefully', () => {
  const visible = extractVisibleText('<div>Unclosed div');

  // Only the result type is pinned here; the exact text is left unconstrained.
  assert.strictEqual(typeof visible, 'string');
});