/ tests / pipeline / score.test.js
score.test.js
  1  /**
  2   * Tests for Scoring Module
  3   * Uses Node.js 22+ mock.module() to mock LLM provider for comprehensive testing
  4   */
  5  
  6  import { test, describe, mock, beforeEach, afterEach } from 'node:test';
  7  import assert from 'node:assert';
  8  
  9  // Ensure vision scoring is enabled for tests
 10  // Must be set BEFORE score.js is imported (module captures at load time)
 11  process.env.ENABLE_VISION = 'true';
 12  process.env.USE_COMPUTER_VISION_SCORING = 'true';
 13  process.env.USE_COMPUTER_VISION_RESCORING = 'true';
 14  
 15  // Create mock functions
 16  const callLLMMock = mock.fn();
 17  const getProviderMock = mock.fn(() => 'openrouter');
 18  const getProviderDisplayNameMock = mock.fn(() => 'OpenRouter');
 19  
 20  // Mock llm-provider module BEFORE importing score.js
 21  mock.module('../../src/utils/llm-provider.js', {
 22    namedExports: {
 23      callLLM: callLLMMock,
 24      getProvider: getProviderMock,
 25      getProviderDisplayName: getProviderDisplayNameMock,
 26    },
 27  });
 28  
 29  // Mock llm-usage-tracker to avoid DB writes in tests
 30  mock.module('../../src/utils/llm-usage-tracker.js', {
 31    namedExports: {
 32      logLLMUsage: mock.fn(),
 33    },
 34  });
 35  
 36  // Mock circuit-breaker module to pass through without opening
 37  const mockOpenRouterBreaker = {
 38    fire: async fn => fn(),
 39    opened: false,
 40    halfOpen: false,
 41  };
 42  
 43  mock.module('../../src/utils/circuit-breaker.js', {
 44    namedExports: {
 45      openRouterBreaker: mockOpenRouterBreaker,
 46    },
 47  });
 48  
 49  // Now import score module
 50  const { scoreWebsite, extractGrade, extractScore } = await import('../../src/score.js');
 51  
 52  describe('Score Module', () => {
 53    beforeEach(() => {
 54      callLLMMock.mock.resetCalls();
 55      getProviderMock.mock.resetCalls();
 56      getProviderDisplayNameMock.mock.resetCalls();
 57    });
 58  
 59    describe('extractGrade', () => {
 60      test('extracts letter grade from valid result', () => {
 61        const result = {
 62          overall_calculation: {
 63            letter_grade: 'B+',
 64            conversion_score: 87.5,
 65          },
 66        };
 67  
 68        assert.strictEqual(extractGrade(result), 'B+');
 69      });
 70  
 71      test('returns null for missing grade', () => {
 72        const result = {};
 73        assert.strictEqual(extractGrade(result), null);
 74      });
 75  
 76      test('handles null input', () => {
 77        assert.strictEqual(extractGrade(null), null);
 78      });
 79  
 80      test('handles undefined input', () => {
 81        assert.strictEqual(extractGrade(undefined), null);
 82      });
 83  
 84      test('handles missing overall_calculation', () => {
 85        const result = { some_other_field: 'value' };
 86        assert.strictEqual(extractGrade(result), null);
 87      });
 88  
 89      test('handles null overall_calculation', () => {
 90        const result = { overall_calculation: null };
 91        assert.strictEqual(extractGrade(result), null);
 92      });
 93  
 94      test('extracts all letter grades correctly', () => {
 95        const grades = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];
 96  
 97        grades.forEach(grade => {
 98          const result = {
 99            overall_calculation: {
100              letter_grade: grade,
101              conversion_score: 50,
102            },
103          };
104          assert.strictEqual(extractGrade(result), grade);
105        });
106      });
107    });
108  
109    describe('extractScore', () => {
110      test('extracts numeric score from valid result', () => {
111        const result = {
112          overall_calculation: {
113            letter_grade: 'A-',
114            conversion_score: 92.3,
115          },
116        };
117  
118        assert.strictEqual(extractScore(result), 92.3);
119      });
120  
121      test('returns null for missing score', () => {
122        const result = {};
123        assert.strictEqual(extractScore(result), null);
124      });
125  
126      test('handles null input', () => {
127        assert.strictEqual(extractScore(null), null);
128      });
129  
130      test('handles undefined input', () => {
131        assert.strictEqual(extractScore(undefined), null);
132      });
133  
134      test('handles missing overall_calculation', () => {
135        const result = { some_other_field: 'value' };
136        assert.strictEqual(extractScore(result), null);
137      });
138  
139      test('handles null overall_calculation', () => {
140        const result = { overall_calculation: null };
141        assert.strictEqual(extractScore(result), null);
142      });
143  
144      test('handles zero score (returns null due to falsy check)', () => {
145        const result = {
146          overall_calculation: {
147            letter_grade: 'F',
148            conversion_score: 0,
149          },
150        };
151        // Note: Current implementation treats 0 as falsy and returns null
152        assert.strictEqual(extractScore(result), null);
153      });
154  
155      test('handles decimal scores', () => {
156        const scores = [100.0, 95.5, 87.25, 72.75, 50.5, 25.25, 0.5];
157  
158        scores.forEach(score => {
159          const result = {
160            overall_calculation: {
161              letter_grade: 'C',
162              conversion_score: score,
163            },
164          };
165          assert.strictEqual(extractScore(result), score);
166        });
167      });
168  
169      test('handles negative score (edge case)', () => {
170        const result = {
171          overall_calculation: {
172            letter_grade: 'F',
173            conversion_score: -5,
174          },
175        };
176        assert.strictEqual(extractScore(result), -5);
177      });
178  
179      test('handles very high score (edge case)', () => {
180        const result = {
181          overall_calculation: {
182            letter_grade: 'A+',
183            conversion_score: 105,
184          },
185        };
186        assert.strictEqual(extractScore(result), 105);
187      });
188    });
189  
190    describe('Score Validation', () => {
191      test('validates score and grade consistency', () => {
192        const testCases = [
193          { score: 100, expectedGrades: ['A+'] },
194          { score: 95, expectedGrades: ['A+', 'A'] },
195          { score: 85, expectedGrades: ['A-', 'B+'] },
196          { score: 70, expectedGrades: ['B-', 'C+'] },
197          { score: 50, expectedGrades: ['C', 'C-'] },
198          { score: 25, expectedGrades: ['D', 'D-'] },
199          // Note: score=0 is excluded because extractScore treats 0 as falsy and returns null
200        ];
201  
202        testCases.forEach(({ score, expectedGrades }) => {
203          expectedGrades.forEach(grade => {
204            const result = {
205              overall_calculation: {
206                letter_grade: grade,
207                conversion_score: score,
208              },
209            };
210            assert.strictEqual(extractGrade(result), grade);
211            assert.strictEqual(extractScore(result), score);
212          });
213        });
214      });
215    });
216  
217    describe('Grade Thresholds', () => {
218      test('identifies grades requiring resubmit', () => {
219        const lowGrades = ['B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];
220        const highGrades = ['A+', 'A', 'A-', 'B+', 'B'];
221  
222        // Low grades should be recognized
223        lowGrades.forEach(grade => {
224          const result = {
225            overall_calculation: {
226              letter_grade: grade,
227              conversion_score: 50,
228            },
229          };
230          // This validates the grade exists and is extractable
231          assert.strictEqual(extractGrade(result), grade);
232        });
233  
234        // High grades should be recognized
235        highGrades.forEach(grade => {
236          const result = {
237            overall_calculation: {
238              letter_grade: grade,
239              conversion_score: 90,
240            },
241          };
242          assert.strictEqual(extractGrade(result), grade);
243        });
244      });
245    });
246  
247    // Shared factor_scores for high-scoring site (all factors near 10 → grade A+)
248    const highFactorScores = {
249      headline_quality: { score: 9.5 },
250      value_proposition: { score: 9.5 },
251      unique_selling_proposition: { score: 9.5 },
252      call_to_action: { score: 9.5 },
253      urgency_messaging: { score: 9.5 },
254      hook_engagement: { score: 9.5 },
255      trust_signals: { score: 9.5 },
256      imagery_design: { score: 9.5 },
257      offer_clarity: { score: 9.5 },
258      contextual_appropriateness: { score: 9.5 },
259    };
260  
261    // Low-scoring site (all factors near 5 → grade F/D range)
262    const lowFactorScores = {
263      headline_quality: { score: 5.0 },
264      value_proposition: { score: 5.0 },
265      unique_selling_proposition: { score: 5.0 },
266      call_to_action: { score: 5.0 },
267      urgency_messaging: { score: 5.0 },
268      hook_engagement: { score: 5.0 },
269      trust_signals: { score: 5.0 },
270      imagery_design: { score: 5.0 },
271      offer_clarity: { score: 5.0 },
272      contextual_appropriateness: { score: 5.0 },
273    };
274  
275    // Mid-scoring site (all factors ~8.5 → grade B/B+)
276    const midFactorScores = {
277      headline_quality: { score: 8.5 },
278      value_proposition: { score: 8.5 },
279      unique_selling_proposition: { score: 8.5 },
280      call_to_action: { score: 8.5 },
281      urgency_messaging: { score: 8.5 },
282      hook_engagement: { score: 8.5 },
283      trust_signals: { score: 8.5 },
284      imagery_design: { score: 8.5 },
285      offer_clarity: { score: 8.5 },
286      contextual_appropriateness: { score: 8.5 },
287    };
288  
289    describe('scoreWebsite', () => {
290      test('should score website with high grade (no resubmit needed)', async () => {
291        const mockResponse = {
292          content: JSON.stringify({
293            factor_scores: highFactorScores,
294            overall_calculation: { reasoning: 'High quality site' },
295            key_strengths: ['Clear CTA', 'Fast loading'],
296            critical_weaknesses: ['Mobile optimization'],
297          }),
298          usage: {
299            promptTokens: 1000,
300            completionTokens: 500,
301          },
302        };
303  
304        callLLMMock.mock.mockImplementation(async () => mockResponse);
305  
306        const siteData = {
307          url: 'https://example.com',
308          domain: 'example.com',
309          screenshots: {
310            desktop_above: Buffer.from('fake-screenshot'),
311            mobile_above: Buffer.from('fake-screenshot'),
312          },
313          html: '<html><body>Test</body></html>',
314          sslStatus: 'valid',
315          httpHeaders: '{"content-type": "text/html"}',
316        };
317  
318        const result = await scoreWebsite(siteData, 123);
319  
320        assert.ok(result);
321        // Score computed from factor_scores; high scores yield A or A+ grade
322        assert.ok(
323          result.overall_calculation.conversion_score >= 90,
324          'High factors should yield score >= 90'
325        );
326        assert.ok(Array.isArray(result.key_strengths));
327        assert.ok(Array.isArray(result.critical_weaknesses));
328  
329        // Verify LLM was called once (no resubmit for high grades)
330        assert.strictEqual(callLLMMock.mock.calls.length, 1);
331      });
332  
333      test('should resubmit for low grades (B- or below)', async () => {
334        // First call returns low grade (50 = F)
335        const initialResponse = {
336          content: JSON.stringify({
337            factor_scores: lowFactorScores,
338            overall_calculation: { reasoning: 'Poor site' },
339            key_strengths: ['Has contact info'],
340            critical_weaknesses: ['Slow loading', 'Poor design'],
341          }),
342          usage: {
343            promptTokens: 1000,
344            completionTokens: 500,
345          },
346        };
347  
348        // Second call (resubmit) returns updated score — mid factors yield ~85 (B)
349        const resubmitResponse = {
350          content: JSON.stringify({
351            factor_scores: midFactorScores,
352            overall_calculation: { reasoning: 'Better below fold' },
353            key_strengths: ['Has contact info', 'Good below-fold content'],
354            critical_weaknesses: ['Poor design'],
355          }),
356          usage: {
357            promptTokens: 1200,
358            completionTokens: 600,
359          },
360        };
361  
362        let callCount = 0;
363        callLLMMock.mock.mockImplementation(async () => {
364          callCount++;
365          return callCount === 1 ? initialResponse : resubmitResponse;
366        });
367  
368        const siteData = {
369          url: 'https://example.com',
370          domain: 'example.com',
371          screenshots: {
372            desktop_above: Buffer.from('fake-screenshot'),
373            mobile_above: Buffer.from('fake-screenshot'),
374            desktop_below: Buffer.from('fake-screenshot-below'),
375          },
376          screenshotsUncropped: {
377            desktop_below: Buffer.from('fake-screenshot-uncropped'),
378          },
379          html: '<html><body>Test</body></html>',
380          visionText: 'Extracted text from vision',
381          sslStatus: 'valid',
382          httpHeaders: '{"content-type": "text/html"}',
383        };
384  
385        const result = await scoreWebsite(siteData, 123);
386  
387        // Should return resubmit result (higher score from mid factors)
388        assert.ok(result);
389        assert.ok(
390          result.overall_calculation.conversion_score >= 80,
391          'Resubmit result should have higher score'
392        );
393  
394        // Verify LLM was called twice (initial + resubmit)
395        assert.strictEqual(callLLMMock.mock.calls.length, 2);
396      });
397  
398      test('should not resubmit if no below-fold screenshot available', async () => {
399        const mockResponse = {
400          content: JSON.stringify({
401            factor_scores: lowFactorScores,
402            overall_calculation: { reasoning: 'Poor site, no below fold available' },
403          }),
404          usage: {
405            promptTokens: 1000,
406            completionTokens: 500,
407          },
408        };
409  
410        callLLMMock.mock.mockImplementation(async () => mockResponse);
411  
412        const siteData = {
413          url: 'https://example.com',
414          domain: 'example.com',
415          screenshots: {
416            desktop_above: Buffer.from('fake-screenshot'),
417            mobile_above: Buffer.from('fake-screenshot'),
418            // No desktop_below screenshot
419          },
420          html: '<html><body>Test</body></html>',
421          sslStatus: 'valid',
422          httpHeaders: '{"content-type": "text/html"}',
423        };
424  
425        const result = await scoreWebsite(siteData, 123);
426  
427        // Should return initial result (no resubmit possible without below-fold)
428        assert.ok(result);
429        assert.ok(
430          typeof result.overall_calculation.conversion_score === 'number',
431          'Should have computed score'
432        );
433  
434        // Verify LLM was called only once
435        assert.strictEqual(callLLMMock.mock.calls.length, 1);
436      });
437  
438      test('should handle grades at resubmit threshold (B)', async () => {
439        const mockResponse = {
440          content: JSON.stringify({
441            factor_scores: midFactorScores,
442            overall_calculation: { reasoning: 'Good site' },
443          }),
444          usage: {
445            promptTokens: 1000,
446            completionTokens: 500,
447          },
448        };
449  
450        callLLMMock.mock.mockImplementation(async () => mockResponse);
451  
452        const siteData = {
453          url: 'https://example.com',
454          domain: 'example.com',
455          screenshots: {
456            desktop_above: Buffer.from('fake-screenshot'),
457            mobile_above: Buffer.from('fake-screenshot'),
458            desktop_below: Buffer.from('fake-screenshot-below'),
459          },
460          html: '<html><body>Test</body></html>',
461          sslStatus: 'valid',
462          httpHeaders: '{"content-type": "text/html"}',
463        };
464  
465        const result = await scoreWebsite(siteData);
466  
467        // Mid factors (~85) yield B grade — should NOT trigger resubmit (threshold is B- or below)
468        assert.strictEqual(callLLMMock.mock.calls.length, 1);
469        assert.ok(
470          result.overall_calculation.conversion_score >= 83,
471          'Mid factors should yield B or above (>= 83)'
472        );
473      });
474  
475      test('should handle invalid JSON response', async () => {
476        callLLMMock.mock.mockImplementation(async () => ({
477          content: 'Not valid JSON',
478          usage: { promptTokens: 100, completionTokens: 50 },
479        }));
480  
481        const siteData = {
482          url: 'https://example.com',
483          domain: 'example.com',
484          screenshots: {
485            desktop_above: Buffer.from('fake-screenshot'),
486            mobile_above: Buffer.from('fake-screenshot'),
487          },
488          html: '<html><body>Test</body></html>',
489          sslStatus: 'valid',
490          httpHeaders: '{"content-type": "text/html"}',
491        };
492  
493        await assert.rejects(
494          async () => {
495            await scoreWebsite(siteData);
496          },
497          { message: 'Failed to parse JSON response' }
498        );
499      });
500  
501      test('should handle LLM API errors with retry', async () => {
502        // Circuit breaker is already mocked at module level to pass through
503        const siteData = {
504          id: 1,
505          domain: 'example.com',
506          url: 'https://example.com',
507          landing_page_url: 'https://example.com',
508          screenshot_path: './screenshots/test.png',
509          html: '<html><body>Test</body></html>',
510          screenshots: {
511            desktop_above: Buffer.from('fake-screenshot'),
512            mobile_above: Buffer.from('fake-screenshot'),
513          },
514        };
515  
516        let attemptCount = 0;
517        const mockSuccessResponse = {
518          content: JSON.stringify({
519            factor_scores: midFactorScores,
520            overall_calculation: {
521              reasoning: 'Good site',
522            },
523          }),
524          usage: { promptTokens: 100, completionTokens: 50 },
525        };
526  
527        callLLMMock.mock.mockImplementation(async () => {
528          attemptCount++;
529          // maxRetries=3 means 4 total attempts (0,1,2,3), so fail first 3 and succeed on 4th
530          // Use a retryable error (rate limit matches isRetryableError check)
531          if (attemptCount < 4) {
532            throw new Error('rate limit exceeded');
533          }
534          return mockSuccessResponse;
535        });
536  
537        const result = await scoreWebsite(siteData);
538  
539        assert.strictEqual(attemptCount, 4, 'Should retry 3 times before succeeding on 4th attempt');
540        // Score is computed from midFactorScores (all 8.5) — approximately B/B+ range (83-89)
541        assert.ok(
542          typeof result.overall_calculation.conversion_score === 'number',
543          'Should have a computed conversion_score'
544        );
545        assert.ok(
546          result.overall_calculation.conversion_score >= 80,
547          'Mid factors should yield B range score'
548        );
549      });
550  
551      test('should include all required data in LLM request', async () => {
552        let capturedRequest = null;
553        callLLMMock.mock.mockImplementation(async params => {
554          capturedRequest = params;
555          return {
556            content: JSON.stringify({
557              factor_scores: midFactorScores,
558              overall_calculation: {
559                reasoning: 'Good site',
560              },
561            }),
562            usage: { promptTokens: 1000, completionTokens: 500 },
563          };
564        });
565  
566        const siteData = {
567          url: 'https://example.com',
568          domain: 'example.com',
569          screenshots: {
570            desktop_above: Buffer.from('fake-desktop'),
571            mobile_above: Buffer.from('fake-mobile'),
572          },
573          html: '<html><body>Test HTML</body></html>',
574          sslStatus: 'valid',
575          httpHeaders: '{"server": "nginx"}',
576        };
577  
578        await scoreWebsite(siteData);
579  
580        // Verify request structure
581        assert.ok(capturedRequest, 'Should have captured request');
582        assert.ok(capturedRequest.messages, 'Should have messages array');
583        assert.strictEqual(capturedRequest.messages.length, 2); // system + user
584        assert.strictEqual(capturedRequest.messages[0].role, 'system');
585        assert.strictEqual(capturedRequest.messages[1].role, 'user');
586  
587        // Verify user message has text and images
588        const userContent = capturedRequest.messages[1].content;
589        assert.ok(Array.isArray(userContent));
590        assert.ok(userContent.find(c => c.type === 'text'));
591        assert.ok(userContent.find(c => c.type === 'image_url'));
592  
593        // Verify images are base64 encoded
594        const imageContent = userContent.filter(c => c.type === 'image_url');
595        assert.strictEqual(imageContent.length, 2); // desktop + mobile
596        imageContent.forEach(img => {
597          assert.ok(img.image_url.url.startsWith('data:image/jpeg;base64,'));
598        });
599      });
600    });
601  });