score.test.js
/**
 * Tests for Scoring Module
 * Uses Node.js 22+ mock.module() to mock LLM provider for comprehensive testing
 *
 * Layout:
 *   1. Environment flags and module mocks (must run before score.js is loaded).
 *   2. Fixture builders shared by the scoreWebsite tests.
 *   3. The test suite itself.
 */

import { test, describe, mock, beforeEach, afterEach } from 'node:test';
import assert from 'node:assert';

// Ensure vision scoring is enabled for tests
// Must be set BEFORE score.js is imported (module captures at load time)
process.env.ENABLE_VISION = 'true';
process.env.USE_COMPUTER_VISION_SCORING = 'true';
process.env.USE_COMPUTER_VISION_RESCORING = 'true';

// Create mock functions
const callLLMMock = mock.fn();
const getProviderMock = mock.fn(() => 'openrouter');
const getProviderDisplayNameMock = mock.fn(() => 'OpenRouter');

// Mock llm-provider module BEFORE importing score.js
mock.module('../../src/utils/llm-provider.js', {
  namedExports: {
    callLLM: callLLMMock,
    getProvider: getProviderMock,
    getProviderDisplayName: getProviderDisplayNameMock,
  },
});

// Mock llm-usage-tracker to avoid DB writes in tests
mock.module('../../src/utils/llm-usage-tracker.js', {
  namedExports: {
    logLLMUsage: mock.fn(),
  },
});

// Mock circuit-breaker module to pass through without opening
const mockOpenRouterBreaker = {
  fire: async fn => fn(),
  opened: false,
  halfOpen: false,
};

mock.module('../../src/utils/circuit-breaker.js', {
  namedExports: {
    openRouterBreaker: mockOpenRouterBreaker,
  },
});

// Now import score module (dynamic import so the mocks above are registered first)
const { scoreWebsite, extractGrade, extractScore } = await import('../../src/score.js');

// ---------------------------------------------------------------------------
// Fixture builders
// ---------------------------------------------------------------------------

// Factor keys used in every factor_scores fixture, in the order they are serialized.
const FACTOR_NAMES = [
  'headline_quality',
  'value_proposition',
  'unique_selling_proposition',
  'call_to_action',
  'urgency_messaging',
  'hook_engagement',
  'trust_signals',
  'imagery_design',
  'offer_clarity',
  'contextual_appropriateness',
];

/**
 * Build a factor_scores object in which every factor carries the same score.
 *
 * @param {number} score - Score assigned to each factor.
 * @returns {Object<string, {score: number}>} One `{ score }` entry per factor name.
 */
const uniformFactorScores = score =>
  Object.fromEntries(FACTOR_NAMES.map(name => [name, { score }]));

/**
 * Build the object the mocked callLLM resolves with: a JSON-encoded payload
 * plus token usage.
 *
 * @param {object} payload - Value serialized into `content`.
 * @param {number} [promptTokens=1000] - Reported prompt token count.
 * @param {number} [completionTokens=500] - Reported completion token count.
 * @returns {{content: string, usage: {promptTokens: number, completionTokens: number}}}
 */
const llmResponse = (payload, promptTokens = 1000, completionTokens = 500) => ({
  content: JSON.stringify(payload),
  usage: { promptTokens, completionTokens },
});

/**
 * Build the common scoreWebsite input: above-fold screenshots only (no
 * desktop_below), minimal HTML, valid SSL and a content-type header.
 * Overrides replace whole top-level fields; keys that already exist keep
 * their position in the resulting object.
 *
 * @param {object} [overrides={}] - Top-level fields to replace or add.
 * @returns {object} Site data to pass to scoreWebsite.
 */
const makeSiteData = (overrides = {}) => ({
  url: 'https://example.com',
  domain: 'example.com',
  screenshots: {
    desktop_above: Buffer.from('fake-screenshot'),
    mobile_above: Buffer.from('fake-screenshot'),
  },
  html: '<html><body>Test</body></html>',
  sslStatus: 'valid',
  httpHeaders: '{"content-type": "text/html"}',
  ...overrides,
});

/**
 * Wrap a grade/score pair in the result shape read by extractGrade/extractScore.
 *
 * @param {string} letterGrade - Value for overall_calculation.letter_grade.
 * @param {number} conversionScore - Value for overall_calculation.conversion_score.
 * @returns {{overall_calculation: {letter_grade: string, conversion_score: number}}}
 */
const gradedResult = (letterGrade, conversionScore) => ({
  overall_calculation: {
    letter_grade: letterGrade,
    conversion_score: conversionScore,
  },
});

describe('Score Module', () => {
  beforeEach(() => {
    callLLMMock.mock.resetCalls();
    // resetCalls() only clears the call history. Also drop any implementation a
    // previous test installed, so a test that forgets to set its own cannot
    // silently reuse an earlier test's canned LLM response.
    callLLMMock.mock.restore();
    getProviderMock.mock.resetCalls();
    getProviderDisplayNameMock.mock.resetCalls();
  });

  describe('extractGrade', () => {
    test('extracts letter grade from valid result', () => {
      assert.strictEqual(extractGrade(gradedResult('B+', 87.5)), 'B+');
    });

    test('returns null for missing grade', () => {
      const result = {};
      assert.strictEqual(extractGrade(result), null);
    });

    test('handles null input', () => {
      assert.strictEqual(extractGrade(null), null);
    });

    test('handles undefined input', () => {
      assert.strictEqual(extractGrade(undefined), null);
    });

    test('handles missing overall_calculation', () => {
      const result = { some_other_field: 'value' };
      assert.strictEqual(extractGrade(result), null);
    });

    test('handles null overall_calculation', () => {
      const result = { overall_calculation: null };
      assert.strictEqual(extractGrade(result), null);
    });

    test('extracts all letter grades correctly', () => {
      const grades = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];

      for (const grade of grades) {
        assert.strictEqual(extractGrade(gradedResult(grade, 50)), grade);
      }
    });
  });

  describe('extractScore', () => {
    test('extracts numeric score from valid result', () => {
      assert.strictEqual(extractScore(gradedResult('A-', 92.3)), 92.3);
    });

    test('returns null for missing score', () => {
      const result = {};
      assert.strictEqual(extractScore(result), null);
    });

    test('handles null input', () => {
      assert.strictEqual(extractScore(null), null);
    });

    test('handles undefined input', () => {
      assert.strictEqual(extractScore(undefined), null);
    });

    test('handles missing overall_calculation', () => {
      const result = { some_other_field: 'value' };
      assert.strictEqual(extractScore(result), null);
    });

    test('handles null overall_calculation', () => {
      const result = { overall_calculation: null };
      assert.strictEqual(extractScore(result), null);
    });

    test('handles zero score (returns null due to falsy check)', () => {
      // Note: Current implementation treats 0 as falsy and returns null
      assert.strictEqual(extractScore(gradedResult('F', 0)), null);
    });

    test('handles decimal scores', () => {
      const scores = [100.0, 95.5, 87.25, 72.75, 50.5, 25.25, 0.5];

      for (const score of scores) {
        assert.strictEqual(extractScore(gradedResult('C', score)), score);
      }
    });

    test('handles negative score (edge case)', () => {
      assert.strictEqual(extractScore(gradedResult('F', -5)), -5);
    });

    test('handles very high score (edge case)', () => {
      assert.strictEqual(extractScore(gradedResult('A+', 105)), 105);
    });
  });

  describe('Score Validation', () => {
    test('validates score and grade consistency', () => {
      const testCases = [
        { score: 100, expectedGrades: ['A+'] },
        { score: 95, expectedGrades: ['A+', 'A'] },
        { score: 85, expectedGrades: ['A-', 'B+'] },
        { score: 70, expectedGrades: ['B-', 'C+'] },
        { score: 50, expectedGrades: ['C', 'C-'] },
        { score: 25, expectedGrades: ['D', 'D-'] },
        // Note: score=0 is excluded because extractScore treats 0 as falsy and returns null
      ];

      for (const { score, expectedGrades } of testCases) {
        for (const grade of expectedGrades) {
          const result = gradedResult(grade, score);
          assert.strictEqual(extractGrade(result), grade);
          assert.strictEqual(extractScore(result), score);
        }
      }
    });
  });

  describe('Grade Thresholds', () => {
    test('identifies grades requiring resubmit', () => {
      const lowGrades = ['B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'];
      const highGrades = ['A+', 'A', 'A-', 'B+', 'B'];

      // Low grades should be recognized
      for (const grade of lowGrades) {
        // This validates the grade exists and is extractable
        assert.strictEqual(extractGrade(gradedResult(grade, 50)), grade);
      }

      // High grades should be recognized
      for (const grade of highGrades) {
        assert.strictEqual(extractGrade(gradedResult(grade, 90)), grade);
      }
    });
  });

  // Shared factor_scores for high-scoring site (all factors near 10 → grade A+)
  const highFactorScores = uniformFactorScores(9.5);

  // Low-scoring site (all factors near 5 → grade F/D range)
  const lowFactorScores = uniformFactorScores(5.0);

  // Mid-scoring site (all factors ~8.5 → grade B/B+)
  const midFactorScores = uniformFactorScores(8.5);

  describe('scoreWebsite', () => {
    test('should score website with high grade (no resubmit needed)', async () => {
      const mockResponse = llmResponse({
        factor_scores: highFactorScores,
        overall_calculation: { reasoning: 'High quality site' },
        key_strengths: ['Clear CTA', 'Fast loading'],
        critical_weaknesses: ['Mobile optimization'],
      });

      callLLMMock.mock.mockImplementation(async () => mockResponse);

      const result = await scoreWebsite(makeSiteData(), 123);

      assert.ok(result);
      // Score computed from factor_scores; high scores yield A or A+ grade
      assert.ok(
        result.overall_calculation.conversion_score >= 90,
        'High factors should yield score >= 90'
      );
      assert.ok(Array.isArray(result.key_strengths));
      assert.ok(Array.isArray(result.critical_weaknesses));

      // Verify LLM was called once (no resubmit for high grades)
      assert.strictEqual(callLLMMock.mock.calls.length, 1);
    });

    test('should resubmit for low grades (B- or below)', async () => {
      // First call returns low grade (50 = F)
      const initialResponse = llmResponse({
        factor_scores: lowFactorScores,
        overall_calculation: { reasoning: 'Poor site' },
        key_strengths: ['Has contact info'],
        critical_weaknesses: ['Slow loading', 'Poor design'],
      });

      // Second call (resubmit) returns updated score — mid factors yield ~85 (B)
      const resubmitResponse = llmResponse(
        {
          factor_scores: midFactorScores,
          overall_calculation: { reasoning: 'Better below fold' },
          key_strengths: ['Has contact info', 'Good below-fold content'],
          critical_weaknesses: ['Poor design'],
        },
        1200,
        600
      );

      // Counted inside the implementation because the first response must
      // differ from every later one.
      let callCount = 0;
      callLLMMock.mock.mockImplementation(async () => {
        callCount++;
        return callCount === 1 ? initialResponse : resubmitResponse;
      });

      // Written out in full: this input carries below-fold and vision fields
      // that the common fixture does not have.
      const siteData = {
        url: 'https://example.com',
        domain: 'example.com',
        screenshots: {
          desktop_above: Buffer.from('fake-screenshot'),
          mobile_above: Buffer.from('fake-screenshot'),
          desktop_below: Buffer.from('fake-screenshot-below'),
        },
        screenshotsUncropped: {
          desktop_below: Buffer.from('fake-screenshot-uncropped'),
        },
        html: '<html><body>Test</body></html>',
        visionText: 'Extracted text from vision',
        sslStatus: 'valid',
        httpHeaders: '{"content-type": "text/html"}',
      };

      const result = await scoreWebsite(siteData, 123);

      // Should return resubmit result (higher score from mid factors)
      assert.ok(result);
      assert.ok(
        result.overall_calculation.conversion_score >= 80,
        'Resubmit result should have higher score'
      );

      // Verify LLM was called twice (initial + resubmit)
      assert.strictEqual(callLLMMock.mock.calls.length, 2);
    });

    test('should not resubmit if no below-fold screenshot available', async () => {
      const mockResponse = llmResponse({
        factor_scores: lowFactorScores,
        overall_calculation: { reasoning: 'Poor site, no below fold available' },
      });

      callLLMMock.mock.mockImplementation(async () => mockResponse);

      // The common fixture has no desktop_below screenshot
      const result = await scoreWebsite(makeSiteData(), 123);

      // Should return initial result (no resubmit possible without below-fold)
      assert.ok(result);
      assert.ok(
        typeof result.overall_calculation.conversion_score === 'number',
        'Should have computed score'
      );

      // Verify LLM was called only once
      assert.strictEqual(callLLMMock.mock.calls.length, 1);
    });

    test('should handle grades at resubmit threshold (B)', async () => {
      const mockResponse = llmResponse({
        factor_scores: midFactorScores,
        overall_calculation: { reasoning: 'Good site' },
      });

      callLLMMock.mock.mockImplementation(async () => mockResponse);

      // A below-fold screenshot IS available here, so a single LLM call shows
      // that the grade alone suppressed the resubmit.
      const siteData = makeSiteData({
        screenshots: {
          desktop_above: Buffer.from('fake-screenshot'),
          mobile_above: Buffer.from('fake-screenshot'),
          desktop_below: Buffer.from('fake-screenshot-below'),
        },
      });

      const result = await scoreWebsite(siteData);

      // Mid factors (~85) yield B grade — should NOT trigger resubmit (threshold is B- or below)
      assert.strictEqual(callLLMMock.mock.calls.length, 1);
      assert.ok(
        result.overall_calculation.conversion_score >= 83,
        'Mid factors should yield B or above (>= 83)'
      );
    });

    test('should handle invalid JSON response', async () => {
      callLLMMock.mock.mockImplementation(async () => ({
        content: 'Not valid JSON',
        usage: { promptTokens: 100, completionTokens: 50 },
      }));

      await assert.rejects(
        async () => {
          await scoreWebsite(makeSiteData());
        },
        { message: 'Failed to parse JSON response' }
      );
    });

    test('should handle LLM API errors with retry', async () => {
      // Circuit breaker is already mocked at module level to pass through
      // Written out in full: this input has a different shape from the common fixture.
      const siteData = {
        id: 1,
        domain: 'example.com',
        url: 'https://example.com',
        landing_page_url: 'https://example.com',
        screenshot_path: './screenshots/test.png',
        html: '<html><body>Test</body></html>',
        screenshots: {
          desktop_above: Buffer.from('fake-screenshot'),
          mobile_above: Buffer.from('fake-screenshot'),
        },
      };

      let attemptCount = 0;
      const mockSuccessResponse = llmResponse(
        {
          factor_scores: midFactorScores,
          overall_calculation: {
            reasoning: 'Good site',
          },
        },
        100,
        50
      );

      callLLMMock.mock.mockImplementation(async () => {
        attemptCount++;
        // maxRetries=3 means 4 total attempts (0,1,2,3), so fail first 3 and succeed on 4th
        // Use a retryable error (rate limit matches isRetryableError check)
        if (attemptCount < 4) {
          throw new Error('rate limit exceeded');
        }
        return mockSuccessResponse;
      });

      const result = await scoreWebsite(siteData);

      assert.strictEqual(attemptCount, 4, 'Should retry 3 times before succeeding on 4th attempt');
      // Score is computed from midFactorScores (all 8.5) — approximately B/B+ range (83-89)
      assert.ok(
        typeof result.overall_calculation.conversion_score === 'number',
        'Should have a computed conversion_score'
      );
      assert.ok(
        result.overall_calculation.conversion_score >= 80,
        'Mid factors should yield B range score'
      );
    });

    test('should include all required data in LLM request', async () => {
      // Capture the argument passed to callLLM so its structure can be inspected.
      let capturedRequest = null;
      callLLMMock.mock.mockImplementation(async params => {
        capturedRequest = params;
        return llmResponse({
          factor_scores: midFactorScores,
          overall_calculation: {
            reasoning: 'Good site',
          },
        });
      });

      const siteData = makeSiteData({
        screenshots: {
          desktop_above: Buffer.from('fake-desktop'),
          mobile_above: Buffer.from('fake-mobile'),
        },
        html: '<html><body>Test HTML</body></html>',
        httpHeaders: '{"server": "nginx"}',
      });

      await scoreWebsite(siteData);

      // Verify request structure
      assert.ok(capturedRequest, 'Should have captured request');
      assert.ok(capturedRequest.messages, 'Should have messages array');
      assert.strictEqual(capturedRequest.messages.length, 2); // system + user
      assert.strictEqual(capturedRequest.messages[0].role, 'system');
      assert.strictEqual(capturedRequest.messages[1].role, 'user');

      // Verify user message has text and images
      const userContent = capturedRequest.messages[1].content;
      assert.ok(Array.isArray(userContent));
      assert.ok(userContent.find(c => c.type === 'text'));
      assert.ok(userContent.find(c => c.type === 'image_url'));

      // Verify images are base64 encoded
      const imageContent = userContent.filter(c => c.type === 'image_url');
      assert.strictEqual(imageContent.length, 2); // desktop + mobile
      for (const img of imageContent) {
        assert.ok(img.image_url.url.startsWith('data:image/jpeg;base64,'));
      }
    });
  });
});