Cradicle Explorer

/ src / score.js
score.js
  1  /**
  2   * LLM Scoring Module
  3   * Integrates with OpenRouter or Anthropic Claude for website conversion scoring
  4   */
  5  
  6  import { readFileSync } from 'fs';
  7  import { join, dirname } from 'path';
  8  import { fileURLToPath } from 'url';
  9  import Logger from './utils/logger.js';
 10  import { retryWithBackoff, isRetryableError, safeJsonParse } from './utils/error-handler.js';
 11  import { openRouterBreaker } from './utils/circuit-breaker.js';
 12  import { openRouterLimiter } from './utils/rate-limiter.js';
 13  import { callLLM, getProviderDisplayName } from './utils/llm-provider.js';
 14  import { sanitizeHtmlForPrompt, wrapUntrusted } from './utils/llm-sanitizer.js';
 15  import { validateScoringResponse } from './utils/llm-response-validator.js';
 16  import './utils/load-env.js';
 17  
 18  const __filename = fileURLToPath(import.meta.url);
 19  const __dirname = dirname(__filename);
 20  const projectRoot = join(__dirname, '..');
 21  
 22  const logger = new Logger('Score');
 23  
 24  // Factor weights for programmatic score calculation (matches prompt scoring framework)
 25  export const FACTOR_WEIGHTS = {
 26    headline_quality: 0.15,
 27    value_proposition: 0.14,
 28    unique_selling_proposition: 0.13,
 29    call_to_action: 0.13,
 30    urgency_messaging: 0.1,
 31    hook_engagement: 0.09,
 32    trust_signals: 0.11,
 33    imagery_design: 0.08,
 34    offer_clarity: 0.04,
 35    contextual_appropriateness: 0.03,
 36  };
 37  
 38  // Standard academic grade scale with +/- modifiers
 39  const GRADE_THRESHOLDS = [
 40    { min: 97, grade: 'A+' },
 41    { min: 93, grade: 'A' },
 42    { min: 90, grade: 'A-' },
 43    { min: 87, grade: 'B+' },
 44    { min: 83, grade: 'B' },
 45    { min: 80, grade: 'B-' },
 46    { min: 77, grade: 'C+' },
 47    { min: 73, grade: 'C' },
 48    { min: 70, grade: 'C-' },
 49    { min: 67, grade: 'D+' },
 50    { min: 63, grade: 'D' },
 51    { min: 60, grade: 'D-' },
 52    { min: 0, grade: 'F' },
 53  ];
 54  
 55  /**
 56   * Compute weighted total score (0-100) from individual factor scores (0-10)
 57   */
 58  export function computeScoreFromFactors(factorScores) {
 59    if (!factorScores || typeof factorScores !== 'object') return null;
 60  
 61    let total = 0;
 62    for (const [factor, weight] of Object.entries(FACTOR_WEIGHTS)) {
 63      const score = factorScores[factor]?.score ?? 0;
 64      total += score * weight;
 65    }
 66    // Factor scores are 0-10, weights sum to 1.0, so total is 0-10. Scale to 0-100.
 67    return Math.round(total * 10 * 10) / 10;
 68  }
 69  
 70  /**
 71   * Derive letter grade from numeric score using the business grade scale
 72   */
 73  export function computeGrade(score) {
 74    if (score === null || score === undefined || score < 0) return 'F';
 75    for (const { min, grade } of GRADE_THRESHOLDS) {
 76      if (score >= min) return grade;
 77    }
 78    return 'F';
 79  }
 80  
 81  // Check ENABLE_VISION flag (consolidates old flags)
 82  const ENABLE_VISION = process.env.ENABLE_VISION !== 'false';
 83  
 84  // Show deprecation warning if old flags are used
 85  const legacyFlags = [
 86    process.env.USE_COMPUTER_VISION_SCORING,
 87    process.env.USE_COMPUTER_VISION_RESCORING,
 88    process.env.USE_COMPUTER_VISION_ENRICHMENT,
 89  ];
 90  if (legacyFlags.some(flag => flag !== undefined)) {
 91    console.warn(
 92      '[score] WARN: Vision flags (USE_COMPUTER_VISION_*) are deprecated. Use ENABLE_VISION instead.'
 93    );
 94  }
 95  
 96  // Load prompts based on ENABLE_VISION flag
 97  const SCORING_PROMPT_VISION = readFileSync(
 98    join(projectRoot, 'prompts/CONVERSION-SCORING-VISION.md'),
 99    'utf-8'
100  );
101  const SCORING_PROMPT_NOVIS = readFileSync(
102    join(projectRoot, 'prompts/CONVERSION-SCORING-NOVIS.md'),
103    'utf-8'
104  );
105  
106  const RESUBMIT_PROMPT_BASE = readFileSync(
107    join(projectRoot, 'prompts/CONVERSION-RESCORING.md'),
108    'utf-8'
109  );
110  const RESUBMIT_PROMPT_VISION = readFileSync(
111    join(projectRoot, 'prompts/CONVERSION-RESCORING-VISION.md'),
112    'utf-8'
113  );
114  
115  // Select prompt based on ENABLE_VISION flag
116  // Vision enabled: CONVERSION-SCORING-VISION.md (full screenshot + HTML analysis)
117  // Vision disabled: CONVERSION-SCORING-NOVIS.md (HTML-only with contact extraction)
118  const SCORING_PROMPT = ENABLE_VISION ? SCORING_PROMPT_VISION : SCORING_PROMPT_NOVIS;
119  
120  const RESUBMIT_PROMPT = ENABLE_VISION
121    ? `${RESUBMIT_PROMPT_BASE}\n\n${RESUBMIT_PROMPT_VISION}`
122    : RESUBMIT_PROMPT_BASE;
123  
124  // Model configuration (from env or default)
125  const SCORING_MODEL = process.env.SCORING_MODEL || 'openai/gpt-4o-mini';
126  
/**
 * Score a website with the configured LLM, resubmitting low grades with below-fold content.
 *
 * The first pass uses the above-fold desktop and mobile screenshots plus the HTML.
 * When the resulting letter grade qualifies for a resubmit (see shouldResubmit) and a
 * below-fold desktop screenshot exists, a second pass is made and its result is returned
 * instead of the first one.
 *
 * @param {Object} siteData - Site data: url, domain, screenshots, screenshotsUncropped,
 *   html, visionText, httpHeaders
 * @param {number|null} [siteId=null] - Site ID forwarded to the LLM calls for usage tracking
 * @returns {Promise<Object>} Result of the resubmit pass if one ran, otherwise the first pass
 * @throws Rethrows any error from either pass after logging it
 */
export async function scoreWebsite(siteData, siteId = null) {
  const { url, domain, screenshots, screenshotsUncropped, html, visionText, httpHeaders } =
    siteData;

  logger.info(`Scoring website: ${domain}`);

  try {
    // First pass: above-fold screenshots only
    const firstPass = await callScoringAPI({
      url,
      domain,
      desktopScreenshot: screenshots.desktop_above,
      mobileScreenshot: screenshots.mobile_above,
      html,
      httpHeaders,
      prompt: SCORING_PROMPT,
      siteId,
    });

    const firstPassGrade = firstPass?.overall_calculation?.letter_grade;

    // Nothing more to do unless the grade is low AND there is below-fold material to add
    if (!shouldResubmit(firstPassGrade) || !screenshots.desktop_below) {
      return firstPass;
    }

    logger.info(`Score ${firstPassGrade} requires resubmit for ${domain}`);

    // Prefer the uncropped below-fold capture; fall back to the cropped one
    const uncroppedBelowFold = screenshotsUncropped?.desktop_below;
    const belowFoldScreenshot = uncroppedBelowFold ? uncroppedBelowFold : screenshots.desktop_below;

    // Await inside the try block so a failed resubmit is logged by the catch below
    return await callResubmitAPI({
      url,
      domain,
      initialScore: firstPass,
      belowFoldScreenshot,
      html,
      visionText,
      prompt: RESUBMIT_PROMPT,
      siteId,
    });
  } catch (error) {
    logger.error(`Scoring failed for ${domain}`, error);
    throw error;
  }
}
182  
/**
 * Render captured HTTP headers as pretty-printed JSON for the scoring prompt.
 *
 * Headers that are missing, or that cannot be parsed/serialized, are reported as
 * 'Not available' so a bad capture cannot abort scoring.
 *
 * @param {string|Object|null|undefined} httpHeaders - JSON string (or already-parsed object)
 * @param {string} domain - Domain, used only in the warning message
 * @returns {string} Pretty-printed JSON, or 'Not available'
 */
function formatHttpHeadersForPrompt(httpHeaders, domain) {
  if (!httpHeaders) return 'Not available';

  try {
    const parsed = typeof httpHeaders === 'string' ? JSON.parse(httpHeaders) : httpHeaders;
    // JSON.stringify returns undefined for values it cannot represent
    return JSON.stringify(parsed, null, 2) ?? 'Not available';
  } catch (error) {
    logger.warn(`Ignoring unparseable HTTP headers for ${domain}: ${error.message}`);
    return 'Not available';
  }
}

/**
 * Call the LLM API for initial scoring.
 *
 * Builds a system + user message pair (HTML and HTTP headers wrapped as untrusted
 * content, plus the above-fold screenshots when vision is enabled and both are present),
 * sends it through the rate limiter and circuit breaker, and overwrites
 * `overall_calculation.conversion_score` / `letter_grade` with values computed from the
 * returned factor scores. The whole call is passed to retryWithBackoff with maxRetries 3.
 *
 * @param {Object} params
 * @param {string} params.url - Page URL
 * @param {string} params.domain - Site domain (used in prompt and logs)
 * @param {Buffer} [params.desktopScreenshot] - Above-fold desktop screenshot
 * @param {Buffer} [params.mobileScreenshot] - Above-fold mobile screenshot
 * @param {string} params.html - Page HTML; only the first 50000 characters are sent
 * @param {string} [params.httpHeaders] - JSON-encoded response headers
 * @param {string} params.prompt - System prompt
 * @param {number|null} params.siteId - Site ID forwarded to callLLM
 * @returns {Promise<Object>} Parsed scoring response with computed score and grade
 * @throws {Error} When the response is not parseable JSON or lacks `factor_scores`
 */
// eslint-disable-next-line require-await -- Wraps retryWithBackoff which handles async
async function callScoringAPI({
  url,
  domain,
  desktopScreenshot,
  mobileScreenshot,
  html,
  httpHeaders,
  prompt,
  siteId,
}) {
  const maxRetries = 3;
  const maxTokens = 4000;
  // Completions within this many tokens of the cap are reported as truncated
  const truncationMargin = 100;

  // Formatted once up front: it never throws and is identical for every attempt
  const headersText = formatHttpHeadersForPrompt(httpHeaders, domain);

  return retryWithBackoff(
    // eslint-disable-next-line require-await -- Wrapper for circuit breaker fire()
    async () => {
      // Wrap the API call with circuit breaker
      return openRouterBreaker.fire(async () => {
        // Build user message content
        const userContent = [
          {
            type: 'text',
            text: `Evaluate this website:\n\nURL: ${url}\nDomain: ${domain}\n\n${wrapUntrusted(headersText, 'http_headers')}\n\n${wrapUntrusted(sanitizeHtmlForPrompt(html.substring(0, 50000)), 'website_html')}`,
          },
        ];

        // Only include screenshots if vision enabled AND screenshots are available
        if (ENABLE_VISION && desktopScreenshot && mobileScreenshot) {
          userContent.push(
            {
              type: 'image_url',
              image_url: {
                url: `data:image/jpeg;base64,${desktopScreenshot.toString('base64')}`,
                detail: 'low', // Use low detail to save tokens (85 tokens vs 255)
              },
            },
            {
              type: 'image_url',
              image_url: {
                url: `data:image/jpeg;base64,${mobileScreenshot.toString('base64')}`,
                detail: 'low',
              },
            }
          );
        }

        const messages = [
          {
            role: 'system',
            content: prompt,
          },
          {
            role: 'user',
            content: userContent,
          },
        ];

        const response = await openRouterLimiter.schedule(() =>
          callLLM({
            model: SCORING_MODEL,
            messages,
            temperature: 0.3,
            max_tokens: maxTokens,
            json_mode: true,
            stage: 'scoring',
            siteId,
          })
        );

        const { content, usage } = response;
        const result = safeJsonParse(content);

        if (!result) {
          throw new Error('Failed to parse JSON response');
        }

        // Sanitize and validate LLM response (clamp scores, drop unexpected fields)
        validateScoringResponse(result);

        // Validate the LLM returned factor scores (needed for programmatic score computation)
        if (!result.factor_scores) {
          const completionTokens = usage?.completionTokens ?? '?';
          const truncated =
            typeof completionTokens === 'number' &&
            completionTokens >= maxTokens - truncationMargin;
          throw new Error(
            `Incomplete LLM response: missing factor_scores — ` +
              `completionTokens=${completionTokens}${truncated ? ' (HIT MAX_TOKENS — increase max_tokens)' : ' (partial JSON — likely rate limit burst)'}`
          );
        }

        // Compute score and grade programmatically from factor scores
        if (!result.overall_calculation) result.overall_calculation = {};
        const computedScore = computeScoreFromFactors(result.factor_scores);
        const computedGrade = computeGrade(computedScore);
        result.overall_calculation.conversion_score = computedScore;
        result.overall_calculation.letter_grade = computedGrade;

        // usage is treated as optional above; a missing usage object must not
        // turn a successfully scored response into a failure while logging.
        const tokenTotal = usage ? usage.promptTokens + usage.completionTokens : '?';
        logger.success(
          `Scored ${domain}: ${computedGrade} (${computedScore}) (${getProviderDisplayName()}) - ${tokenTotal} tokens`
        );

        return result;
      });
    },
    {
      maxRetries,
      shouldRetry: isRetryableError,
      onRetry: (attempt, error) => {
        logger.warn(`Retry ${attempt + 1}/${maxRetries} for ${domain}: ${error.message}`);
      },
    }
  );
}
296  
/**
 * Call the LLM API for resubmit scoring (second pass with below-fold content).
 *
 * Sends the first-pass result, the page HTML, any text extracted from the below-fold
 * screenshot and (when vision is enabled) the screenshot itself. Page-derived text is
 * sanitized and wrapped as untrusted content, as in the initial scoring call, so
 * that page content cannot pose as instructions. When the response contains `factor_scores`,
 * `overall_calculation.conversion_score` / `letter_grade` are overwritten with values
 * computed from them; otherwise the response is returned as the model produced it.
 *
 * NOTE(review): unlike the initial scoring call, this path does not run
 * validateScoringResponse on the result — confirm whether the rescoring schema allows it.
 *
 * @param {Object} params
 * @param {string} params.url - Page URL
 * @param {string} params.domain - Site domain (used in prompt and logs)
 * @param {Object} params.initialScore - Result of the first scoring pass
 * @param {Buffer} [params.belowFoldScreenshot] - Below-fold desktop screenshot
 * @param {string} params.html - Page HTML; only the first 50000 characters are sent
 * @param {string} [params.visionText] - Text extracted from the below-fold screenshot
 * @param {string} params.prompt - System prompt
 * @param {number|null} params.siteId - Site ID forwarded to callLLM
 * @returns {Promise<Object>} Parsed resubmit response
 * @throws {Error} When the response is not parseable JSON
 */
// eslint-disable-next-line require-await -- Wraps retryWithBackoff which handles async
async function callResubmitAPI({
  url,
  domain,
  initialScore,
  belowFoldScreenshot,
  html,
  visionText,
  prompt,
  siteId,
}) {
  const maxRetries = 3;

  return retryWithBackoff(
    // eslint-disable-next-line require-await -- Wrapper for circuit breaker fire()
    async () => {
      // Wrap the API call with circuit breaker
      return openRouterBreaker.fire(async () => {
        // Page HTML and OCR'd screenshot text both originate from the scored site, so
        // they get the same sanitize/wrap treatment the initial scoring call applies.
        const htmlSection = wrapUntrusted(
          sanitizeHtmlForPrompt(html.substring(0, 50000)),
          'website_html'
        );
        // NOTE(review): 'below_fold_text' assumes wrapUntrusted accepts arbitrary labels — confirm.
        const visionSection = visionText
          ? `\n\nText extracted from below-fold screenshot:\n${wrapUntrusted(visionText, 'below_fold_text')}`
          : '';

        // Build user message content
        const userContent = [
          {
            type: 'text',
            text: `Re-evaluate this website with below-fold content:\n\nURL: ${url}\nDomain: ${domain}\n\nInitial Score:\n${JSON.stringify(initialScore, null, 2)}\n\nHTML DOM (first 50000 chars):\n${htmlSection}${visionSection}`,
          },
        ];

        // Only include screenshot if computer vision is enabled for rescoring AND screenshot exists
        if (ENABLE_VISION && belowFoldScreenshot) {
          userContent.push({
            type: 'image_url',
            image_url: {
              url: `data:image/jpeg;base64,${belowFoldScreenshot.toString('base64')}`,
              detail: 'low',
            },
          });
        }

        const messages = [
          {
            role: 'system',
            content: prompt,
          },
          {
            role: 'user',
            content: userContent,
          },
        ];

        const response = await openRouterLimiter.schedule(() =>
          callLLM({
            model: SCORING_MODEL,
            messages,
            temperature: 0.3,
            max_tokens: 3000,
            json_mode: true,
            stage: 'rescoring',
            siteId,
          })
        );

        const { content, usage } = response;
        const result = safeJsonParse(content);

        if (!result) {
          throw new Error('Failed to parse resubmit JSON response');
        }

        // Compute score and grade programmatically from factor scores
        if (result.factor_scores) {
          if (!result.overall_calculation) result.overall_calculation = {};
          const computedScore = computeScoreFromFactors(result.factor_scores);
          const computedGrade = computeGrade(computedScore);
          result.overall_calculation.conversion_score = computedScore;
          result.overall_calculation.letter_grade = computedGrade;
        }

        // ?? rather than || so a legitimate score of 0 is logged as 0, not 'N/A'
        const resubGrade = result.overall_calculation?.letter_grade ?? 'N/A';
        const resubScore = result.overall_calculation?.conversion_score ?? 'N/A';
        // A missing usage object must not fail an otherwise successful rescoring
        const tokenTotal = usage ? usage.promptTokens + usage.completionTokens : '?';
        logger.success(
          `Resubmit scored ${domain}: ${resubGrade} (${resubScore}) (${getProviderDisplayName()}) - ${tokenTotal} tokens`
        );

        return result;
      });
    },
    {
      maxRetries,
      shouldRetry: isRetryableError,
      onRetry: (attempt, error) => {
        logger.warn(
          `Resubmit retry ${attempt + 1}/${maxRetries} for ${domain}: ${error.message}`
        );
      },
    }
  );
}
389  
/**
 * Decide whether a letter grade is low enough to warrant a resubmit.
 *
 * @param {string} grade - Letter grade such as 'B-' or 'A+'
 * @returns {boolean} true for 'B-' and every lower grade (i.e. scores under 83);
 *   false for higher grades, unknown values, and missing grades
 */
function shouldResubmit(grade) {
  switch (grade) {
    case 'B-':
    case 'C+':
    case 'C':
    case 'C-':
    case 'D+':
    case 'D':
    case 'D-':
    case 'F':
      return true;
    default:
      return false;
  }
}
399  
/**
 * Read the letter grade out of a scoring result.
 *
 * @param {Object|null|undefined} scoringResult - Result returned by the scoring calls
 * @returns {string|null} `overall_calculation.letter_grade`, or null when it is absent or empty
 */
export function extractGrade(scoringResult) {
  const letterGrade = scoringResult?.overall_calculation?.letter_grade;
  if (!letterGrade) {
    return null;
  }
  return letterGrade;
}
406  
/**
 * Read the numeric conversion score out of a scoring result.
 *
 * A score of 0 is a valid result (every factor scored 0) and is returned as 0
 * rather than being reported as "no score".
 *
 * @param {Object|null|undefined} scoringResult - Result returned by the scoring calls
 * @returns {number|null} `overall_calculation.conversion_score`, or null when it is absent
 */
export function extractScore(scoringResult) {
  const conversionScore = scoringResult?.overall_calculation?.conversion_score;
  // Only 0 is special-cased; every other falsy value (undefined, null, NaN, '')
  // still maps to null exactly as before.
  if (conversionScore === 0) return 0;
  return conversionScore || null;
}
413  
// Default export: the same functions collected on one object for callers that
// import the module as a whole. shouldResubmit is reachable only through this object.
const scoreModule = {
  scoreWebsite,
  extractGrade,
  extractScore,
  shouldResubmit,
  computeScoreFromFactors,
  computeGrade,
};

export default scoreModule;